2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
# Storage for the command-line options.
# NOTE(review): @user_karyotype is referenced by the option table below, but
# its declaration is not visible in this excerpt.
26 my ($db_name, $db_host, $db_user, $db_pass, $db_port, $help, $species, $group, $no_interactive);
# Base URL under which the per-accession assembly report files are requested.
29 my $NCBI_BASE =
'https://ftp.ncbi.nlm.nih.gov/genomes/ASSEMBLY_REPORTS/All';
# Option table (looks like Getopt::Long specifications; the call that consumes
# it is not visible here). Names separated by '|' are aliases for one option;
# '=s' takes a string value.
36 "db_name|dbname|database=s" => \$db_name,
37 "db_host|dbhost|host=s" => \$db_host,
38 "db_user|dbuser|user|username=s" => \$db_user,
39 "db_pass|dbpass|pass|password=s" => \$db_pass,
40 "db_port|dbport|port=s" => \$db_port,
41 "species=s" => \$species,
# '=s@' collects string values into an array, so the option may be repeated.
43 'karyotype=s@' => \@user_karyotype,
# '!' makes this a negatable boolean flag.
44 'no_interactive!' => \$no_interactive,
# When $help is set: print usage and exit with status 0.
# NOTE(review): `&usage;` without parentheses calls usage with the caller's
# current @_.
49 if ($help) { &
usage; exit 0; }
# Both the database name and host are mandatory; otherwise report the problem,
# print usage and exit with status 1.
50 unless ($db_name and $db_host) { print
"Insufficient arguments\n"; &
usage; exit 1; }
# Main flow (excerpt). The conditions guarding the statements below are not
# visible in this excerpt, so each comment describes only the statement itself.

# Use the karyotype list supplied on the command line.
74 @karyotype = @user_karyotype;
# Ask the user to confirm; the answer is kept in $ok_to_write.
82 my $ok_to_write =
confirm(
'Is this correct?');
# Prompt for a replacement karyotype, entered as a comma-separated list.
84 my $new_karyotype =
capture_user_input(
"Please give the correct karyotype (comma separated)");
# Ask for confirmation before overwriting karyotypes that are already assigned.
93 $overwrite =
confirm(
'Karyotypes have already been assigned. Are you sure you want to overwrite');
# Abort with an explanatory message.
95 die
"Cannot continue. Karyotypes have already been assigned to these Slices";
# Read the assembly accession from the database's meta information.
106 my $mc = $dba->get_MetaContainer();
# Fetch the single value stored under the 'assembly.accession' key.
# NOTE(review): the meaning of the second argument (1) is defined by the
# MetaContainer API and cannot be confirmed from this excerpt.
107 my $acc = $mc->single_value_by_key(
'assembly.accession', 1);
# Message for the case where no valid accession is available (the guarding
# condition is not visible in this excerpt).
109 print
"Cannot continue. Species does not have a valid Genome Collections accession in its meta table\n";
# Tell the user which accession is being looked up.
118 print STDOUT
"Fetching karyotype from NCBI using accession '$accession'\n";
# The report is requested from <NCBI_BASE>/<accession>.assembly.txt.
119 my $url =
"$NCBI_BASE/${accession}.assembly.txt";
# Download the report text.
# NOTE(review): do_FTP is defined elsewhere; the meaning of the trailing
# arguments (5, 2) is not visible here and should be confirmed.
120 my $content = do_FTP($url, 5, 2);
# Process the report line by line.
121 my @lines = split(/\n/, $content);
123 foreach my $line (@lines) {
# Lines beginning with '#' are skipped.
124 next
if $line =~ /^#/;
# Only the first two tab-separated fields of a line are used.
126 my ($name, $role) = split(/\t/, $line);
# Keep the name only when the second field is exactly 'assembled-molecule'.
# NOTE(review): $role is undef for a line that contains no tab, so this `eq`
# emits an "uninitialized value" warning if warnings are enabled.
127 if($role eq
'assembled-molecule') {
128 push(@karyotype, $name);
# Arguments: a database adaptor followed by the list of karyotype names.
135 my ($dba, @karyotype) = @_;
136 my $sa = $dba->get_SliceAdaptor();
# Fetch one 'toplevel' slice per name, keeping the order of the input list.
# NOTE(review): no check of the fetch_by_region result is visible in this
# excerpt; whatever it returns for an unknown name is pushed as-is.
138 foreach my $name (@karyotype) {
139 my $slice = $sa->fetch_by_region(
'toplevel', $name);
140 push(@slices, $slice);
# Branch taken when the no_interactive flag is set (its body is not visible in
# this excerpt).
147 if($no_interactive) {
# Returns 1 when the answer is 'y' or 'yes' (case-insensitive), otherwise 0.
# With /m the anchors match at line boundaries, so a trailing newline in the
# answer does not prevent a match.
151 return ($userinput =~ /^y(?:es)?$/xmsi) ? 1 : 0;
# Prompting is impossible when no_interactive is set, so abort instead.
156 die
"Cannot continue; asking for user input but -no_interactive is on" if $no_interactive;
# Show the prompt followed by ': ' (no newline) so the answer is typed on the
# same line.
157 print STDOUT
"$msg: ";
# Read a single line from standard input.
# NOTE(review): the value still carries its line terminator at this point; any
# chomp is not visible in this excerpt.
158 my $userinput = <STDIN>;
# Takes the karyotype names as a flat list.
164 my (@karyotype) = @_;
# Print a label followed by the names joined with commas. These two statements
# do not emit a newline themselves.
165 print STDOUT
"Karyotype to be used: ";
166 print STDOUT join(q{,},@karyotype);
# Flatten the arguments into individual names: joining everything on ',' and
# splitting again means each argument may be a single name or a
# comma-separated list of names.
172 return split(q{,}, join(q{,}, @_));
# Result flag, initially 0; it is the value returned at the end.
177 my $has_karyotype = 0;
# $slices is an array reference; each slice is asked whether it already has a
# karyotype (the body of the `if` is not visible in this excerpt).
178 foreach my $slice (@{$slices}) {
179 if($slice->has_karyotype()) {
184 return $has_karyotype;
# Arguments: a database adaptor and an array reference of slices.
188 my ($dba, $slices) = @_;
189 my $aa = $dba->get_AttributeAdaptor();
# Attribute code used for both the removal and the store below.
190 my $code =
'karyotype_rank';
# NOTE(review): $rank is used inside the loop, but its initialisation and
# update are not visible in this excerpt.
192 foreach my $slice (@{$slices}) {
# Report the rank given to this slice.
# NOTE(review): the format string has no trailing newline.
193 printf STDOUT
'%s has been assigned rank %d', $slice->seq_region_name(), $rank;
# Remove first, then store, under the same attribute code; presumably this
# clears an existing rank so it is replaced rather than duplicated (confirm
# against the AttributeAdaptor API).
195 $aa->remove_from_Slice($slice, $code);
196 $aa->store_on_Slice($slice, $code, $rank);
205 A program for writing karyotype ranks into a core-like database. If one is
206 not specified then we query NCBI's assembly report resource for a candidate
207 list. The program also allows you to specify a custom rank if required.
209 This module uses LWP to communicate with NCBI's FTP site. Should you have
210 any issues please consult its documentation on how to debug the issue. Also
211 make sure you can access $NCBI_BASE
215 perl karyotype_rank_assigner.pl -db_name NAME -db_host HOST -db_user USR -db_pass PASS -species human -group GRP
219 -db_name The DB to add the rank to
223 -db_host Hostname for the DB
227 -db_user Username for the DB
232 -db_pass Password for the DB
237 -db_port Port for the DB
241 -species Name of the species; defaults to human
242 -group Name of the DB group; defaults to core
244 -karyotype Comma separated list of karyotypes to write. Also
245 supports specifying this option multiple times
247 -no_interactive Do not prompt for confirmation
249 -help Print this message