2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 use Getopt::Long qw(:config pass_through);
# Settings filled in from the command line, grouped by purpose.
# Connection details for the database server.
my ( $host, $port, $user, $pass, $dbname );
# What to parse and on whose behalf.
my ( $release, $species, $taxon, $division_id );
my ( $parser, $source, $file );
# Optional extra database, keep-existing-database switch, help switch.
my ( $db, $keep_db, $help );

# Snapshot of the raw arguments, taken before any option parsing touches @ARGV.
my $options = join( q{ }, @ARGV );

# Echo the command line exactly as it was received.
# NOTE(review): this also echoes any password given on the command line.
print "Options: $options\n";
# Option specification => destination variable pairs. In Getopt::Long syntax
# "a|b" declares an alias, "=s" takes a string value and "=i" an integer.
# NOTE(review): the call these pairs belong to is not visible in this
# excerpt - presumably GetOptions(); confirm.
36 'dbuser|user=s' => \$user,
37 'dbpass|pass=s' => \$pass,
38 'dbhost|host=s' => \$host,
39 'dbport|port=i' => \$port,
40 'dbname=s' => \$dbname,
41 'release=s' => \$release,
42 'species=s' => \$species,
44 'division_id=s' => \$division_id,
45 'parser=s' => \$parser,
46 'source=s' => \$source,
# keep_db takes a string value; it is only tested for truth later on.
49 'keep_db=s' => \$keep_db,
# -help prints the usage text and exits with status 0.
50 'help' => sub {
usage(); exit(0); } );
# Report, on STDERR, every argument still sitting in @ARGV. Getopt::Long is
# loaded with pass_through, so unrecognised arguments are left there.
# NOTE(review): the condition guarding this report is not visible in this
# excerpt.
53 print STDERR
"Unknown command line arguments:-\n";
# NOTE(review): $a is also the package variable used by sort(); a different
# loop variable name would avoid confusion.
54 foreach my $a (@ARGV){
55 print STDERR
"\t".$a.
"\n";
57 print STDERR
"Stopping script. Please fix the command line.\n";
58 print STDERR
"use -help for full list of command line options.\n";;
# Host, species and parser must all have been supplied (true values); what
# happens when one is missing is not visible in this excerpt.
62 if ( !$host || !$species || !$parser) {
# The registry is addressed through its class name; methods are called on
# the string as class methods.
67 my $registry =
'Bio::EnsEMBL::Registry';
# Load database connections into the registry. Only the -db_version argument
# (taken from $release) is visible in this excerpt.
68 $registry->load_registry_from_multiple_dbs(
74 -db_version => $release
# No taxon given on the command line: read the taxonomy id from the species'
# core MetaContainer adaptor.
77 if (!defined $taxon) {
78 my $meta_container = $registry->get_adaptor($species,
'core',
'MetaContainer');
79 $taxon = $meta_container->get_taxonomy_id();
# No division id given on the command line: fetch the species' division name
# and translate it through the %division_taxon lookup table.
81 if (!defined $division_id) {
82 my $meta_container = $registry->get_adaptor($species,
'core',
'MetaContainer');
83 my $division = $meta_container->get_division();
# Division name => numeric id. The visible values look like NCBI taxonomy
# ids (7742, 33208) - TODO confirm.
84 my %division_taxon = (
86 'EnsemblVertebrates' => 7742,
87 'Vertebrates' => 7742,
88 'EnsemblMetazoa' => 33208,
# Stays undef when the division name has no entry in the table.
91 $division_id = $division_taxon{$division};
# Directory part of the path this script was invoked as.
94 my $sql_dir = dirname($0);
# Create the xref database unless the caller asked to keep the existing one
# (-keep_db). NOTE(review): the meaning of the two trailing "1" flags is
# defined by the create() method, which is not visible here - confirm.
$xref_dbc->create($sql_dir, 1, 1) unless $keep_db;

# Single-quoted format so the '@' can never be read as array interpolation.
my $xref_db_url = sprintf('mysql://%s:%s@%s:%s/%s', $user, $pass, $host, $port, $dbname);
my $xref_dbi = $xref_dbc->dbi();

# The parser name comes straight from the command line and is turned into a
# module path below, so accept nothing but a plain package-name suffix.
if ( !defined $parser || $parser !~ /\A\w+(?:::\w+)*\z/ ) {
  die "Invalid parser name '" . ( defined $parser ? $parser : '' ) . "'\n";
}

my $module = "XrefParser::$parser";

# Load the parser class by file name instead of string-eval'ing generated
# code, and keep the error so that a failed load is reported here rather
# than surfacing later as an obscure "Can't locate object method" failure.
( my $module_file = "$module.pm" ) =~ s{::}{/}g;
my $loaded     = eval { require $module_file; 1 };
my $load_error = $@;

# A package that is already defined in memory is still acceptable, exactly
# as it was before the load result was checked.
if ( !$loaded && !$module->can('new') ) {
  die "Could not load parser module '$module': $load_error";
}

my $xref_run = $module->new($xref_dbc);

# Resolve which source the parsed data belongs to.
my $source_id = get_source_id($xref_dbi, $parser, $taxon, $source, $division_id);
# Fetch the DBAdaptor registered for the species under the group named by
# $db, then ask its connection to disconnect if it is idle.
113 my $dba = $registry->get_DBAdaptor($species, $db);
114 $dba->dbc()->disconnect_if_idle();
# The parser object has two entry points, run_script() and run(). Both are
# given the resolved source id and the taxon as species_id.
# NOTE(review): the condition choosing between them and the remaining
# arguments are not visible in this excerpt.
115 $xref_run->run_script( { source_id => $source_id,
116 species_id => $taxon,
124 $xref_run->run( { source_id => $source_id,
125 species_id => $taxon,
# A copy of @files is passed; @files is declared outside this excerpt.
128 files => [@files] }) ;
# Arguments, in order: database handle, parser name, species id, source
# name (used with LIKE) and division id.
132 my ($dbi, $parser, $species_id, $name, $division_id) = @_;
# First lookup: sources matching parser and species only. One statement
# returns the source id, its twin counts the matching rows.
135 my $select_source_id_sth = $dbi->prepare(
"SELECT u.source_id FROM source_url u, source s WHERE s.source_id = u.source_id AND parser = ? and species_id = ?");
136 my $select_count_source_id_sth = $dbi->prepare(
"SELECT count(*) FROM source_url u, source s WHERE s.source_id = u.source_id AND parser = ? AND species_id = ?");
137 $select_count_source_id_sth->execute($parser, $species_id);
# NOTE(review): the surrounding parentheses do not force list context, so
# fetchrow_array() is called in scalar context here and below; this relies
# on the query returning a single column. The final lookup uses the
# explicit (...)[0] form instead.
138 my $count = ($select_count_source_id_sth->fetchrow_array());
140 $select_source_id_sth->execute($parser, $species_id);
141 $source_id = ($select_source_id_sth->fetchrow_array());
# Second lookup: same query, additionally restricted by "name like ?".
# Both statement handle variables are re-assigned to the new statements.
143 $select_source_id_sth = $dbi->prepare(
"SELECT u.source_id FROM source_url u, source s WHERE s.source_id = u.source_id AND parser = ? and species_id = ? and name like ?");
144 $select_count_source_id_sth = $dbi->prepare(
"SELECT count(*) FROM source_url u, source s WHERE s.source_id = u.source_id AND parser = ? AND species_id = ? AND name like ?");
145 $select_count_source_id_sth->execute($parser, $species_id, $name);
146 $count = ($select_count_source_id_sth->fetchrow_array());
148 $select_source_id_sth->execute($parser, $species_id, $name);
149 $source_id = ($select_source_id_sth->fetchrow_array());
151 # If no species-specific source, look for common sources
# The fallback reuses the name-restricted statement, binding $division_id
# in place of the species id.
152 if (!defined $source_id) {
153 $select_source_id_sth->execute($parser, $division_id, $name);
154 $source_id = ($select_source_id_sth->fetchrow_array())[0];
# Release both statement handles.
156 $select_source_id_sth->finish();
157 $select_count_source_id_sth->finish();
163 # --------------------------------------------------------------------------------
169 xref_parser.pl -host {host} -port {port} -user {user} -pass {pass} -dbname {dbname} -release {release} \\
170 -species {species} -taxon {taxon_id} \\
171 -parser {parser} -source {source_id} -file {file} \\
172 -db {db} -keep_db {keep_db} \\
175 -user User name to access database. Must allow writing.
177 -pass Password
for user.
183 -dbname Name of xref database to use/create.
185 -release Release version of the species to parse
186 Used to find the right database on the server specified in the arguments
188 -species Which species to
import.
189 Species may be referred to by genus/species
190 (e.g. homo_sapiens) or common aliases (e.g. human).
191 Specifying an unknown species will cause a list
192 of valid species to be printed.
194 -taxon Which taxon to
import.
195 Can be used as an alternative to species.
197 -division Which division the species belongs to.
198 This defines which sources will be parsed and does
199 not necessarily imply taxonomic relationship
200 (e.g. ciona intestinalis is a vertebrate in
this context)
202 -parser Which parser to
run
204 -source Name of the source to extract data
for (should match equivalent parser)
206 -file Location and name of the file to be parsed
207 Path should be absolute
209 -db If the parser requires connection to a database, specify here
210 For example, specify otherfeatures when running RefSeqCoordinateParser
212 -keep_db When re-
using an existing xref database, use the option
213 By
default, deletes any existing one and creates a
new one
219 #--------------------------------------------------------------------------------