3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
10      http://www.apache.org/licenses/LICENSE-2.0
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
# NOTE(review): this listing is fragmentary. The enclosing sub header, the
# closing braces, and the statements that set up %source, $db, $sql and
# $added are not visible here; the comments below describe only what the
# visible lines show.
20 package XrefParser::EG_DBParser;
# Unpack the invocant and the argument hash ref, then read the four
# settings used below (source_id, species_id, file, verbose) out of it.
31 my ($self, $ref_arg) = @_;
32 my $source_id = $ref_arg->{source_id};
33 my $species_id = $ref_arg->{species_id};
34 my $file = $ref_arg->{file};
35 my $verbose = $ref_arg->{verbose};
# source_id, species_id and file are mandatory: croak if any is undefined.
37 if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){
38 croak
"Need to pass source_id, species_id and file as pairs";
42 print STDERR
"parsing EG_DB_Xrefs...\n";
# The file argument is not a path here: it is split on : into a type and an
# argument string. Because split assigns to two scalars, only the first two
# fields are kept and anything after a second : is discarded.
44 my ($type, $my_args) = split(/:/,$file);
# Each connection setting is matched as key=>value followed by a comma, so
# a value is only recognised when a comma comes after it.
# NOTE(review): what is done with each captured value is not visible here.
52 if($my_args =~ /host[=][>](\S+?)[,]/){
55 if($my_args =~ /port[=][>](\S+?)[,]/){
58 if($my_args =~ /dbname[=][>](\S+?)[,]/){
61 if($my_args =~ /pass[=][>](\S+?)[,]/){
64 if($my_args =~ /user[=][>](\S+?)[,]/){
68 print STDERR
"species_id, $species_id\n";
72 # Todo: get the whole list of sources for All EG set
# Hard-coded list of the source names this parser resolves to source_ids.
74 my $sources_aref = [
'Mycgr3_jgi_v2.0_gene',
'BROAD_U_maydis',
'CADRE',
'CADRE_Afum_A1163',
'AspGD',
'ENA_GENE',
'BROAD_F_oxysporum',
'BROAD_G_moniliformis',
'BROAD_G_zeae',
'GeneDB',
'BROAD_P_infestans',
'phyra_jgi_v1.1',
'physo1_jgi_v1.1',
'PGD_GENE',
'phatr_jgi_v2_bd',
'phatr_jgi_v2',
'thaps_jgi_v2',
'thaps_jgi_v2_bd',
'SCHISTODB',
'triad_jgi_v1.0'];
# Look up the source_id for every listed name and cache it in %source.
# The lookup result is tested for truth, so undef, 0 or an empty string
# all make this die.
75 foreach my $source_name (@$sources_aref) {
76 $source{$source_name} = $self->get_source_id_for_source_name($source_name) || die
"Could not get source_id for $source_name\n";
# Fetch the database handle from $db (created outside the visible lines).
85 my $dbi2 = $db->dbi();
87 print STDERR
"failed to connect to EG_Xrefs database!\n";
92 SELECT gene_stable_id, transcript_stable_id, gene_dbid, transcript_dbid,
93 source, xref_name, xref_primary_id, xref_description
95 WHERE taxonomy_id = $species_id
98 my ($gene_stable_id, $transcript_stable_id, $gene_dbid, $transcript_dbid, $source_name, $label, $acc, $desc);
# Prepare and execute the query, croaking with the handle error string if
# execute fails, then bind the eight result columns to the lexicals above
# so that each fetch call fills them in place.
# NOTE(review): $species_id appears to be written directly into the SQL
# text (taxonomy_id = $species_id) rather than passed as a bind value;
# confirm it is always a trusted integer.
99 my $sth = $dbi2->prepare($sql);
100 $sth->execute() or croak( $dbi2->errstr() );
101 $sth->bind_columns(\$gene_stable_id, \$transcript_stable_id, \$gene_dbid, \$transcript_dbid, \$source_name, \$label, \$acc, \$desc);
# One iteration per result row.
103 while ( $sth->fetch() ) {
# Keep only the description text that precedes the [Source: marker; the
# remainder ($junk) is not used in the visible lines.
105 my ($description,$junk) = split(
"[[]Source:",$desc);
# Report rows whose source name has no cached source_id.
# NOTE(review): whether such rows are then skipped is not visible here.
107 if(!defined($source{$source_name})){
108 print STDERR
"Could not find source_id for source $source_name for xref $acc\n";
# Reuse the xref id returned by get_xref for this accession, source and
# species; only when that is undefined is a new xref created via add_xref
# with info_type DIRECT.
111 my $xref_id = $self->get_xref($acc,$source{$source_name}, $species_id);
112 if(!defined($xref_id)){
113 $xref_id = $self->add_xref({ acc => $acc,
115 desc => $description,
116 source_id => $source{$source_name},
117 species_id => $species_id,
118 info_type =>
"DIRECT"} );
# Default to the internal dbid and switch to the stable id when that is
# defined and non-empty (same rule for transcript and gene).
121 my $transcript_id = $transcript_dbid;
122 if(defined($transcript_stable_id) and $transcript_stable_id ne
""){
123 $transcript_id = $transcript_stable_id;
125 my $gene_id = $gene_dbid;
126 if(defined($gene_stable_id) and $gene_stable_id ne
""){
127 $gene_id = $gene_stable_id;
130 #$self->add_direct_xref($xref_id, $transcript_id, "Transcript", "") if (defined($transcript_id));
# Only the gene-level direct xref is registered, and only when a gene id
# is defined. The transcript call above is commented out, so
# $transcript_id has no visible use.
131 $self->add_direct_xref($xref_id, $gene_id,
"Gene",
"")
if (defined($gene_id));
# Summary line, printed to STDOUT only in verbose mode.
# NOTE(review): $added is maintained outside the visible lines.
135 print
"Added $added Xrefs for EGs\n" if($verbose);