ensembl-hive  2.7.0
EG_DBParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::EG_DBParser;
21 
22 use strict;
23 use warnings;
24 use Carp;
25 use File::Basename;
26 
27 use base qw( XrefParser::BaseParser );
29 
# run_script
#
# Reads xrefs for one species from the EG_Xref table of an external database
# and stores each one as a DIRECT xref linked to its gene.
#
# Arguments: ($self, $ref_arg), where $ref_arg is a hash reference holding
#   source_id  - required; only checked for presence in this method
#   species_id - required; used as the taxonomy_id filter and as the
#                species of every xref that is looked up or added
#   file       - required; "<type>:<args>" where <args> contains
#                comma-separated key=>value pairs. Recognised keys are
#                host, port, dbname, pass and user (user defaults to "ensro")
#   verbose    - optional; when true a summary line is printed to STDOUT
#
# Returns 0 on success and 1 when no database handle could be obtained.
# Croaks when a required argument is missing or when preparing/executing the
# query fails; dies when one of the hard-coded source names has no source_id.
sub run_script {
  my ($self, $ref_arg) = @_;
  my $source_id  = $ref_arg->{source_id};
  my $species_id = $ref_arg->{species_id};
  my $file       = $ref_arg->{file};
  my $verbose    = $ref_arg->{verbose};

  if ( !defined $source_id or !defined $species_id or !defined $file ) {
    croak "Need to pass source_id, species_id and file as pairs";
  }

  # Normalise to a plain boolean. (A bitwise "|= 0" would turn a true but
  # non-numeric flag such as "yes" into 0.)
  $verbose = $verbose ? 1 : 0;

  print STDERR "parsing EG_DB_Xrefs...\n";

  # Everything after the first ':' is the argument string. The limit of 2
  # keeps values that themselves contain ':' intact.
  my (undef, $my_args) = split( /:/, $file, 2 );
  $my_args = '' if !defined $my_args;

  # Connection settings; only the user has a default.
  my %conn = ( user => 'ensro' );
  foreach my $key (qw( host port dbname pass user )) {
    # A value ends at the next comma or at the end of the string, so the
    # last pair does not need a trailing comma.
    if ( $my_args =~ /\Q$key\E=>(\S+?)(?:,|\z)/ ) {
      $conn{$key} = $1;
    }
  }

  print STDERR "species_id, $species_id\n";

  my %source;

  # Todo: get the whole list of sources for All EG set

  my $sources_aref = ['Mycgr3_jgi_v2.0_gene', 'BROAD_U_maydis','CADRE','CADRE_Afum_A1163','AspGD','ENA_GENE','BROAD_F_oxysporum','BROAD_G_moniliformis','BROAD_G_zeae','GeneDB','BROAD_P_infestans','phyra_jgi_v1.1','physo1_jgi_v1.1','PGD_GENE','phatr_jgi_v2_bd','phatr_jgi_v2','thaps_jgi_v2','thaps_jgi_v2_bd','SCHISTODB','triad_jgi_v1.0'];
  foreach my $source_name (@$sources_aref) {
    $source{$source_name} = $self->get_source_id_for_source_name($source_name)
      || die "Could not get source_id for $source_name\n";
  }

  my $db = XrefParser::Database->new({ host   => $conn{host},
                                       port   => $conn{port},
                                       user   => $conn{user},
                                       dbname => $conn{dbname},
                                       pass   => $conn{pass} });

  my $dbi2 = $db->dbi();
  if ( !defined $dbi2 ) {
    print STDERR "failed to connect to EG_Xrefs database!\n";
    return 1;
  }

  # The species id is passed as a bound parameter instead of being
  # interpolated into the statement text.
  my $sql = <<'SQL';
    SELECT gene_stable_id, transcript_stable_id, gene_dbid, transcript_dbid,
           source, xref_name, xref_primary_id, xref_description
      FROM EG_Xref
     WHERE taxonomy_id = ?
SQL

  my ($gene_stable_id, $transcript_stable_id, $gene_dbid, $transcript_dbid, $source_name, $label, $acc, $desc);
  my $sth = $dbi2->prepare($sql) or croak( $dbi2->errstr() );
  $sth->execute($species_id) or croak( $dbi2->errstr() );
  $sth->bind_columns(\$gene_stable_id, \$transcript_stable_id, \$gene_dbid, \$transcript_dbid, \$source_name, \$label, \$acc, \$desc);

  my $added = 0;
  while ( $sth->fetch() ) {

    # Keep only the text in front of a "[Source:" suffix; a NULL
    # description stays undefined.
    my $description;
    if ( defined $desc ) {
      ($description) = split( /\[Source:/, $desc, 2 );
    }

    if ( !defined $source{$source_name} ) {
      print STDERR "Could not find source_id for source $source_name for xref $acc\n";
      next;
    }

    # Reuse an existing xref for this accession/source/species if there is one.
    my $xref_id = $self->get_xref( $acc, $source{$source_name}, $species_id );
    if ( !defined $xref_id ) {
      $xref_id = $self->add_xref({ acc        => $acc,
                                   label      => $label,
                                   desc       => $description,
                                   source_id  => $source{$source_name},
                                   species_id => $species_id,
                                   info_type  => "DIRECT" });
      $added++;
    }

    # Prefer the stable id; fall back to the internal gene id when the
    # stable id is NULL or empty.
    my $gene_id = $gene_dbid;
    if ( defined $gene_stable_id and $gene_stable_id ne "" ) {
      $gene_id = $gene_stable_id;
    }

    # Only gene-level direct xrefs are written; the transcript-level call
    # is commented out, so the transcript columns are fetched but unused.
    #$self->add_direct_xref($xref_id, $transcript_id, "Transcript", "") if (defined($transcript_id));
    $self->add_direct_xref( $xref_id, $gene_id, "Gene", "" ) if defined $gene_id;
  }
  $sth->finish;

  print "Added $added Xrefs for EGs\n" if $verbose;
  return 0;
}
138 
139 1;
XrefParser::BaseParser
Definition: BaseParser.pm:8
XrefParser::Database::new
public new()
XrefParser::Database
Definition: Database.pm:8