3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
20 package XrefParser::RFAMParser;
33 my ($self, $ref_arg) = @_;
34 my $source_id = $ref_arg->{source_id};
35 my $species_id = $ref_arg->{species_id};
36 my $species_name = $ref_arg->{species};
37 my $file = $ref_arg->{file};
38 my $verbose = $ref_arg->{verbose};
39 my $core_db = $ref_arg->{dba};
40 my $dbi = $ref_arg->{dbi};
41 $dbi = $self->dbi unless defined $dbi;
43 if((!defined $source_id) or (!defined $species_id) or (!defined $file) ){
44 croak
"Need to pass source_id, species_id and file as pairs";
55 if($file =~ /wget[=][>](\S+?)[,]/){
58 if($file =~ /host[=][>](\S+?)[,]/){
61 if($file =~ /port[=][>](\S+?)[,]/){
64 if($file =~ /dbname[=][>](\S+?)[,]/){
67 if($file =~ /pass[=][>](\S+?)[,]/){
70 if($file =~ /user[=][>](\S+?)[,]/){
75 #get direct RFAM xrefs from core
76 my $registry =
"Bio::EnsEMBL::Registry";
80 my %id2name = $self->species_id2name($dbi);
81 if (defined $species_name) { push @{$id2name{$species_id}}, $species_name; }
82 if (!defined $id2name{$species_id}) { next; }
83 $species_name = $id2name{$species_id}[0];
92 '-species' => $species_name,
95 } elsif (defined $core_db) {
98 $registry->load_registry_from_multiple_dbs(
100 '-host' =>
'mysql-ens-sta-1',
105 $dba = $registry->get_DBAdaptor($species_name,
'core');
108 my $rfam_sql =
"select distinct t.stable_id, hit_name from analysis a join transcript t on (a.analysis_id = t.analysis_id and a.logic_name like 'ncrna%' and t.biotype != 'miRNA') join exon_transcript et on (t.transcript_id = et.transcript_id) join supporting_feature sf on (et.exon_id = sf.exon_id and sf.feature_type = 'dna_align_feature' ) join dna_align_feature df on (sf.feature_id = df.dna_align_feature_id) order by hit_name";
113 #hash keyed on RFAM accessions, value is an array of ensembl transcript stable_ids
114 my %rfam_transcript_stable_ids;
116 while (my ($stable_id, $hit_name) = $sth->fetchrow_array ) {
119 if ($hit_name =~ /^(RF\d+)/) {
123 push @{$rfam_transcript_stable_ids{$rfam_id}}, $stable_id;
130 my $ua = LWP::UserAgent->new();
133 my $request = HTTP::Request->new(GET => $wget);
134 my $response = $ua->request($request);
136 if ( !$response->is_success() ) {
137 warn($response->status_line);
140 @lines = split(/\n\n\n/, $response->decoded_content);
142 my $file_io = $self->get_filehandle($file);
143 if ( !defined $file_io ) {
144 print
"ERROR: Can't open RFAM file $file\n";
148 while (my $line = $file_io->getline()) {
150 push @lines, $temp_line;
153 $temp_line .= $line .
"\n";
160 my $direct_count = 0;
162 while (my $entry = shift @lines) {
168 my ($accession) = $entry =~ /#=GF\sAC\s+(\w+)/ ;
169 my ($label) = $entry =~ /#=GF\sID\s+([^\n]+)/;
170 my ($description) = $entry =~ /#=GF\sDE\s+([^\n]+)/;
172 if (exists($rfam_transcript_stable_ids{$accession})){
174 my $xref_id = $self->add_xref({ acc => $accession,
176 label => $label || $accession ,
177 desc => $description,
178 source_id => $source_id,
179 species_id => $species_id,
181 info_type =>
"DIRECT"} );
183 my @transcript_stable_ids = @{$rfam_transcript_stable_ids{$accession}};
184 foreach my $stable_id (@transcript_stable_ids){
185 $self->add_direct_xref($xref_id, $stable_id,
"Transcript",
"", $dbi);
193 print
"Added $xref_count RFAM xrefs and $direct_count direct xrefs\n" if($verbose);
195 return 0; # successful