See the NOTICE file distributed with this work for additional information
regarding copyright ownership.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
20 package XrefParser::miRBaseParser;
# Parser entry point (the enclosing `sub` line and the closing braces are not
# visible in this excerpt).
#
# Takes a single hashref argument and reads these keys from it:
#   source_id, species_id, files  - required; croaks if any is undefined
#   species                       - optional species name, forwarded to create_xrefs()
#   verbose                       - read here, but not used in the visible lines
#   dbi                           - optional; defaults to $self->dbi
#
# Builds xrefs from the first entry of `files` via create_xrefs(), passes them
# to upload_xref_object_graphs(), and returns 0 on success.
32 my ($self, $ref_arg) = @_;
33 my $source_id = $ref_arg->{source_id};
34 my $species_id = $ref_arg->{species_id};
35 my $species_name = $ref_arg->{species};
36 my $files = $ref_arg->{files};
37 my $verbose = $ref_arg->{verbose};
38 my $dbi = $ref_arg->{dbi};
# Fall back to the object's own dbi when the caller did not pass one.
39 $dbi = $self->dbi unless defined $dbi;
# All three of source_id, species_id and files are mandatory.
41 if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
42 croak
"Need to pass source_id, species_id and files as pairs";
# Only the first element of the files array is parsed.
46 my $file = @{$files}[0];
# NOTE(review): $species_id was already required to be defined by the check
# above, so this filename-based fallback looks unreachable - confirm.
48 if(!defined($species_id)){
49 $species_id = $self->get_species_id_for_filename($file);
# NOTE(review): create_xrefs() returns 1 when the file cannot be opened; no
# check of that value is visible before it is handed to the upload call below.
52 my $xrefs = $self->create_xrefs($source_id, $file, $species_id, $dbi, $species_name);
# An undefined result from upload_xref_object_graphs() is treated as a special
# case; the body of this branch is not visible in this excerpt.
57 if(!defined($self->upload_xref_object_graphs($xrefs, $dbi))){
60 return 0; # successful
64 # --------------------------------------------------------------------------------
65 # Parse file into array of xref objects
#
# Arguments (after $self): $source_id, $file, $species_id, $dbi and an
# optional $species_name.
#
# Collects the names accepted for $species_id, reads records from $file, and
# fills an xref hash (SEQUENCE_TYPE, STATUS, SOURCE_ID, ACCESSION, LABEL,
# DESCRIPTION, SEQUENCE, SPECIES_ID) for records whose species name maps to
# $species_id. Returns 1 when the file cannot be opened; the success return is
# not visible in this excerpt.
# NOTE(review): the `sub` line, the closing braces, and the declarations of
# $entry, $xref and @xrefs are outside the visible lines.
69 my ($self, $source_id, $file, $species_id, $dbi, $species_name) = @_;
# Each value of %species2name is used as an array ref of names, keyed by
# species id; an explicitly supplied $species_name is appended as one more name.
71 my %species2name = $self->species_id2name($dbi);
72 if (defined $species_name) { push @{$species2name{$species_id}}, $species_name; }
# NOTE(review): no enclosing loop is visible around this `next`. If there
# really is none, `next` is not an early return here - confirm and consider
# an explicit `return`.
73 if (!defined $species2name{$species_id}) { next; }
74 my @names = @{$species2name{$species_id}};
# Reverse lookup: every accepted name maps to the one requested species id.
76 my %name2species_id =
map{ $_=>$species_id } @names;
78 my $file_io = $self->get_filehandle($file);
# NOTE(review): the error path returns the number 1, while the caller passes
# this sub's result on as the xrefs collection - confirm the caller copes.
79 if ( !defined $file_io ) {
80 print STDERR
"ERROR: Could not open $file\n";
81 return 1; # 1 is an error
# Read the file one record at a time into $_.
# NOTE(review): $entry used below is not assigned in the visible lines;
# presumably it is derived from the record read here - confirm.
88 while ($_ = $file_io->getline()) {
# Split the record at the first "\nSQ" into the annotation header and the
# sequence section.
97 my ($header, $sequence) = split (/\nSQ/, $entry, 2);
# NOTE(review): `my ... if ...` (declaration with a statement modifier) is
# documented in perlsyn as having undefined behaviour - consider declaring
# @seq_lines first and assigning conditionally.
99 my @seq_lines = split (/\n/, $sequence)
if ($sequence);
100 # drop the information line
103 $sequence = join(
"", @seq_lines);
# Normalise to upper-case DNA letters: RNA 'U' becomes 'T'.
105 $sequence = uc($sequence);
107 $sequence =~ s/U/T/g;
108 # remove numbers and whitespace
# NOTE(review): inside a character class '+' and ',' are literal characters,
# so this class also matches '+' and ','. The rest of the statement is cut
# off in this excerpt.
109 $sequence =~ s/[\d+,\s+]
# Pull the ID, AC and DE fields out of the header. The DE capture is the text
# that precedes "stem-loop".
112 my ($name) = $header =~ /\nID\s+(\S+)\s+/;
113 my ($acc) = $header =~ /\nAC\s+(\S+);\s+/;
114 my ($description) = $header =~ /\nDE\s+(.+)\s+stem-loop/;
# NOTE(review): same `my ... if ...` pattern as above; the pop and join below
# still run when $description did not match.
115 my @description_parts = split (/\s+/, $description)
if ($description);
116 # remove the miRNA identifier
# The last whitespace-separated word is dropped; the remaining words are
# joined with single spaces and used as the species name.
117 pop @description_parts;
118 my $species = join(
" ", @description_parts);
119 $xref->{SEQUENCE_TYPE} =
'dna';
120 $xref->{STATUS} =
'experimental';
121 $xref->{SOURCE_ID} = $source_id;
# Lower-cased before the lookup - assumes the names in %name2species_id are
# lower-case; TODO confirm.
122 $species = lc $species;
125 my $species_id_check = $name2species_id{$species};
# Records whose species name is not one of the accepted names are skipped.
128 next
if (!defined($species_id_check));
130 # skip xrefs for species that aren't in the species table
# NOTE(review): every value stored in %name2species_id is $species_id, so
# after the `next` above this comparison only additionally guards against an
# undefined $species_id.
131 if (defined($species_id) and $species_id == $species_id_check) {
133 $xref->{ACCESSION} = $acc;
134 $xref->{LABEL} = $name;
# NOTE(review): DESCRIPTION is set to $name, not to the parsed $description -
# confirm this is intended.
135 $xref->{DESCRIPTION} = $name;
136 $xref->{SEQUENCE} = $sequence;
137 $xref->{SPECIES_ID} = $species_id;
139 # TODO synonyms, dependent xrefs etc
# Report how many xrefs are held in @xrefs after parsing.
146 print
"Read " . scalar(@xrefs) .
" xrefs from $file\n";