ensembl-hive  2.7.0
miRBaseParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::miRBaseParser;
21 
22 use strict;
23 use warnings;
24 use Carp;
25 use DBI;
26 
27 use base qw(XrefParser::BaseParser);
28 
29 
# Parser entry point: build xrefs from the first file in the argument hash
# and upload them.
#
# Arguments (passed as a single hash reference):
#   source_id  - required; forwarded to create_xrefs()
#   species_id - required; forwarded to create_xrefs()
#   files      - required; array reference, only the first element is parsed
#   species    - optional species name, forwarded to create_xrefs()
#   dbi        - optional database handle; defaults to $self->dbi
#
# Returns 0 on success and 1 on failure.
# Croaks when source_id, species_id or files is missing.
sub run {
    my ($self, $ref_arg) = @_;

    my $source_id    = $ref_arg->{source_id};
    my $species_id   = $ref_arg->{species_id};
    my $species_name = $ref_arg->{species};
    my $files        = $ref_arg->{files};
    my $dbi          = $ref_arg->{dbi};
    $dbi = $self->dbi unless defined $dbi;

    if (!defined $source_id or !defined $species_id or !defined $files) {
        croak "Need to pass source_id, species_id and files as pairs";
    }

    my $file = $files->[0];

    my $xrefs = $self->create_xrefs($source_id, $file, $species_id, $dbi, $species_name);

    # Anything other than an array reference (undef, or a bare error code
    # such as 1) means parsing failed; never hand that to the uploader.
    if (ref($xrefs) ne 'ARRAY') {
        return 1;    # error
    }

    # upload
    if (!defined $self->upload_xref_object_graphs($xrefs, $dbi)) {
        return 1;    # error
    }

    return 0;        # successful
}
63 
64 # --------------------------------------------------------------------------------
65 # Parse file into array of xref objects
66 
# Parse a flat file of "//"-terminated records into a list of xref hashes.
#
# Arguments:
#   $source_id    - stored as SOURCE_ID on every xref
#   $file         - file name handed to $self->get_filehandle()
#   $species_id   - only records whose species name maps to this id are kept
#   $dbi          - handle passed to $self->species_id2name()
#   $species_name - optional extra name accepted for $species_id
#
# Returns a reference to an array of xref hashes (possibly empty), or undef
# when no names are known for $species_id or the file cannot be opened.
sub create_xrefs {
    my ($self, $source_id, $file, $species_id, $dbi, $species_name) = @_;

    my %species2name = $self->species_id2name($dbi);
    if (defined $species_name) {
        push @{ $species2name{$species_id} }, $species_name;
    }
    if (!defined $species2name{$species_id}) {
        # Without any name for this species no record could ever match.
        print STDERR "ERROR: No species names known for species_id $species_id\n";
        return;
    }
    my @names = @{ $species2name{$species_id} };

    my %name2species_id = map { $_ => $species_id } @names;

    my $file_io = $self->get_filehandle($file);
    if (!defined $file_io) {
        print STDERR "ERROR: Could not open $file\n";
        return;    # undef signals an error to the caller
    }

    my @xrefs;

    # Read one whole record per getline(): records end with a "//" line.
    local $/ = "\n//";

    RECORD:
    while (defined(my $entry = $file_io->getline())) {
        chomp $entry;
        next RECORD unless length $entry;

        my ($header, $sequence) = split /\nSQ/, $entry, 2;

        # Split the sequence part into lines; the first element is the
        # remainder of the SQ line itself (information only), so drop it.
        my @seq_lines = defined $sequence ? split(/\n/, $sequence) : ();
        shift @seq_lines;

        # One upper-case line, U replaced by T, then strip digits,
        # whitespace, commas and plus signs.
        my $seq = uc join q{}, @seq_lines;
        $seq =~ tr/U/T/;
        $seq =~ s/[\d\s,+]//g;

        # ^ with /m matches at the start of the record as well as after a
        # newline, so the first record of a file is parsed like the rest.
        my ($name)        = $header =~ /^ID\s+(\S+)\s+/m;
        my ($acc)         = $header =~ /^AC\s+(\S+);\s+/m;
        my ($description) = $header =~ /^DE\s+(.+)\s+stem-loop/m;

        # The description is "<species words> <miRNA identifier>"; remove
        # the trailing identifier and join every remaining word with "_".
        my @description_parts = defined $description ? split(/\s+/, $description) : ();
        pop @description_parts;
        my $species = lc join q{_}, @description_parts;

        # skip records whose species is not one of the accepted names
        next RECORD unless defined $name2species_id{$species};

        # TODO synonyms, dependent xrefs etc
        push @xrefs, {
            SEQUENCE_TYPE => 'dna',
            STATUS        => 'experimental',
            SOURCE_ID     => $source_id,
            ACCESSION     => $acc,
            LABEL         => $name,
            DESCRIPTION   => $name,
            SEQUENCE      => $seq,
            SPECIES_ID    => $species_id,
        };
    }

    $file_io->close();

    print "Read " . scalar(@xrefs) . " xrefs from $file\n";

    return \@xrefs;
}
151 
152 
153 1;
map
public map()
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()