ensembl-hive  2.8.1
WilsonAffyParser.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefParser::WilsonAffyParser;
21 
22 use strict;
23 use warnings;
24 use Carp;
25 use base qw( XrefParser::BaseParser );
26 
# Parser entry point.
#
# Expects a hash reference with the keys:
#   source_id  - source identifier stored on every xref (required)
#   species_id - species identifier stored on every xref (required)
#   files      - array reference; only the first element is parsed (required)
#   verbose    - optional flag enabling progress output
#
# Croaks when one of the required keys is missing.
# Returns 0 on success and 1 on failure (the input could not be parsed into
# an xref list, or upload_xref_object_graphs() returned undef).
sub run {
    my ($self, $ref_arg) = @_;

    my $source_id  = $ref_arg->{source_id};
    my $species_id = $ref_arg->{species_id};
    my $files      = $ref_arg->{files};
    my $verbose    = $ref_arg->{verbose};

    if ( !defined $source_id or !defined $species_id or !defined $files ) {
        croak "Need to pass source_id, species_id and files as pairs";
    }
    $verbose |= 0;    # an undefined flag becomes 0

    # create_xrefs() hands back a single scalar, so it must be captured as
    # one: assigning it to an array always produced a one-element list, which
    # made the emptiness check useless and let an error value reach the
    # uploader.
    my $xrefs = $self->create_xrefs( $source_id, $species_id, $files->[0], $verbose );

    # Anything other than an array reference means parsing failed.
    if ( ref($xrefs) ne 'ARRAY' ) {
        return 1;    # 1 error
    }

    # upload
    if ( !defined( $self->upload_xref_object_graphs($xrefs) ) ) {
        return 1;
    }

    return 0;
}
51 
# Parse a tab-separated probe-set mapping file into xrefs.
#
# The first line of the file is skipped. On every other line the first column
# is used as the probe-set accession and the third column as the mapping
# target; double quotes are stripped from both. Lines with fewer than three
# columns are reported on STDERR and skipped.
#
#  * Targets matching /ENSGALT/ are stored straight away through add_xref()
#    and add_direct_xref() as direct transcript mappings, with any ".version"
#    suffix removed from the target.
#  * For every other target the first line printed by "pfetch -q <target>" is
#    taken as its sequence and a primary xref hash is queued for the caller.
#    Targets without a usable sequence are reported on STDERR.
#
# Arguments: $source_id, $species_id, $file (path handed to get_filehandle),
#            $verbose (true to print progress and a summary).
# Returns:   a reference to the array of queued primary xref hashes (possibly
#            empty), or nothing (undef / empty list) if $file cannot be opened.
sub create_xrefs {
    my ($self, $source_id, $species_id, $file, $verbose) = @_;

    my ($count, $noseq, $direct) = (0, 0, 0);

    local $| = 1;    # don't buffer

    my @xrefs;

    my $file_io = $self->get_filehandle($file);

    if ( !defined $file_io ) {
        print STDERR "ERROR: Could not open $file\n";
        # A bare return is false in both scalar and list context, so callers
        # can detect the failure however they capture the result.
        return;
    }

    $file_io->getline();    # skip first line

    LINE:
    while ( defined( my $line = $file_io->getline() ) ) {
        chomp $line;

        my @fields = split /\t/, $line;

        if ( @fields < 3 ) {
            print STDERR "WARNING: Skipping malformed line in $file: $line\n";
            next LINE;
        }

        # first field (probe_set) is accession
        my $acc = $fields[0];
        $acc =~ s/\"//g;

        # get linked accession (may be RefSeq or EMBL or ensembl)
        my $target = $fields[2];
        $target =~ s/\"//g;

        # Create direct xrefs for mappings to Ensembl transcripts
        if ( $target =~ /ENSGALT/ ) {

            # Remove the version if present. A substitution leaves
            # unversioned identifiers untouched, whereas capturing around a
            # mandatory "." would turn them into undef.
            $target =~ s/\..*\z//s;

            # add xref - note we're assuming it doesn't already exist;
            # may need to check like in CCDS parser
            my $xref_id = $self->add_xref({
                acc        => $acc,
                version    => 0,
                label      => $acc,
                desc       => "$target direct mapping",
                source_id  => $source_id,
                species_id => $species_id,
            });
            $self->add_direct_xref( $xref_id, $target, "transcript", "" );
            $direct++;

            next LINE;
        }

        # Fetch sequence for others (EMBL ESTs and RefSeqs - pfetch will
        # handle these). The list form of open runs pfetch without a shell,
        # so characters in $target cannot be interpreted as shell syntax, and
        # reading from the pipe avoids a shared scratch file.
        my $seq;
        if ( open my $pfetch, '-|', 'pfetch', '-q', $target ) {
            $seq = <$pfetch>;
            close $pfetch;    # exit status deliberately ignored; $seq decides
        }
        chomp $seq if defined $seq;

        if ( $seq && $seq !~ /no match/ ) {

            push @xrefs, {
                ACCESSION     => $acc,
                SEQUENCE      => $seq,
                LABEL         => $acc,
                SOURCE_ID     => $source_id,
                SPECIES_ID    => $species_id,
                SEQUENCE_TYPE => 'dna',
                STATUS        => 'experimental',
                # Add description noting where the mapping came from
                DESCRIPTION   => $target . " used as mapping target",
            };

            $count++;

            print "$count " if ( ( $count % 100 == 0 ) and $verbose );
        }
        else {
            print STDERR "Couldn't get sequence for $target\n";
            $noseq++;
        }
    }

    $file_io->close();

    if ($verbose) {
        print "\n\nParsed $count primary xrefs.\n";
        print "Couldn't get sequence for $noseq primary_xrefs\n" if ($noseq);
        print "Added $direct direct xrefs.\n";
    }

    return \@xrefs;
}
158 
159 1;
XrefParser::BaseParser
Definition: BaseParser.pm:8
run
public run()