3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
10 http://www.apache.org/licenses/LICENSE-2.0
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
# NOTE(review): this listing prefixes each statement with the original file's
# line number and omits some lines (see the gaps in the numbering), so the
# `sub` header, the closing braces and any `use` lines for this code are not
# visible here. Comments below describe only what the visible lines do.
20 package XrefParser::WilsonAffyParser;
# Unpack the invocant and a hash reference of named arguments. The keys read
# are source_id, species_id, files and verbose.
28 my ($self, $ref_arg) = @_;
29 my $source_id = $ref_arg->{source_id};
30 my $species_id = $ref_arg->{species_id};
31 my $files = $ref_arg->{files};
32 my $verbose = $ref_arg->{verbose};
# source_id, species_id and files are mandatory; verbose is not checked and may
# be undefined. croak is presumably imported from Carp - the import is not
# visible in this listing; confirm.
34 if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
35 croak
"Need to pass source_id, species_id and files as pairs";
# $files is dereferenced as an array; @{$files}[0] is a one-element slice, so
# only the first file is handed to create_xrefs and any further entries are
# not used by this call.
39 my @xrefs = $self->create_xrefs($source_id, $species_id, @{$files}[0], $verbose);
# upload_xref_object_graphs is defined outside this view. An undefined return
# value selects this branch - presumably the failure path; the branch body is
# not visible here, confirm against the full file.
45 if(!defined($self->upload_xref_object_graphs(@xrefs))){
# Body fragment of what appears to be create_xrefs (the parameter list matches
# the $self->create_xrefs(...) call made elsewhere in this file; the `sub`
# header itself is not visible - confirm).
#
# Parameters, as unpacked below:
#   $source_id  - stored on every xref created here
#   $species_id - stored on every xref created here
#   $file       - path of the tab-separated mapping file to read
#   $verbose    - when true, progress counts are printed
#
# NOTE(review): this listing prefixes statements with the original line
# numbers, omits some lines, and shows several quoted strings and comments
# broken across lines (most likely a listing artifact). Those lines are left
# untouched; the return statement and closing braces are not visible.
54 my ($self, $source_id, $species_id, $file, $verbose) = @_;
# Counters reported in the summary at the end; the statements that increment
# them are not visible in this listing.
56 my ($count, $noseq, $direct) = (0,0,0);
# Turn on autoflush for the currently selected output handle so progress output
# appears immediately; `local` restores the previous setting when the enclosing
# scope exits.
58 local $| = 1; # don
't buffer
# get_filehandle is defined outside this view; an undefined result is reported
# on STDERR as a failure to open the file.
62 my $file_io = $self->get_filehandle($file);
64 if ( !defined $file_io ) {
65 print STDERR "ERROR: Could not open $file\n";
# The first line of the file is read and discarded (presumably a column
# header - confirm against the input format).
69 $file_io->getline(); # skip first line
# Read the remaining lines one at a time into $_.
# NOTE(review): because the line comes from a method call rather than the <FH>
# operator, Perl adds no implicit defined() test here, so the loop ends on any
# false value (e.g. a final line of just "0" with no newline), not only at EOF.
71 while ( $_ = $file_io->getline() ) {
72 #last if ($count > 200);
# split with no explicit string operates on $_; fields are tab-separated.
# No chomp is visible before this point, so the last field may still carry the
# line terminator - confirm against the full file.
75 my @fields = split /\t/;
77 # first field (probe_set) is accession
83 # get linked accession (may be RefSeq or EMBL or ensembl)
# The third column holds the identifier the probe set was mapped to.
84 my $target = $fields[2];
88 # Create direct xrefs for mappings to Ensembl transcripts
# Unanchored substring test: any target containing "ENSGALT" takes this branch
# (presumably the chicken transcript stable-ID prefix - confirm).
89 if ($target =~ /ENSGALT/) {
91 # remove version if present
# NOTE(review): the pattern requires a literal '.', so when the target has no
# version suffix the match fails, the list assignment receives an empty list
# and $target becomes undef - at odds with "if present" above. When it does
# match, only the text before the first '.' is kept.
92 ($target) = $target =~ /([^.]*)\.([^.]*)/;
94 # add xref - not we're assuming it doesn
't already exist;
95 # may need to check like in CCDS parser
# add_xref and add_direct_xref are defined outside this view. $acc is assigned
# on a line not shown here (per the comment above, from the first field).
# Original lines 97-98 of this argument list are not visible.
96 my $xref_id = $self->add_xref({ acc => $acc,
99 desc => "$target direct mapping",
100 source_id => $source_id,
101 species_id => $species_id} );
# Link the new xref to the transcript; the meaning of the empty fourth argument
# is not visible from here.
102 $self->add_direct_xref($xref_id, $target, "transcript", "");
107 # fetch sequence for others (EMBL ESTs and RefSeqs - pfetch will handle these)
# NOTE(review): $target comes straight from the input file and is interpolated
# into a shell command line; the exit status of system() is not checked in the
# visible lines, and the output goes to a fixed file name (seq.txt) in the
# current working directory, which is overwritten for every target.
108 system ("pfetch -q $target > seq.txt");
110 my $seq_io = $self->get_filehandle('seq.txt
');
# Only the first line of the pfetch output is read here.
# NOTE(review): no check that $seq_io is defined is visible before this call.
112 my $seq = $seq_io->getline();
# Accept the sequence only if something was read and it does not contain the
# text "no match".
117 if ($seq && $seq !~ /no match/) {
# Fill in the xref record; the declaration of $xref is on a line not shown.
# The probe-set accession doubles as the label.
119 $xref->{ACCESSION} = $acc;
120 $xref->{SEQUENCE} = $seq;
121 $xref->{LABEL} = $acc;
122 $xref->{SOURCE_ID} = $source_id;
123 $xref->{SPECIES_ID} = $species_id;
124 $xref->{SEQUENCE_TYPE} = 'dna
';
125 $xref->{STATUS} = 'experimental
';
127 # Add description noting where the mapping came from
128 $xref->{DESCRIPTION} = $target . " used as mapping target";
# Progress indicator: print the running count whenever it is a multiple of 100,
# but only in verbose mode.
132 print "$count " if (($count % 100 == 0) and $verbose);
# Reported on STDERR when no usable sequence was obtained for the target
# (presumably the else branch of the sequence check above - the branch
# structure is not visible here).
138 print STDERR "Couldn't get sequence
for $target\n
";
# Summary of the three counters; the missing-sequence line is printed only when
# $noseq is non-zero.
150 print "\n\nParsed $count primary xrefs.\n
";
151 print "Couldn
't get sequence for $noseq primary_xrefs\n" if ($noseq);
152 print "Added $direct direct xrefs.\n";