See the NOTICE file distributed with this work for additional information
regarding copyright ownership.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
20 package XrefParser::MGI_CCDS_Parser;
32 my ($self, $ref_arg) = @_;
33 my $source_id = $ref_arg->{source_id};
34 my $species_id = $ref_arg->{species_id};
35 my $files = $ref_arg->{files};
36 my $verbose = $ref_arg->{verbose};
37 my $dbi = $ref_arg->{dbi};
38 $dbi = $self->dbi unless defined $dbi;
40 if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
41 croak
"Need to pass source_id, species_id and file as pairs";
45 my $file = @{$files}[0];
52 my $sql =
'select source_id, priority_description from source where name like "MGI"';
53 my $sth = $dbi->prepare($sql);
56 my ($mgi_source_id, $desc);
57 $sth->bind_columns(\$mgi_source_id, \$desc);
60 push @arr, $mgi_source_id;
64 $sql =
"select accession, label, version, description from xref where source_id in (".join(
", ",@arr).
")";
66 $sth = $dbi->prepare($sql);
68 my ($acc, $lab, $ver);
69 $sth->bind_columns(\$acc, \$lab, \$ver, \$desc);
70 while (my @row = $sth->fetchrow_array()) {
72 $accession{$lab} = $acc;
74 $version{$acc} = $ver;
75 $description{$acc} = $desc;
83 # Get master xref ids via the ccds label.
86 $sql =
'select x.label, x.xref_id from xref x, source s where x.source_id = s.source_id and s.name ="CCDS"';
88 my %ccds_label_to_xref_id;
89 $sth = $dbi->prepare($sql);
92 $sth->bind_columns(\$lab, \$xref_id);
93 while (my @row = $sth->fetchrow_array()) {
94 $ccds_label_to_xref_id{$row[0]} = $row[1];
100 my $ua = LWP::UserAgent->new();
106 my $ccds_missing = 0;
107 my $entrezgene_missing = 0;
109 my $mgi_io = $self->get_filehandle($file);
110 if ( !defined $mgi_io ) {
111 print STDERR
"ERROR: Could not open $file\n";
112 return 1; # 1 is an error
117 ##chromosome g_accession gene gene_id ccds_id ccds_status cds_strand cds_from cds_to cds_locations match_type
118 #1 NC_000067.5 Xkr4 497097 CCDS14803.1 Public - 3206102 3661428 [3206102-3207048, 3411782-3411981, 3660632-3661428] Identical
119 #1 NC_000067.5 Rp1h 19888 CCDS14804.1 Public - 4334680 4342905 [4334680-4340171, 4341990-4342161, 4342282-4342905] Identical
120 while (my $line = $mgi_io->getline()) {
121 my($chrom, $g_acc, $gene_name, $entrez_id, $ccds, @junk) = split(/\t/,$line);
122 if(defined($ccds_label_to_xref_id{$ccds})){
123 if(defined($accession{$gene_name}) and
124 defined($label{$accession{$gene_name}})){
125 my $acc = $accession{$gene_name};
126 $self->add_dependent_xref({ master_xref_id => $ccds_label_to_xref_id{$ccds},
128 version => $version{$acc},
129 label => $label{$acc},
130 desc => $description{$acc},
131 source_id => $source_id,
133 species_id => $species_id });
138 $entrezgene_missing++;
145 print
"$ccds_missing ccds not resolved, $entrezgene_missing mgi not found. Added $count MGI xrefs via CCDS\n" if($verbose);