my ($self, $ref_arg) = @_;
my $source_id = $ref_arg->{source_id};
my $species_id = $ref_arg->{species_id};
my $files = $ref_arg->{files};
my $verbose = $ref_arg->{verbose};
my $dbi = $ref_arg->{dbi};
$dbi = $self->dbi unless defined $dbi;
if((!defined $source_id) or (!defined $species_id) or (!defined $files) ){
croak "Need to pass source_id, species_id and file as pairs";
}
$verbose |=0;
my $file = @{$files}[0];
my %label;
my %version;
my %description;
my $sql = 'select source_id, priority_description from source where name like "MGI"';
my $sth = $dbi->prepare($sql);
$sth->execute();
my ($mgi_source_id, $desc);
$sth->bind_columns(\$mgi_source_id, \$desc);
my @arr;
while($sth->fetch()){
push @arr, $mgi_source_id;
}
$sth->finish;
$sql = "select accession, label, version, description from xref where source_id in (".join(", ",@arr).")";
$sth = $dbi->prepare($sql);
$sth->execute();
my ($acc, $lab, $ver);
$sth->bind_columns(\$acc, \$lab, \$ver, \$desc);
while (my @row = $sth->fetchrow_array()) {
if(defined($desc)){
$accession{$lab} = $acc;
$label{$acc} = $lab;
$version{$acc} = $ver;
$description{$acc} = $desc;
}
}
$sth->finish;
#
# Get master xref ids via the ccds label.
#
$sql = 'select x.label, x.xref_id from xref x, source s where x.source_id = s.source_id and s.name ="CCDS"';
my %ccds_label_to_xref_id;
$sth = $dbi->prepare($sql);
$sth->execute();
my ($xref_id);
$sth->bind_columns(\$lab, \$xref_id);
while (my @row = $sth->fetchrow_array()) {
$ccds_label_to_xref_id{$row[0]} = $row[1];
}
$sth->finish;
my $ua = LWP::UserAgent->new();
$ua->timeout(10);
$ua->env_proxy();
my $count = 0;
my $ccds_missing = 0;
my $entrezgene_missing = 0;
my $mgi_io = $self->get_filehandle($file);
if ( !defined $mgi_io ) {
print STDERR "ERROR: Could not open $file\n";
return 1; # 1 is an error
}
#
#
##chromosome g_accession gene gene_id ccds_id ccds_status cds_strand cds_from cds_to cds_locations match_type
#1 NC_000067.5 Xkr4 497097 CCDS14803.1 Public - 3206102 3661428 [3206102-3207048, 3411782-3411981, 3660632-3661428] Identical
#1 NC_000067.5 Rp1h 19888 CCDS14804.1 Public - 4334680 4342905 [4334680-4340171, 4341990-4342161, 4342282-4342905] Identical
while (my $line = $mgi_io->getline()) {
my($chrom, $g_acc, $gene_name, $entrez_id, $ccds, @junk) = split(/\t/,$line);
if(defined($ccds_label_to_xref_id{$ccds})){
if(defined($accession{$gene_name}) and
defined($label{$accession{$gene_name}})){
my $acc = $accession{$gene_name};
$self->add_dependent_xref({ master_xref_id => $ccds_label_to_xref_id{$ccds},
acc => $acc,
version => $version{$acc},
label => $label{$acc},
desc => $description{$acc},
source_id => $source_id,
dbi => $dbi,
species_id => $species_id });
$count++;
}
else{
$entrezgene_missing++;
}
}
else{
$ccds_missing++;
}
}
print "$ccds_missing ccds not resolved, $entrezgene_missing mgi not found. Added $count MGI xrefs via CCDS\n" if($verbose);
return 0;
}