# Parse an EntrezGene tab-separated file and load its records as xrefs.
#
# Arguments are read from the hash reference passed after the invocant:
#   source_id  - source id used for the primary xrefs and synonyms (required)
#   species_id - compared with the first column of every record; only
#                matching records are loaded (required)
#   files      - array reference; only its first element is parsed (required)
#   verbose    - when true, a summary line is printed at the end
#   dbi        - handed through to the source lookup, add_xref and add_to_syn
#
# Each accepted record is stored twice: once under source_id and once under
# the id looked up for the 'WikiGene' source. Synonyms are registered with
# source_id only. An accession is processed at most once per run.
#
# Confesses when a required argument is missing, when the file cannot be
# opened, when the header looks wrong, or when parsing stops before EOF.
# Returns 0 once the whole file has been read.
my ( $self, $ref_arg ) = @_;

my $source_id  = $ref_arg->{source_id};
my $species_id = $ref_arg->{species_id};
my $files      = $ref_arg->{files};
my $verbose    = $ref_arg->{verbose};
my $dbi        = $ref_arg->{dbi};

if (    ( !defined $source_id )
     or ( !defined $species_id )
     or ( !defined $files ) )
{
    confess 'Need to pass source_id, species_id and files';
}

# Only the first file of the list is parsed.
my $file = $files->[0];

my $wiki_source_id =
    $self->get_source_id_for_source_name( 'WikiGene', undef, $dbi );

my $eg_io = $self->get_filehandle($file);
if ( !defined $eg_io ) {
    confess "Could not open $file";
}

my $input_file = Text::CSV->new({
    sep_char           => "\t",
    empty_is_undef     => 1,
    allow_loose_quotes => 1
})
    || confess "Cannot use file $file: " . Text::CSV->error_diag();

# process header
# NOTE(review): the condition guarding this confess was missing from the code
# as found (a bare confess followed by an unmatched closing brace), so the
# parser could not compile and would have died unconditionally. It has been
# reconstructed as a deliberately lenient check: the first line must exist and
# its first field must mention tax_id, which is presumably what an EntrezGene
# gene_info header looks like -- confirm against the intended validation.
my $header = $input_file->getline($eg_io);
if (    ( !defined $header )
     or ( !defined $header->[0] )
     or ( $header->[0] !~ m{ tax_id }imsx ) )
{
    confess "Malformed or unexpected header in EntrezGene file '${file}'";
}

my $xref_count = 0;
my $syn_count  = 0;
my %seen;    # record already processed xrefs

# read data and load xrefs
RECORD:
while ( my $data = $input_file->getline($eg_io) ) {
    my ( $tax_id, $acc, $symbol, undef, $synonyms, undef, undef, undef, $desc )
        = @{$data};

    # species_id corresponds to the species taxonomy id, see:
    # https://github.com/Ensembl/ensembl-xref/pull/31#issuecomment-445838474
    # empty_is_undef turns blank fields into undef, hence the defined guard:
    # a record without a taxonomy id cannot match any species.
    if ( ( !defined $tax_id ) or ( $tax_id ne $species_id ) ) {
        next RECORD;
    }

    if ( exists $seen{$acc} ) {
        next RECORD;
    }

    # Same accession, label and description under the caller's source ...
    $self->add_xref({
        acc        => $acc,
        label      => $symbol,
        desc       => $desc,
        source_id  => $source_id,
        species_id => $species_id,
        dbi        => $dbi,
        info_type  => 'DEPENDENT'
    });

    # ... and again under the WikiGene source.
    $self->add_xref({
        acc        => $acc,
        label      => $symbol,
        desc       => $desc,
        source_id  => $wiki_source_id,
        species_id => $species_id,
        dbi        => $dbi,
        info_type  => 'DEPENDENT'
    });
    $xref_count += 1;

    # Synonyms are pipe-separated; a lone dash is a placeholder, not a
    # synonym. The field may be undef because of empty_is_undef, in which
    # case there is nothing to split.
    my @syn = defined $synonyms ? split( qr{ \| }msx, $synonyms ) : ();
    foreach my $synonym (@syn) {
        if ( $synonym ne q{-} ) {
            $self->add_to_syn( $acc, $source_id, $synonym, $species_id, $dbi );
            $syn_count += 1;
        }
    }

    $seen{$acc} = 1;
} ## end while ( my $data = $input_file...)

# getline returns false both at end of file and on a parse error; only the
# former is acceptable.
$input_file->eof ||
    confess "Error parsing file $file, should be EOF: " . $input_file->error_diag();

$eg_io->close();

if ( $verbose ) {
    print $xref_count . " EntrezGene Xrefs added with $syn_count synonyms\n";
}

return 0;