# Parse a tab-separated VGNC file and, for every row that belongs to the
# requested species and carries an Ensembl gene id, register a direct gene
# xref together with its synonyms.
#
# Arguments (the second positional argument is a hash reference):
#   source_id  - required; passed through to the xref and synonym helpers
#   species_id - required; rows whose taxon_id does not map to it are skipped
#   files      - required; array reference, only its first element is read
#   verbose    - optional; when true a summary line is printed at the end
#   dbi        - handed unchanged to the helper methods
#
# Returns 0 on success. Every failure is raised through confess.
my ($self, $ref_arg) = @_;
my $source_id  = $ref_arg->{source_id};
my $species_id = $ref_arg->{species_id};
my $files      = $ref_arg->{files};

# NOTE(review): these two statements were missing their terminating ';',
# which made the whole file fail to compile. If default values were meant
# to follow the hash lookups they are not recoverable from here - confirm
# against history.
my $verbose = $ref_arg->{verbose};
my $dbi     = $ref_arg->{dbi};

if ( (!defined $source_id) || (!defined $species_id) || (!defined $files) ) {
    confess "Need to pass source_id, species_id and files as pairs";
}

# Only the first file of the list is processed.
my $file  = $files->[0];
my $count = 0;

my $file_io = $self->get_filehandle($file);
if ( !defined $file_io ) {
    confess "Can't open VGNC file '$file'\n";
}

# NOTE(review): $source_name is never read below; the call is kept in case
# it has side effects (e.g. validating the source id) - TODO confirm and drop.
my $source_name = $self->get_source_name_for_source_id($source_id, $dbi);

# Create a hash of all valid taxon_ids for this species; the species_id
# itself is always accepted as a taxon id too.
my %species2tax = $self->species_id2taxonomy($dbi);
push @{ $species2tax{$species_id} }, $species_id;
my %taxonomy2species_id =
    map { $_ => $species_id } @{ $species2tax{$species_id} };

my $input_file = Text::CSV->new({
    sep_char       => "\t",
    empty_is_undef => 1,
    binary         => 1
}) or confess "Cannot use file '$file': ".Text::CSV->error_diag();

# header must contain these columns
my @required_columns = qw(
    taxon_id
    ensembl_gene_id
    vgnc_id
    symbol
    name
    alias_symbol
    prev_symbol
);

# Read the header line. getline returns undef on an empty or unparsable
# file, so fail with a clear message instead of dereferencing undef.
my $header = $input_file->getline($file_io);
if ( !defined $header ) {
    confess "Can't read header line from VGNC file '$file': "
        . $input_file->error_diag();
}
my @columns = @{$header};

# Die if some required column is not in the header. Names are compared
# exactly because rows are later accessed by these exact keys; a substring
# match would let e.g. 'alias_symbol' stand in for a missing 'symbol'.
# Empty header cells arrive as undef (empty_is_undef) and are ignored here.
my %is_present = map { $_ => 1 } grep { defined } @columns;
foreach my $colname (@required_columns) {
    if ( !$is_present{$colname} ) {
        confess "Can't find required column '$colname' in VGNC file '$file'\n";
    }
}

$input_file->column_names( @columns );

while ( my $data = $input_file->getline_hr( $file_io ) ) {
    # skip data for other species (an empty taxon_id cell is undef and can
    # never belong to the requested species)
    my $taxon_id = $data->{'taxon_id'};
    next if ( !defined $taxon_id || !exists $taxonomy2species_id{$taxon_id} );

    # only rows with an Ensembl gene id yield an Ensembl direct xref
    next if ( !$data->{'ensembl_gene_id'} );

    $self->add_to_direct_xrefs({
        stable_id  => $data->{'ensembl_gene_id'},
        type       => 'gene',
        acc        => $data->{'vgnc_id'},
        label      => $data->{'symbol'},
        desc       => $data->{'name'},
        dbi        => $dbi,
        source_id  => $source_id,
        species_id => $species_id
    });

    # NOTE(review): 'dead' receives alias_symbol and 'alias' receives
    # prev_symbol, which looks inverted by name; left as-is because the
    # helper's handling of the two keys is not visible here - confirm.
    $self->add_synonyms_for_hgnc({
        source_id  => $source_id,
        name       => $data->{'vgnc_id'},
        species_id => $species_id,
        dbi        => $dbi,
        dead       => $data->{'alias_symbol'},
        alias      => $data->{'prev_symbol'}
    });

    $count++;
}

# getline_hr also returns false on a parse error, so make sure the loop
# ended because the end of the file was reached.
$input_file->eof or confess "Error parsing file '$file': " . $input_file->error_diag();

$file_io->close();

if ($verbose) {
    print "Loaded a total of $count VGNC xrefs\n";
}

return 0; # successful
}