my ( $self, $args ) = @_;
my $notify = sub { print @_, "\n" if $args->{'verbose'} };
my $files = $args->{'files'};
my $file = ref $files eq 'ARRAY' ? shift @$files : $EMPTY_STR;
if ( $file ) {
$notify->(sprintf "%s Processing file '%s'", __PACKAGE__, $file);
}
else {
printf STDERR "%s called without a 'files' argument\n%s",
__PACKAGE__, Dumper($args);
return 1; # error
}
my $tair_io = $self->get_filehandle($file);
if ( !defined $tair_io ) {
print STDERR "ERROR: Could not open $file\n";
return 1; # 1 is an error
}
my $source_id = $args->{'source_id'} ||
my $species_id = $args->{'species_id'} ||
my $tairs_source_id = $self->get_source_id_for_source_name($TAIR_SYMBOL);
my $tairl_source_id =
$self->get_source_id_for_source_name($TAIR_LOCUS_MODEL);
my $tairt_source_id =
$self->get_source_id_for_source_name($TAIR_TRANSLATION);
my $nascg_source_id = $self->get_source_id_for_source_name($NASC_GENE_ID);
my $line_num = 0;
my %xrefs_added;
while ( my $line = $tair_io->getline() ) {
# Only process FASTA header lines
next unless $line =~ s/^>
chomp $line;
my ( $gene_stable_id, $symbol_str, $desc ) = split /\s*\|\s*/, $line;
next unless $gene_stable_id;
$desc ||= $EMPTY_STR;
if ( $args->{'verbose'} ) {
printf "%-70s\r",
sprintf( '%9d: Processing %s', ++$line_num, $gene_stable_id )
;
}
#
# Transcript, e.g., "AT5G55930.1"
#
if ( $gene_stable_id =~ /^([A-Z0-9]+) \. (\d+)$/xms ) {
my $transcript_id = $gene_stable_id;
$gene_stable_id = $1;
my $tairl_xref_id = $self->add_xref({
source_id => $tairl_source_id,
species_id => $species_id,
info_type => $DIRECT,
acc => $transcript_id,
label => $transcript_id,
desc => $desc,
});
$self->add_direct_xref(
$tairl_xref_id, $transcript_id, $TRANSCRIPT, $DIRECT
);
$xrefs_added{ $TAIR_LOCUS_MODEL }++;
my $tairt_xref_id = $self->add_xref({
source_id => $tairt_source_id,
species_id => $species_id,
info_type => $DIRECT,
acc => $gene_stable_id,
label => $gene_stable_id,
desc => $EMPTY_STR,
});
$self->add_direct_xref(
$tairt_xref_id, $transcript_id, $TRANSLATION, $DIRECT
);
$xrefs_added{ $TAIR_TRANSLATION }++;
}
#
# Gene IDs for TAIR and NASC
#
my $tairg_xref_id = $self->add_xref({
source_id => $tairg_source_id,
species_id => $species_id,
info_type => $DIRECT,
acc => $gene_stable_id,
label => $gene_stable_id,
desc => $desc,
});
$self->add_direct_xref(
$tairg_xref_id, $gene_stable_id, $GENE, $DIRECT
);
$xrefs_added{ $TAIR_LOCUS }++;
my $nascg_xref_id = $self->add_xref({
source_id => $nascg_source_id,
species_id => $species_id,
info_type => $DIRECT,
acc => $gene_stable_id,
label => $gene_stable_id . '-TAIR-G',
desc => $desc,
});
$self->add_direct_xref(
$nascg_xref_id, $gene_stable_id, $GENE, $DIRECT
);
$xrefs_added{ $NASC_GENE_ID }++;
#
# Symbols, e.g., "ATOPT1, OPT1"
#
if ( $symbol_str ) {
$symbol_str =~ s/^\s*Symbols:\s*
if ( my @symbols =
map { $_ || () } split /\s*,\s*/, $symbol_str ) {
if ( my $main_sym = shift @symbols ) {
my $sym_xref_id = $self->add_xref({
source_id => $tairs_source_id,
species_id => $species_id,
info_type => $DIRECT,
acc => $main_sym,
label => $main_sym,
desc => $EMPTY_STR,
});
#
# Add only first symbol to the gene
#
$self->add_direct_xref(
$sym_xref_id, $gene_stable_id, $GENE, $DIRECT
);
$xrefs_added{ $TAIR_SYMBOL }++;
#
# Add the remainder as "external_synonym"
#
for my $symbol ( @symbols ) {
$self->add_to_syn(
$main_sym, $tairs_source_id,
$symbol, $species_id
);
$xrefs_added{'SYNONYMS'}++;
}
}
}
}
}
$tair_io->close();
$notify->(
join("\n",
$EMPTY_STR,
sprintf " - Added %9d %s xrefs", $xrefs_added{ $_ }, $_
}
sort keys %xrefs_added
)
);
return 0; # successful
}