my ($self) = @_;
my $xref =$self->xref();
if(!defined($xref->dir())){
if(defined($self->dir)){
$xref->species($self->dir);
$self->species_id($self->get_id_from_species_name($self->species));
}
else{
$xref->dir(".");
}
}
#we will populate @method with all exonerate methods which will be used to perform sequence mapping
my ($default_method, $override_method_for_source);
if($self->mapper->can('set_methods')){
($default_method, $override_method_for_source) = $self->mapper->set_methods();
}
else {
($default_method, $override_method_for_source) = $self->set_methods();
}
push(@
method, $default_method);
print "Default exonerate method is $default_method\n" if($self->verbose);
# %override_method_for_source is keyed on exonerate methods; values are arrays of xref sources
my %override_method_for_source= %{$override_method_for_source};
#array which holds all source ids for which the default exonerate method was overridden
my @all_source_ids;
#similar to %override_method_for_source (keyed on exonerate method), but the values are
#arrays of the corresponding source ids instead of arrays of source names
my %methods_and_source_ids;
my %source_mapping_method;
#only sources which have xrefs in the primary_xref table (which stores sequences) are relevant here
#multiple sources can have the same name but different priority_descriptions so
#one name can map to multiple ids
#%source_ids is keyed on source name, values are arrays of source ids - populate it with all
#sources which have xrefs in primary_xref table
my %source_ids;
my $source_sth = $xref->dbc->prepare("select distinct name, source_id from primary_xref join xref using(xref_id) join source using(source_id) order by name;");
$source_sth->execute();
while ( my ($name, $source_id) = $source_sth->fetchrow_array() ){
push (@{$source_ids{$name}}, $source_id);
}
$source_sth->finish();
if (%override_method_for_source) {
print "Default exonerate method overridden for some sources\n" if($self->verbose);
foreach my $method (keys %override_method_for_source) {
#convert source names to source_ids
my @source_names = @{$override_method_for_source{$method}};
my $a_source_exists = 0;
foreach my $source_name (@source_names) {
#a source name should be defined only once against one exonerate method only
if (exists($source_mapping_method{$source_name})) {
die "$source_name source name defined more than once in set_methods method (SubmitMapper method which can be overriden in species.pm module)\n";
}
$source_mapping_method{$source_name} = $method;
my @source_ids = @{$source_ids{$source_name}} if (exists($source_ids{$source_name}));
if (@source_ids) {
$a_source_exists = 1;
foreach my $source_id (@source_ids) {
push @{$methods_and_source_ids{$method}}, $source_id;
push @all_source_ids, $source_id;
}
} else {
print "WARNING: source id for $source_name not found. Xrefs for this source will not be sequence mapped. There are no xrefs from this source in the primary_xref table.\n" if($self->verbose);
}
}
#if we found at least one source id with xrefs to be mapped using $method, add $method to @method
if ($a_source_exists) {
}
}
}
#store source_ids and mapping methods in source_mapping_method table
my $insert_src_method_sth = $xref->dbc->prepare("insert into source_mapping_method values(?,?)");
foreach my $source_name (keys %source_ids){
my $method;
if (exists($source_mapping_method{$source_name})){
$method = $source_mapping_method{$source_name};
} else {
$method = $default_method;
}
foreach my $source_id (@{$source_ids{$source_name}}){
$insert_src_method_sth->execute($source_id,$method);
print "Will use $method method for source id $source_id, $source_name\n" if($self->verbose);
}
}
$insert_src_method_sth->finish();
if(defined($self->mapper->dumpcheck())){
my $skip = 1;
if(!-e $xref->dir()."/xref_".$method."_dna.fasta"){
$skip = 0;
}
if(!-e $xref->dir()."/xref_".$method."_peptide.fasta"){
$skip = 0;
}
}
if($skip){
print "Xref fasta files found and will be used (No new dumping)\n" if($self->verbose);
return;
}
}
print "Dumping Xref fasta files\n" if($self->verbose());
for my $sequence_type ('dna', 'peptide') {
my $filename = $xref->dir() . "/xref_".$method."_" . $sequence_type . ".fasta";
open( my $DH,">", $filename) || die "Could not open $filename";
my $sql = "SELECT p.xref_id, p.sequence, x.species_id , x.source_id ";
$sql .= " FROM primary_xref p, xref x ";
$sql .= " WHERE p.xref_id = x.xref_id AND ";
$sql .= " p.sequence_type ='" . $sequence_type ."' ";
#for the default method don't select sources for which the method was overridden
if ($method eq $default_method && scalar(@all_source_ids) > 0 ) {
$sql .= "AND x.source_id not in (" . join(',',@all_source_ids).")";
}
#for a non default method only select sources which should have their xrefs mapped using this method
if ($method ne $default_method) {
$sql .= "AND x.source_id in (" . join(',',@{$methods_and_source_ids{$method}}).")";
}
my $sth = $xref->dbc->prepare($sql);
$sth->execute();
while(my @row = $sth->fetchrow_array()){
# Ambiguous peptides must be cleaned out to protect Exonerate from J,O and U codes
$row[1] = uc($row[1]);
$row[1] =~ s/(.{60})/$1\n/g;
if ($sequence_type eq 'peptide') { $row[1] =~ tr/JOU/X/ }
print $DH ">".$row[0]."\n".$row[1]."\n";
}
close $DH;
$sth->finish();
}
}
my $sth = $xref->dbc->prepare("insert into process_status (status, date) values('xref_fasta_dumped',now())");
$sth->execute();
$sth->finish;
return;
}