my $self = shift;
my $only_those_not_set = shift || 0;
my $sql;
my $core_dbi = $self->core->dbc;
my $xref_dbi = $self->xref->dbc;
my $update_gene_desc_sth = $core_dbi->prepare("UPDATE gene SET description = ? where gene_id = ? and description IS NULL");
if(!$only_those_not_set){
my $reset_sth = $core_dbi->prepare("UPDATE gene SET description = null");
$reset_sth->execute();
$reset_sth->finish;
}
my %ignore;
if($only_those_not_set){
print "Only setting those not already set\n";
$sql = "select internal_id from gene_stable_id where desc_set = 1";
my $sql_sth = $xref_dbi->prepare($sql);
$sql_sth->execute;
my $id;
$sql_sth->bind_columns(\$id);
while($sql_sth->fetch){
$ignore{$id} = 1;
}
$sql_sth->finish;
}
##########################################
# Get source_id to external_disaply_name #
##########################################
my %name_to_external_name;
$sql = "select external_db_id, db_name, db_display_name from external_db";
my $sth = $core_dbi->prepare($sql);
$sth->execute();
my ($id, $name, $display_name);
$sth->bind_columns(\$id, \$name, \$display_name);
while($sth->fetch()){
$name_to_external_name{$name} = $display_name;
}
$sth->finish;
my ($source_id_to_external_name_href, $name_to_source_id_href);
if( $self->mapper->can("set_source_id_to_external_name") ){
($source_id_to_external_name_href, $name_to_source_id_href) = $self->mapper->set_source_id_to_external_name (\%name_to_external_name);
}
else{
($source_id_to_external_name_href, $name_to_source_id_href) = $self->set_source_id_to_external_name (\%name_to_external_name);
}
my %source_id_to_external_name = %$source_id_to_external_name_href;
my %name_to_source_id = %$name_to_source_id_href;
my @precedence;
my @regexps;
my ($ignore, $precedence);
if( $self->mapper->can("gene_description_sources") ){
@precedence = $self->mapper->gene_description_sources();
}
else{
($precedence, $ignore) = @{$self->gene_description_sources()};
@precedence = @$precedence;
}
if( $self->mapper->can("gene_description_filter_regexps") ){
@regexps = $self->mapper->gene_description_filter_regexps();
}
else{
@regexps = $self->gene_description_filter_regexps();
}
my $ins_p_sth = $xref_dbi->prepare("INSERT ignore into gene_desc_priority (source_id, priority) values(?, ?)");
my $get_source_id_sth = $xref_dbi->prepare("select source_id from source where name like ?");
my $list_sources_sth = $xref_dbi->prepare("select distinct name from gene_desc_priority d join source using(source_id) order by d.priority");
# The lower the priority number the better then
my $i=0;
foreach my $name (@precedence){
$i++;
$get_source_id_sth->execute($name);
my $source_id;
$get_source_id_sth->bind_columns(\$source_id);
while($get_source_id_sth->fetch){
$ins_p_sth->execute($source_id, $i);
}
}
$ins_p_sth->finish;
$get_source_id_sth->finish;
$i = 0;
if ($self->verbose) {
print "Precedence for gene descriptions (1- best description)\n";
$list_sources_sth->execute();
my $source_name;
$list_sources_sth->bind_columns(\$source_name);
while ($list_sources_sth->fetch() ) {
$i++;
print "\t$i\t$source_name\n";
}
}
$self->_apply_ignore($ignore, $xref_dbi);
#######################################################################
my $gene_desc_sql =(<<DXS);
select CASE ox.ensembl_object_type
WHEN 'Gene' THEN gtt_gene.gene_id
WHEN 'Transcript' THEN gtt_transcript.gene_id
WHEN 'Translation' THEN gtt_translation.gene_id
END AS d_gene_id,
x.description AS description,
s.source_id AS source_id,
from ( gene_desc_priority p
join ( source s
join ( xref x
join ( object_xref ox
join ( identity_xref ix
) using (object_xref_id)
) using (xref_id)
) using (source_id)
) using (source_id)
)
left join gene_transcript_translation gtt_gene
on (gtt_gene.gene_id = ox.ensembl_id)
left join gene_transcript_translation gtt_transcript
on (gtt_transcript.transcript_id = ox.ensembl_id)
left join gene_transcript_translation gtt_translation
on (gtt_translation.translation_id = ox.ensembl_id)
where ox.ox_status = 'DUMP_OUT'
order by d_gene_id,
ox.ensembl_object_type,
p.priority,
(ix.target_identity+ix.query_identity) desc
DXS
########################################################################
my $gene_sth = $core_dbi->prepare("select g.description from gene g where g.gene_id = ?");
my %no_source_name_in_desc;
if( $self->mapper->can("no_source_label_list") ){
foreach my $name (@{$self->mapper->no_source_label_list()}){
my $id = $name_to_source_id{$name};
print "$name will not have [Source:...] info in desc\n";
$no_source_name_in_desc{$id} = 1;
}
}
my $gene_desc_sth = $xref_dbi->prepare($gene_desc_sql);
$gene_desc_sth->execute();
my ($gene_id, $desc,$source_id,$label);
$gene_desc_sth->bind_columns(\$gene_id, \$desc, \$source_id,\$label);
my %gene_desc_updated;
while($gene_desc_sth->fetch()){
next if(exists($ignore{$gene_id}) || exists($gene_desc_updated{$gene_id}));
if(defined($desc) ){
my $filtered_desc = $self->filter_by_regexp($desc, \@regexps);
if ($filtered_desc ne "") {
if(!defined($no_source_name_in_desc{$source_id})){
$filtered_desc .= " [Source:".$source_id_to_external_name{$source_id}.";Acc:".$label."]";
}
$update_gene_desc_sth->execute($filtered_desc,$gene_id);
$gene_desc_updated{$gene_id} = 1;
}
}
}
$update_gene_desc_sth->finish;
$gene_desc_sth->finish;
print scalar(keys %gene_desc_updated) ." gene descriptions added\n";
# reset the status to DUMP_OUT fro object_xrefs that where ignored for the display_xref;
my $reset_status_sth = $xref_dbi->prepare('UPDATE object_xref SET ox_status = "DUMP_OUT" where ox_status = "NO_DISPLAY"');
$reset_status_sth->execute();
$reset_status_sth->finish;
}