3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
4 Copyright [2016-2024] EMBL-European Bioinformatics Institute
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
23 Please email comments or questions to the
public Ensembl
24 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
26 Questions may also be sent to the Ensembl help desk at
43 # create gene and peptide archive
44 $archiver->create_archive($mapping_session_id);
46 # dump existing archive tables to file
48 $archiver->dump_table_to_file(
'source',
'gene_archive',
49 'gene_archive_existing.txt', 1 );
53 This module creates the gene_archive and peptide_archive
54 tables. Data is written to a file as tab-delimited text
for
55 loading into a MySQL database (
this can be done manually, or
using
56 StableIdMapper->upload_file_into_table()).
58 An
archive entry
for a given source gene is created
if no target
59 gene exists, or
if any of its transcripts or their translations
60 changed. Non-coding transcripts only have an entry in gene_archive (i.e.
61 without a corresponding peptide_archive entry).
74 package Bio::EnsEMBL::IdMapping::Archiver;
78 no warnings
'uninitialized';
85 use Digest::MD5 qw(md5_hex);
94 Arg[1] : Int $mapping_session_id - the mapping_session_id
for this run
95 Example : $archiver->create_archive($stable_id_mapper->mapping_session_id);
96 Description : Creates the gene_archive and peptide_archive tables and writes
97 the data to a tab-delimited file. The decision as to what to
98 archive is deferred to dump_gene(), see documentation there
for
101 Exceptions : Thrown on missing argument.
102 Caller : id_mapping.pl
110 my $mapping_session_id = shift;
113 unless ($mapping_session_id) {
114 $self->logger->warning(
"No mapping_session_id set.");
117 $self->mapping_session_id($mapping_session_id);
119 # get filehandles to write gene and peptide archive
120 my $ga_fh = $self->get_filehandle(
'gene_archive_new.txt',
'tables');
121 my $pa_fh = $self->get_filehandle(
'peptide_archive_new.txt',
'tables');
123 # get the currently highest peptide_archive_id from the source db
124 my $s_dba = $self->cache->get_DBAdaptor(
'source');
125 my $s_dbh = $s_dba->dbc->db_handle;
126 my $sql = qq(SELECT MAX(peptide_archive_id) FROM peptide_archive);
127 $pa_id = $self->fetch_value_from_db($s_dbh, $sql);
130 $self->logger->warning(
"No max(peptide_archive_id) found in db.\n", 1);
131 $self->logger->info(
"That's ok if this is the first stable ID mapping for this species.\n", 1);
135 $self->logger->debug(
"Starting with peptide_archive_id $pa_id.\n");
137 # lookup hash of target gene stable IDs
138 my %target_genes =
map { $_->stable_id => $_ }
139 values %{ $self->cache->get_by_name(
"genes_by_id",
'target') };
141 # loop over source genes and dump to archive (dump_gene() will decide whether
142 # to do full or partial dump)
143 foreach my $source_gene (values %{ $self->cache->get_by_name(
"genes_by_id",
146 $self->dump_gene($source_gene, $target_genes{$source_gene->stable_id},
159 Arg[3] : Filehandle $ga_fh - filehandle
for writing gene_archive data
160 Arg[4] : Filehandle $pa_fh - filehandle
for writing peptide_archive data
161 Example : my $target_gene = $gene_mappings{$source_gene->
stable_id};
162 $archiver->dump_gene($source_gene, $target_gene, $ga_fh, $pa_fh);
163 Description : Given a source gene, it will write a gene_archive and
164 peptide_archive entry
for it
if no target gene exists, or
if any
165 of its transcripts or their translation changed.
168 Caller : create_archive()
175 my ($self, $s_gene, $t_gene, $ga_fh, $pa_fh) = @_;
177 # private method, so no argument check done for performance reasons
179 # loop over source transcripts
180 foreach my $s_tr (@{ $s_gene->get_all_Transcripts }) {
182 my $changed_flag = 1;
184 my $s_tl = $s_tr->translation;
188 foreach my $t_tr (@{ $t_gene->get_all_Transcripts }) {
190 $t_tl = $t_tr->translation;
192 # If there is a translation, there should also be a target translation
193 # and both transcript and translation should have same stable id and version
198 if ( $s_tl->stable_id eq $t_tl->stable_id
199 and $s_tl->version == $t_tl->version
200 and $s_tr->stable_id eq $t_tr->stable_id
201 and $s_tr->version == $t_tr->version ) {
209 # nothing changed if source transcript has a target transcript with same stable id and version
210 if ( $s_tr->stable_id eq $t_tr->stable_id
211 and $s_tr->version == $t_tr->version ) {
221 $self->dump_tuple($s_gene, $s_tr, $s_tl, $ga_fh, $pa_fh);
230 Arg[2] : Bio::EnsEMBL::IdMapping::TinyTranscript $tr - its
transcript
232 Arg[4] : Filehandle $ga_fh - filehandle
for writing gene_archive data
233 Arg[5] : Filehandle $pa_fh - filehandle
for writing peptide_archive data
234 Example : $archiver->dump_tuple($s_gene, $s_tr, $s_tl, $ga_fh, $pa_fh);
235 Description : Writes entry lines
for gene_archive and peptide_archive.
245 my ($self, $gene, $tr, $tl, $ga_fh, $pa_fh) = @_;
247 # private method, so no argument check done for performance reasons
250 print $ga_fh join(
"\t",
260 print $ga_fh join(
"\t",
264 $self->mapping_session_id
268 my $pep_seq = $tl->seq;
269 print $pa_fh join(
"\t", $pa_id, md5_hex($pep_seq), $pep_seq);
271 # increment peptide_archive_id
274 print $ga_fh join (
"\t",
278 $self->mapping_session_id
285 =head2 mapping_session_id
287 Arg[1] : (optional) Int - mapping_session_id to set
288 Example : my $msi = $archiver->mapping_session_id;
289 Description : Getter/setter
for mapping_session_id.
292 Caller : create_archive()
298 sub mapping_session_id {
300 $self->{
'_mapping_session_id'} = shift
if (@_);
301 return $self->{
'_mapping_session_id'};