2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
22 # Finds all potential frameshifts (exons 1, 2, 4 or 5 bp apart)
23 # in a database and adds transcript attributes for them.
24 # Attribute value is intron number (first intron is 1, second 2 etc).
# Command-line settings. --host, --user and --dbpattern are mandatory; the
# rest are optional (see the usage text for their meaning).
my ($host, $port, $user, $pass, $dbpattern, $nostore, $nodelete, $print);

GetOptions(
    'host|dbhost=s'      => \$host,
    'user|dbuser=s'      => \$user,
    'port|dbport=i'      => \$port,
    'pass|dbpass=s'      => \$pass,
    'dbpattern|dbname=s' => \$dbpattern,
    'nostore'            => \$nostore,
    'nodelete'           => \$nodelete,
    # --print is documented in the usage text and $print is consulted when
    # reporting short introns, so the flag must be registered to be settable.
    'print'              => \$print,
    'help'               => sub { usage(); exit(0); },
);

# Show the usage text when any mandatory option is missing.
usage() if (!$user || !$dbpattern || !$host);

# Connect to the server without selecting a schema so that every database
# on it can be listed and matched against --dbpattern.
my $dsn = "DBI:mysql:host=$host";
$dsn .= ";port=$port" if ($port);

# DBI->connect returns undef on failure; stop here with the driver's message
# rather than failing later with a method call on an undefined value.
my $db = DBI->connect($dsn, $user, $pass)
    or die "Could not connect to $dsn: $DBI::errstr\n";

# First column of every "show databases" row, i.e. the database names.
my @dbnames =
    map { $_->[0] } @{ $db->selectall_arrayref("show databases") };
# Main loop: visit each database name returned by the server and skip those
# that do not match the --dbpattern regular expression.
54 for my $dbname ( @dbnames ) {
56 next
if ($dbname !~ /$dbpattern/);
# NOTE(review): $db_adaptor is not created in the lines visible here --
# presumably a database adaptor for $dbname; confirm against the full file.
66 my $attribute_adaptor = $db_adaptor->get_AttributeAdaptor();
67 my $transcript_adaptor = $db_adaptor->get_TranscriptAdaptor();
# $gene_adaptor is not referenced again in the lines visible here.
68 my $gene_adaptor = $db_adaptor->get_GeneAdaptor();
# Prepare removal of previously stored attributes. The multi-table DELETE
# targets transcript_attrib rows whose attrib_type code is 'Frameshift'.
# NOTE(review): the execute call and any --nodelete guard are not visible
# here -- confirm they surround this statement.
72 print STDERR
"Deleting existing 'Frameshift' transcript attributes\n";
73 my $dsth = $db_adaptor->
dbc()->
prepare(
"DELETE ta FROM transcript_attrib ta, attrib_type at WHERE at.attrib_type_id=ta.attrib_type_id AND at.code='Frameshift'");
# Progress messages go to STDERR; the second one is only shown for --nostore.
78 print STDERR
"Finding frameshifts in $dbname, creating transcript attributes ...\n";
79 print STDERR
"Attributes will not be stored in database\n" if ($nostore);
83 # get all transcripts then look at each of their introns in turn
# fetch_all() returns an array reference, dereferenced here into a list.
85 my @transcripts = @{$transcript_adaptor->fetch_all()};
87 foreach my $transcript (@transcripts) {
89 #print "Transcript " . $trans_no++ . " of " . scalar(@transcripts) . "\n";
# Intron numbering restarts at 1 for every transcript.
# NOTE(review): the increment is not visible in these lines -- confirm.
91 my $intron_number = 1;
93 foreach my $intron (@{$transcript->get_all_Introns()}) {
95 # only interested in the short ones
# Lengths below 6 other than 3, matching the 1, 2, 4 or 5 bp gaps named in
# the header comment of this script.
96 if ($intron->length() < 6 && $intron->length() != 3) {
# Optional per-intron report on STDOUT, shown only when $print is set.
98 print
"Transcript " . $transcript->stable_id() .
" intron $intron_number length " . $intron->length() .
"\n" if ($print);
# Arguments of the attribute being built; the attribute value is the intron
# number. NOTE(review): the constructor call these arguments belong to is
# not visible here.
101 -NAME =>
'Frameshift',
102 -DESCRIPTION =>
'Frameshift modelled as intron',
103 -VALUE => $intron_number);
# store_on_Transcript is handed an array reference, so the single attribute
# is wrapped in a one-element array. Nothing is written when --nostore is set.
105 my @attribs = ($attribute);
107 $attribute_adaptor->store_on_Transcript($transcript->dbID, \@attribs)
if (!$nostore);
# Summary output. NOTE(review): $count and the condition choosing between
# these messages are maintained outside the lines visible here.
121 print
"$count short intron attributes\n";
122 print
"Attributes not stored in database\n" if ($nostore);
126 print
"No frameshift introns found!\n";
132 # ----------------------------------------------------------------------
138 Finds all potential frameshifts (exons 1, 2, 4 or 5 bp apart) in a database
139 and adds transcript attributes for them. Attribute value is intron number.
143 Options ([..] indicates optional):
145 --host The database server to connect to.
147 [--port] The port to use. Defaults to 3306.
149 --user Database username. Must allow writing.
151 --pass Password for user.
153 --dbpattern Regular expression to define which databases are affected.
155 [--nostore] Don't store the attributes, just print results.
157 [--nodelete] Don't delete any existing
"Frameshift" attributes before creating new ones.
159 [--print] Print
transcript stable ID, intron number and length.
170 # ----------------------------------------------------------------------