ensembl-hive  2.7.0
frameshift_transcript_attribs.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # $Id$
18 
19 use strict;
20 use warnings;
21 
22 # Finds all potential frameshifts (exons 1, 2, 4 or 5 bp apart)
23 # in a database and adds transcript attributes for them.
24 # Attribute value is intron number (first intron is 1, second 2 etc).
25 
28 
29 use Getopt::Long;
30 
# Command-line options; see usage() for what each one means.
my ($host, $port, $user, $pass, $dbpattern, $nostore, $nodelete, $print);

GetOptions('host|dbhost=s'      => \$host,
           'user|dbuser=s'      => \$user,
           'port|dbport=i'      => \$port,
           'pass|dbpass=s'      => \$pass,
           'dbpattern|dbname=s' => \$dbpattern,
           'nostore'            => \$nostore,
           'nodelete'           => \$nodelete,
           'print'              => \$print,
           'help'               => sub { usage(); exit(0); });

$port ||= 3306;

# Host, user and database pattern are mandatory. Say which ones are missing
# before showing the help text, so the user does not have to guess.
my @missing;
push @missing, '--host'      if (!$host);
push @missing, '--user'      if (!$user);
push @missing, '--dbpattern' if (!$dbpattern);

if (@missing) {
    print STDERR "Missing required option(s): @missing\n";
    # usage() terminates the script; 1 is the requested exit status for a
    # failed invocation.
    usage(1);
}

# Server-level connection (no database selected), used only to list the
# databases available on the server.
my $dsn = "DBI:mysql:host=$host;port=$port";

my $db = DBI->connect($dsn, $user, $pass)
    or die "Could not connect to $dsn as $user: $DBI::errstr\n";

my @dbnames = map { $_->[0] } @{ $db->selectall_arrayref("show databases") };

# The server-level handle is not needed once the names have been fetched.
$db->disconnect();

# Compile the user-supplied regular expression once instead of once per name.
my $dbpattern_re = qr/$dbpattern/;

for my $dbname (@dbnames) {

    next if ($dbname !~ $dbpattern_re);

    print $dbname . "\n";

    my $db_adaptor = Bio::EnsEMBL::DBSQL::DBAdaptor->new(-host   => $host,
                                                         -user   => $user,
                                                         -pass   => $pass,
                                                         -dbname => $dbname,
                                                         -port   => $port);

    my $attribute_adaptor  = $db_adaptor->get_AttributeAdaptor();
    my $transcript_adaptor = $db_adaptor->get_TranscriptAdaptor();

    if (!$nodelete) {

        # NOTE(review): this deletion also runs when --nostore is given, so
        # "--nostore" on its own still removes existing attributes. Combine
        # it with --nodelete for a read-only run. Confirm whether that is
        # the intended behaviour.
        print STDERR "Deleting existing 'Frameshift' transcript attributes\n";
        my $dsth = $db_adaptor->dbc()->prepare("DELETE ta FROM transcript_attrib ta, attrib_type at WHERE at.attrib_type_id=ta.attrib_type_id AND at.code='Frameshift'");
        $dsth->execute();

    }

    print STDERR "Finding frameshifts in $dbname, creating transcript attributes ...\n";
    print STDERR "Attributes will not be stored in database\n" if ($nostore);

    # Number of frameshift introns found in this database.
    my $count = 0;

    # Get all transcripts, then look at each of their introns in turn.
    foreach my $transcript (@{ $transcript_adaptor->fetch_all() }) {

        # 1-based position of the intron within the transcript; this is the
        # value stored in the attribute.
        my $intron_number = 1;

        foreach my $intron (@{ $transcript->get_all_Introns() }) {

            my $intron_length = $intron->length();

            # Only interested in the short ones. A 3 bp gap keeps the
            # reading frame, so it is not treated as a frameshift.
            if ($intron_length < 6 && $intron_length != 3) {

                print "Transcript " . $transcript->stable_id() . " intron $intron_number length " . $intron_length . "\n" if ($print);

                my $attribute = Bio::EnsEMBL::Attribute->new(-CODE        => 'Frameshift',
                                                             -NAME        => 'Frameshift',
                                                             -DESCRIPTION => 'Frameshift modelled as intron',
                                                             -VALUE       => $intron_number);

                $attribute_adaptor->store_on_Transcript($transcript->dbID, [$attribute]) if (!$nostore);

                $count++;

            }

            $intron_number++;

        } # foreach intron

    } # foreach transcript

    if ($count) {

        print "$count short intron attributes\n";
        print "Attributes not stored in database\n" if ($nostore);

    } else {

        print "No frameshift introns found!\n";

    }

}
131 
132 # ----------------------------------------------------------------------
133 
# Prints the command-line help text and terminates the script.
#
# Arg [1]    : (optional) int $exit_status
#              Exit status for the process. Defaults to 0. When it is
#              non-zero the text is written to STDERR instead of STDOUT, so
#              a failed invocation does not pollute captured output.
# Returntype : none - this sub never returns, it always calls exit().
sub usage {
    my ($exit_status) = @_;
    $exit_status ||= 0;

    my $out = $exit_status ? \*STDERR : \*STDOUT;

    print {$out} <<"EOF";

  Finds all potential frameshifts (exons 1, 2, 4 or 5 bp apart) in a database
  and adds transcript attributes for them. Attribute value is intron number
  (first intron is 1, second 2 etc).

  perl $0 {options}

  Options ([..] indicates optional):

   --host       The database server to connect to.

   [--port]     The port to use. Defaults to 3306.

   --user       Database username. Must allow writing.

   --pass       Password for user.

   --dbpattern  Regular expression to define which databases are affected.

   [--nostore]  Don't store the attributes, just print results.

   [--nodelete] Don't delete any existing "Frameshift" attributes before creating new ones.

   [--print]    Print transcript stable ID, intron number and length.

   [--help]     This text.


EOF

    exit($exit_status);

}
169 
170 # ----------------------------------------------------------------------
transcript
public transcript()
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
map
public map()
Bio::EnsEMBL::DBSQL::DBAdaptor::dbc
public Bio::EnsEMBL::DBSQL::DBConnection dbc()
Bio::EnsEMBL::Attribute::new
public Bio::EnsEMBL::Attribute new()
Bio::EnsEMBL::DBSQL::DBConnection::prepare
public DBI prepare()
Bio::EnsEMBL::Attribute
Definition: Attribute.pm:34
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
usage
public usage()