2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
20 vega_repeat_libraries.pl - set repeat_consensus.repeat_class
24 vega_repeat_libraries.pl [options]
27 --conffile, --conf=FILE read parameters from FILE
28 (
default: conf/Conversion.ini)
30 --dbname, --db_name=NAME use database NAME
31 --host, --dbhost, --db_host=HOST use database host HOST
32 --port, --dbport, --db_port=PORT use database port PORT
33 --user, --dbuser, --db_user=USER use database username USER
34 --pass, --dbpass, --db_pass=PASS use database password PASS
35 --logfile, --log=FILE log to FILE (
default: *STDOUT)
36 --logpath=PATH write logfile to PATH (
default: .)
37 --logappend, --log_append append to logfile (
default: truncate)
38 -v, --verbose verbose logging (
default:
false)
39 -i, --interactive=0|1
run script interactively (
default:
true)
40 -n, --dry_run, --dry=0|1 don
't write results to database
41 -h, --help, -? print help (this message)
43 --prune undo, i.e. delete from the database changes caused by running the script
48 --repeatfile=FILE read repeat class definitions from FILE
52 This program classifies the repeats stored in a core database into
53 somewhat sensible categories. It does this through a combination of a
54 repeat.txt file extracted from RepeatMasker repeat libraries and through some
55 simple pattern matching of the repeat names.
60 Steve Trevanion <st3@sanger.ac.uk>
61 Patrick Meidl <pm2@sanger.ac.uk>
63 Based on code by James Smith <js5@sanger.ac.uk>
67 Post questions to the EnsEMBL development list http://lists.ensembl.org/mailman/listinfo/dev
73 no warnings 'uninitialized
';
76 use vars qw($SERVERROOT);
79 $SERVERROOT = "$Bin/../../..";
80 unshift(@INC, "$SERVERROOT/ensembl-otter/modules");
81 unshift(@INC, "$SERVERROOT/ensembl/modules");
82 unshift(@INC, "$SERVERROOT/bioperl-live");
87 use Bio::EnsEMBL::Utils::ConversionSupport;
# Build the conversion helper and register the options this script accepts:
# the common option set plus --repeatfile=FILE and the --prune flag.
# The constructor is called with direct method syntax; the indirect-object
# form (`new Class(...)`) is ambiguous to the parser and is best avoided.
my $support = Bio::EnsEMBL::Utils::ConversionSupport->new($SERVERROOT);

# parse options
$support->parse_common_options(@_);
$support->parse_extra_options('repeatfile=s', 'prune');
$support->allowed_params($support->get_common_params, 'repeatfile', 'prune');
98 if ($support->param('help
') or $support->error) {
99 warn $support->error if $support->error;
103 # ask user to confirm parameters to proceed
104 $support->confirm_params;
106 # get log filehandle and print heading and parameters to logfile
109 $support->check_required_params('repeatfile
') unless $support->param('prune
'); # don't need the repeat file
for pruning
111 # connect to database and get adaptors
# $dbh is the raw database handle taken from the adaptor's connection; the SQL
# statements later in this script are issued through it.
112 my $dba = $support->get_database(
'ensembl');
113 my $dbh = $dba->dbc->db_handle;
115 # unless we are pruning (undo), we should make a backup copy of the repeat_consensus table
116 if($support->param(
'prune')){
118 # backup table must exist for this to work
121 # backup table present
122 if($support->user_proceed(
"Replace the current table 'repeat_consensus' with the backup table 'repeat_consensus_backup'?")){
123 if($dbh->do(
"drop table repeat_consensus")){
124 if($dbh->do(
"create table repeat_consensus select * from repeat_consensus_backup")){
125 $support->log(
"prune (undo) was successful\n");
126 $support->log_stamped(
"Done.\n");
129 $support->finish_log;
133 $support->log_error(
"prune failed\n");
137 $support->log_error(
"prune failed\n");
143 print
"aborting...\n";
144 $support->log_error(
"aborting...\n");
148 print
"Cannot do prune, as no backup table\n";
149 $support->log_error(
"Cannot do prune, as no backup table\n");
155 # check to see if the backup table 'repeat_consensus_backup' already exists
157 #table already exists: ask user if OK to overwrite it
158 if ($support->user_proceed(
"The backup table 'repeat_consensus_backup' already exists, OK to delete?")) {
# Drop the stale backup table so a fresh one can be created.
# The table name must not be wrapped in single quotes: that turns it into a
# string literal, which DROP TABLE rejects, so the drop could never succeed.
if ($dbh->do("drop table repeat_consensus_backup")) {
    $support->log("deleted previous backup table\n");
}
else {
    $support->log_error("tried but failed to delete previous backup table\n");
}
168 # user won't allow removing the backup table
169 print
"Aborting ...\n";
170 $support->log_error(
"User won't allow removal of backup table ... aborting program\n");
173 # table doesn't exist, therefore we can create it
# Vega mouse only: for tandem repeats, move the repeat name into the
# repeat_consensus column and then relabel the repeat name as 'trf'.
# The progress messages are always logged; the updates are skipped on a dry run.
if ($support->species eq 'Mus_musculus') {
    $support->log("Making Vega mouse specific changes...\n");

    $support->log("Copying repeat_name to repeat_consensus...\n", 1);
    if (not $support->param('dry_run')) {
        my $copy_name_sql = "update repeat_consensus set repeat_consensus = repeat_name where repeat_class = 'Tandem_repeat'";
        $dbh->do($copy_name_sql);
    }

    $support->log("Setting repeat_name to 'trf' where appropriate\n", 1);
    if (not $support->param('dry_run')) {
        my $rename_sql = "update repeat_consensus set repeat_name = 'trf' where repeat_class = 'Tandem_repeat'";
        $dbh->do($rename_sql);
    }

    $support->log("Done.\n");
}
# Reset repeat_class to the empty string on every row (skipped on a dry run),
# so that the classification steps below start from a clean slate.
$support->log("Clearing repeat_class...\n");
if (not $support->param('dry_run')) {
    my $reset_sql = "update repeat_consensus set repeat_class = ''";
    $dbh->do($reset_sql);
}
$support->log("Done.\n");
194 # read repeat classes from file
195 $support->log_stamped(
"Reading repeat classes from input file...\n");
196 my $fh = $support->filehandle(
'<', $support->param(
'repeatfile'));
200 my ($hid, $type) = split( /\t/, $_, 2);
201 $dbh->do(
"update repeat_consensus set repeat_class = ? where repeat_name in (?,?,?)", {} , $type, $hid, substr($hid,0,15),
"$hid-int" ) unless ($support->param(
'dry_run'));
203 $support->log(
"$C\n", 1) unless $C % 100;
206 $support->log_stamped(
"Done.\n");
# Consensifying repeat classes
# Rows whose repeat_class is still empty at this point are classified from
# their repeat_name. The rules run in the order listed and each one only
# touches rows whose class is still '', so an earlier rule takes precedence
# over a later one that would also match.
$support->log_stamped("Consensifying remaining repeat classes...\n");
unless ($support->param('dry_run')) {
    # [ repeat_class to assign, SQL LIKE pattern tested against repeat_name ]
    my @like_rules = (
        [ 'Simple_repeat',       '%)n'      ],
        [ 'low_complexity',      '%-rich'   ],
        [ 'low_complexity',      'poly%'    ],
        [ 'LTR/ERVL',            '%ERVL%'   ],
        [ 'LTR/ERVL',            '%ERV16%'  ],
        [ 'SINE/Alu',            'Alu%'     ],
        [ 'SINE/Alu',            '%F_AM%'   ],
        [ 'LINE/L1',             'L1%'      ],
        [ 'DNA/MER2_type',       'Tigger%'  ],
        [ 'DNA/MER1_type',       'Charlie%' ],
        [ 'DNA/Tc2',             'HsTC%'    ],
        [ 'DNA/MER2_type',       'MER46%'   ],
        [ 'DNA/MER2_type',       'MER7%'    ],
        [ 'DNA/MER1_type',       'MER91'    ],
        [ 'DNA/MER1_type',       'MER58'    ],
        [ 'DNA/MER1_type',       'MER63'    ],
        [ 'Satellite/telomeric', 'SUBTEL_%' ],
    );
    my $like_sql = "update repeat_consensus set repeat_class = ? "
                 . "where repeat_class = '' and repeat_name like ?";
    foreach my $rule (@like_rules) {
        my ($class, $pattern) = @$rule;
        $dbh->do($like_sql, {}, $class, $pattern);
    }

    # These names are used verbatim as their own class (exact match, not LIKE).
    my $exact_sql = "update repeat_consensus set repeat_class = ? "
                  . "where repeat_class = '' and repeat_name = ?";
    foreach my $name ('trf', 'dust', 'novel_transposon') {
        $dbh->do($exact_sql, {}, $name, $name);
    }
}
$support->log_stamped("Done.\n");
234 # Setting repeat types
235 $support->log_stamped(
"Setting repeat types...\n");
237 'Low_Comp%' =>
'Low complexity regions',
238 'LINE%' =>
'Type I Transposons/LINE',
239 'SINE%' =>
'Type I Transposons/SINE',
240 'DNA%' =>
'Type II Transposons',
242 'Other%' =>
'Other repeats',
243 'Satelli%' =>
'Satellite repeats',
244 'Simple%' =>
'Simple repeats',
245 'Other%' =>
'Other repeats',
246 'Tandem%' =>
'Tandem repeats',
247 'TRF%' =>
'Tandem repeats',
249 'Unknown%' =>
'Unknown',
250 '%RNA' =>
'RNA repeats',
251 'novel_transposon' =>
'Novel Transposon',
# Derive repeat_type from repeat_class using the LIKE patterns in %mappings,
# then mark everything that is still untyped as 'Unknown'. Skipped on a dry run.
unless ($support->param('dry_run')) {
    # Iterate in sorted order so the result is reproducible: plain `keys`
    # order differs between runs, which matters if two patterns can match
    # the same repeat_class (the last update applied wins).
    # NOTE(review): sorted order is only a deterministic tie-break; confirm
    # which mapping is meant to win for overlapping patterns.
    foreach my $pattern (sort keys %mappings) {
        $dbh->do(
            "update repeat_consensus set repeat_type = ? where repeat_class like ?",
            {}, $mappings{$pattern}, $pattern
        );
    }

    # type all remaining repeats as unknown
    # A NULL column never satisfies '= NULL'; it has to be tested with IS NULL.
    $dbh->do(
        "update repeat_consensus set repeat_type = 'Unknown' "
      . "where repeat_type = '' or repeat_type is null"
    );
}
262 $support->log_stamped(
"Done.\n");
265 $support->finish_log;
# Copy repeat_consensus into a new table named repeat_consensus_backup and
# report the outcome. The success message now closes the quote around the
# table name, matching the failure message.
if ($dbh->do("create table repeat_consensus_backup select * from repeat_consensus")) {
    $support->log("backup table 'repeat_consensus_backup' was created successfully\n");
}
else {
    $support->log_error("failed to create backup table 'repeat_consensus_backup'\n");
}
278 # check to see if the backup table 'repeat_consensus_backup' already exists
279 my @tables = $dbh->tables();
282 foreach my $table(@tables){
285 if($table eq
'`repeat_consensus_backup`'){