2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
17 # Repeat classification script
19 # This script is used to do the repeat classification for web display
20 # on newer v32 databases.
# Command-line handling and start of the per-database loop.
# NOTE(review): the statements that create $cli_helper and $optsd are not
# visible in this chunk; confirm where they come from before editing here.
30 # get the basic options for connecting to a database server
32 # add the print option
# NOTE(review): "print|p" looks like a Getopt::Long-style spec (long name
# "print", alias "p") - confirm. No visible code in this chunk reads the
# resulting option.
33 push( @{$optsd},
"print|p" );
34 # process the command line with the supplied options plus a help subroutine
# The second argument is a code reference to the usage() subroutine.
35 my $opts = $cli_helper->process_args( $optsd, \&
usage );
37 # use the command line options to get an array of database details
# get_dba_args_for_opts() is dereferenced as an array; the loop body runs
# once per element, with the element in $db_args.
38 for my $db_args ( @{ $cli_helper->get_dba_args_for_opts($opts) } ) {
40 # use the args to create a DBA
# NOTE(review): the statement that builds $dba (and, presumably, $helper)
# from $db_args is not visible in this chunk.
# Progress message on STDOUT: the database name and host of the current
# connection are concatenated into the line.
43 print STDOUT
"Processing species "
46 . $dba->dbc()->dbname()
48 . $dba->dbc()->host() .
"\n";
# Stage marker, written to STDERR rather than STDOUT.
49 print STDERR
" Setting repeat types\n";
# Classification table: SQL LIKE pattern for repeat_consensus.repeat_class
# => value to store in repeat_consensus.repeat_type.
#
# Each key is interpolated verbatim into "... where repeat_class like '<key>'",
# so '%' matches any run of characters. Note that '_' is also a LIKE wildcard
# (exactly one character), which means 'Low_Comp%' and 'Tet_repeat' match
# slightly more than their literal spelling.
#
# Keys must be unique: a repeated key in a Perl hash literal silently
# overwrites the earlier entry. The table used to list 'Other%' twice (with
# the same value); the redundant entry has been removed.
my %mappings = (
    'Low_Comp%'  => 'Low complexity regions',
    'LINE%'      => 'Type I Transposons/LINE',
    'SINE%'      => 'Type I Transposons/SINE',
    'DNA%'       => 'Type II Transposons',
    'Other%'     => 'Other repeats',
    'Satelli%'   => 'Satellite repeats',
    'Simple%'    => 'Simple repeats',
    'Tandem%'    => 'Tandem repeats',
    'TRF%'       => 'Tandem repeats',
    'Waterman'   => 'Waterman',
    'Tet_repeat' => 'Tetraodon repeats',
    'MaskRegion' => 'Mask region',
    'Unknown%'   => 'Unknown',
    '%RNA'       => 'RNA repeats',
);
# Apply the classification: one UPDATE per %mappings entry, then two
# catch-all UPDATEs that set any still-empty repeat_type to 'Unknown'.
# NOTE(review): keys() returns the patterns in no defined order, so if two
# patterns can match the same repeat_class, whichever UPDATE happens to run
# last decides the type. Verify that the patterns do not overlap in practice.
69 foreach ( keys %mappings ) {
# $_ is the LIKE pattern and $mappings{$_} the type to assign; both are
# interpolated straight into the SQL text. They come from the hard-coded
# %mappings table, not from user input.
# NOTE(review): the call that receives this qq() string (presumably
# $helper->execute_update( -SQL => ... ), as below) and the closing of the
# statement and loop are not visible in this chunk.
71 qq(update repeat_consensus set repeat_type =
'$mappings{$_}' where repeat_class like
'$_')
75 # type all remaining repeats as unknown
# First catch-all: rows whose repeat_type is the empty string.
76 $helper->execute_update( -SQL =>
77 qq(update repeat_consensus set repeat_type =
'Unknown' where repeat_type =
'')
# Second catch-all: rows whose repeat_type is NULL. It needs its own
# statement because "= ''" never matches NULL in SQL.
79 $helper->execute_update( -SQL =>
80 qq(update repeat_consensus set repeat_type =
'Unknown' where repeat_type is
null)
# Closes the per-database loop opened by "for my $db_args (...)".
82 } ## end
for my $db_args ( @{ $cli_helper...})
# Final status message, written to STDERR after all databases are processed.
84 print STDERR
"All done.\n";
89 This program classifies the repeats stored in a core database into some
90 somewhat sensible categories. It does
this through a combination of a
91 repeat.txt file extracted from RepeatMasker repeat libraries and through
92 some simple pattern matching of the repeat names.
94 usage: perl repeat-types.pl [-user <user>] [-port <port>] [-pass <pass>]
95 -host <host> -dbpattern <regexp>
97 example: perl repeat-types.pl -user ensadmin -pass secret -host ecs1g \\
98 -port 3306 -dbpattern
'^homo_sapiens_(core|vega)_20_34c$'