ensembl-hive  2.7.0
repeat-types.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # Repeat classification script
18 #
19 # This script is used to do the repeat classification for web display
20 # on newer v32 databases.
21 #
22 
23 use strict;
24 use warnings;
25 
27 
my $cli_helper = Bio::EnsEMBL::Utils::CliHelper->new();

# Start from the helper's standard database-connection options and add this
# script's own "print" flag.
# NOTE(review): the "print" option is accepted but never read below --
# confirm whether it is still needed.
my $optsd = $cli_helper->get_dba_opts();
push @{$optsd}, 'print|p';

# Parse the command line; usage() is handed to the helper as the help routine.
my $opts = $cli_helper->process_args( $optsd, \&usage );

# Ordered (SQL LIKE pattern => repeat_type) pairs.
#
# A repeat_class may match more than one pattern (for example a class that
# starts with "SINE" and ends with "RNA" matches both 'SINE%' and '%RNA').
# The patterns are applied strictly in the order listed here, so the last
# matching entry wins.  An ordered list is used instead of a hash because
# iterating a hash with keys() follows Perl's randomised key order, which
# would make the final repeat_type of such rows differ from run to run.
# NOTE(review): declaration order is assumed to be the intended precedence --
# confirm, in particular for '%RNA' being applied last.
my @type_for_class_pattern = (
    [ 'Low_Comp%'  => 'Low complexity regions' ],
    [ 'LINE%'      => 'Type I Transposons/LINE' ],
    [ 'SINE%'      => 'Type I Transposons/SINE' ],
    [ 'DNA%'       => 'Type II Transposons' ],
    [ 'LTR%'       => 'LTRs' ],
    [ 'Other%'     => 'Other repeats' ],
    [ 'Satelli%'   => 'Satellite repeats' ],
    [ 'Simple%'    => 'Simple repeats' ],
    [ 'Tandem%'    => 'Tandem repeats' ],
    [ 'TRF%'       => 'Tandem repeats' ],
    [ 'Waterman'   => 'Waterman' ],
    [ 'Recon'      => 'Recon' ],
    [ 'Tet_repeat' => 'Tetraodon repeats' ],
    [ 'MaskRegion' => 'Mask region' ],
    [ 'dust%'      => 'Dust' ],
    [ 'Unknown%'   => 'Unknown' ],
    [ '%RNA'       => 'RNA repeats' ],
);

# Placeholders keep the values out of the SQL text, so no quoting of the
# pattern or type strings is needed.
my $classify_sql =
  'update repeat_consensus set repeat_type = ? where repeat_class like ?';

# Anything still untyped (empty string or NULL) after the pattern pass.
my $fallback_sql =
  q(update repeat_consensus set repeat_type = 'Unknown' )
  . q(where repeat_type = '' or repeat_type is null);

# One iteration per set of database connection details derived from the
# command-line options.
for my $db_args ( @{ $cli_helper->get_dba_args_for_opts($opts) } ) {

    # use the args to create a DBA
    my $dba    = Bio::EnsEMBL::DBSQL::DBAdaptor->new( %{$db_args} );
    my $helper = $dba->dbc()->sql_helper();

    print STDOUT "Processing species "
      . $dba->species_id()
      . " from database "
      . $dba->dbc()->dbname()
      . " on server "
      . $dba->dbc()->host() . "\n";
    print STDERR " Setting repeat types\n";

    # Apply every pattern in its declared order.
    for my $pair (@type_for_class_pattern) {
        my ( $class_pattern, $repeat_type ) = @{$pair};
        $helper->execute_update(
            -SQL    => $classify_sql,
            -PARAMS => [ $repeat_type, $class_pattern ],
        );
    }

    # type all remaining repeats as unknown
    $helper->execute_update( -SQL => $fallback_sql );
}

print STDERR "All done.\n";
85 
# Print the command-line help text to STDERR and terminate the script.
# Takes no arguments and never returns: it ends with exit.
sub usage {

    # The heredoc is deliberately non-interpolating (<<'EOF').  The example
    # regexp ends in "$'" which, inside an interpolating heredoc, is read as
    # Perl's post-match variable: the characters vanish from the output and
    # an "uninitialized value" warning is raised.  With single-quote
    # semantics the text is emitted exactly as written, so the line
    # continuation is a single backslash here.
    print STDERR <<'EOF';

This program classifies the repeats stored in a core database into some
somewhat sensible categories. It does this through a combination of a
repeat.txt file extracted from RepeatMasker repeat libraries and through
some simple pattern matching of the repeat names.

usage: perl repeat-types.pl [-user <user>] [-port <port>] [-pass <pass>]
 -host <host> -dbpattern <regexp>

example: perl repeat-types.pl -user ensadmin -pass secret -host ecs1g \
 -port 3306 -dbpattern '^homo_sapiens_(core|vega)_20_34c$'

EOF
    exit;
}
Bio::EnsEMBL::DBSQL::DBAdaptor
Definition: DBAdaptor.pm:40
Bio::EnsEMBL::Utils::CliHelper::get_dba_opts
public Arrayref get_dba_opts()
Bio::EnsEMBL::DBSQL::DBAdaptor::dbc
public Bio::EnsEMBL::DBSQL::DBConnection dbc()
usage
public usage()
Bio::EnsEMBL::DBSQL::DBAdaptor::new
public Bio::EnsEMBL::DBSQL::DBAdaptor new()
Bio::EnsEMBL::DBSQL::DBConnection::sql_helper
public Bio::EnsEMBL::Utils::SqlHelper sql_helper()
Bio::EnsEMBL::Utils::SqlHelper::execute_update
public Int execute_update()
Bio::EnsEMBL::Utils::CliHelper
Definition: CliHelper.pm:55