ensembl-hive  2.8.1
add_ikmc_as_simple_features.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
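#
# Loads IKMC records from a tab-separated (optionally gzip-compressed) file
# into the simple_feature table of one core database.  Each record that
# names a known gene stable id becomes a simple feature spanning that gene,
# attached to the "IKMC_*" analysis matching the record's product state.
#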
21 
22 use strict;
23 use warnings;
24 
25 
26 use Getopt::Long;
27 use DBI;
28 use IO::File;
29 
# Command-line options.  The connection settings fall back to the defaults
# assigned below; --file (the IKMC input, plain or .gz/.Z) is mandatory.
my ( $host, $user, $pass, $port, $dbname, $file );

my $usage =
    "Usage: $0 --file <ikmc_file> [--dbhost <host>] [--dbport <port>] "
  . "[--dbuser <user>] [--dbpass <password>] [--dbname <database>]\n";

# GetOptions() returns false on an unknown or malformed option.  Stop here
# rather than carry on with the default database because of a typo.
GetOptions(
    "dbhost|host=s" => \$host,
    "dbuser|user=s" => \$user,
    "dbpass|pass=s" => \$pass,
    "dbport|port=i" => \$port,
    "file=s"        => \$file,
    "dbname=s"      => \$dbname,
) or die $usage;

# Refuse to start without an input file: the script clears existing IKMC
# features before it reads the file, so a forgotten --file must be caught
# before anything in the database is touched.
( defined $file && length $file )
  or die "No input file given (--file).\n" . $usage;

$port   ||= 3306;
$host   ||= "ens-research";
$dbname ||= "ianl_mus_musculus_core_58_37k";

# connect to the database
my $dsn = "DBI:mysql:host=$host;port=$port;database=$dbname";

# RaiseError => 1 makes connect() and every later DBI call on this handle
# die with the driver's error message on failure.
my $db = DBI->connect( $dsn, $user, $pass, { RaiseError => 1 } );
47 
48 
# The four IKMC product states.  Each value starts as 0 and is replaced in
# the loop below by the analysis_id of the corresponding "IKMC_*" analysis.
my %analysis = (
    "No products available yet" => 0,
    "ES cells available"        => 0,
    "Mice available"            => 0,
    "Vector available"          => 0,
);

# If one of the 4 analyses does not exist yet, create it.
# If it does exist, delete its entries from the simple_feature table.
#
# There is no "|| die" on the DBI calls: the connection is opened with
# RaiseError => 1, so DBI itself dies with the driver's message on failure.

# Exact match on purpose.  With LIKE, every underscore in the logic name is
# a single-character wildcard, so a differently named analysis could be
# matched and have its features deleted.
my $find_sth =
  $db->prepare("SELECT analysis_id FROM analysis WHERE logic_name = ?");

my $create_sth = $db->prepare("INSERT into analysis (logic_name) values (?)");

my $delete_sth =
  $db->prepare("DELETE from simple_feature where analysis_id = ?");

# Sorted so that log output and the order in which new analyses are created
# are the same on every run.
foreach my $anal ( sort keys %analysis ) {

    # Logic name is "IKMC_" plus the state with spaces turned into underscores.
    ( my $logic_name = "IKMC_" . $anal ) =~ s/ /_/g;

    # selectrow_array() executes the prepared handle, returns the first row
    # (an empty list if there is none) and finishes the handle.
    my ($id) = $db->selectrow_array( $find_sth, undef, $logic_name );

    if ( defined $id ) {    # delete existing simple features
        print STDERR "Analysis $logic_name already exists so clearing entrys for this ($id) in the simple_feature table\n";
        $delete_sth->execute($id);
    }
    else {                  # create analysis
        $create_sth->execute($logic_name);
        $id = $create_sth->{'mysql_insertid'};
        print STDERR "Creating new anlysis $logic_name ($id)\n";
    }

    $analysis{$anal} = $id;
}

$find_sth->finish();
$create_sth->finish();
$delete_sth->finish();
87 
88 
# Gene lookup tables, all keyed by gene stable_id:
#   %gene2seqregion : seq_region_id
#   %gene2start     : seq_region_start
#   %gene2end       : seq_region_end
#   %gene2strand    : seq_region_strand
my %gene2seqregion;
my %gene2start;
my %gene2end;
my %gene2strand;

my $gene_sth = $db->prepare("SELECT stable_id, seq_region_id, seq_region_start, seq_region_end, seq_region_strand FROM gene") || die "Could not prepare gene_sth";

$gene_sth->execute();

# One row per gene; the column order matches the SELECT list above.
while ( my @gene_row = $gene_sth->fetchrow_array() ) {
    my ( $gene_id, $region_id, $gene_start, $gene_end, $gene_strand ) =
      @gene_row;

    $gene2seqregion{$gene_id} = $region_id;
    $gene2start{$gene_id}     = $gene_start;
    $gene2end{$gene_id}       = $gene_end;
    $gene2strand{$gene_id}    = $gene_strand;
}
# Process the file and add new simple features.
#
# Each line is split on tabs into (mgi, label, type, stable_id).  A line is
# loaded when its stable_id is a known gene and its type is one of the keys
# of %analysis; the feature takes the gene's coordinates and the analysis_id
# registered for that type.  A per-type tally is printed to STDOUT at the end.
#
# NOTE(review): $label is parsed but never stored; display_label receives
# $mgi.  Confirm that this is intended.

# No "|| die" on the DBI calls: the connection is opened with
# RaiseError => 1, so DBI itself dies on failure.
my $insert_sth = $db->prepare(
    "INSERT INTO simple_feature (seq_region_id, seq_region_start, seq_region_end, seq_region_strand, display_label, analysis_id ) VALUES(?, ?, ?, ?, ?, ?)"
);

# get_filehandle() signals failure by returning undef; report that clearly
# instead of failing on the first getline() call.
my $ikmc = get_filehandle($file);
defined $ikmc
  or die "Could not open the IKMC input file"
  . ( defined $file ? " '$file'" : "" ) . "\n";

my %count;

# Read into a lexical and test definedness: the global $_ is left alone and
# a final line consisting of just "0" does not end the loop early.
LINE:
while ( defined( my $line = $ikmc->getline() ) ) {

    chomp $line;
    my ( $mgi, $label, $type, $stable_id ) = split /\t/, $line;

    # Lines with a missing or empty gene column are only counted.
    if ( !$stable_id ) {
        $count{"no stable_id"}++;
        next LINE;
    }

    if ( !defined $gene2seqregion{$stable_id} ) {
        print STDERR "Could not find stable id $stable_id\n";
        next LINE;
    }

    if ( !defined $analysis{$type} ) {
        print STDERR $line . "\nUnknown type *$type*\n";
        next LINE;
    }

    $insert_sth->execute(
        $gene2seqregion{$stable_id},
        $gene2start{$stable_id},
        $gene2end{$stable_id},
        $gene2strand{$stable_id},
        $mgi,
        $analysis{$type},
    );
    $count{$type}++;
}

# Closing a pipe handle waits for the child process, so a failing
# decompressor is reported here instead of passing unnoticed.
$ikmc->close()
  or warn "Input did not close cleanly (exit status $?): $!\n";

$insert_sth->finish();

# Sorted so the summary is printed in the same order on every run.
foreach my $key ( sort keys %count ) {
    print $key . "\t" . $count{$key} . "\n";
}

$db->disconnect();
147 
148 
# Open a plain or compressed text file for reading.
#
# Arg     : $file_name - path of the file to read.  If no regular file of
#           that name exists, the alternative spelling is tried: the name
#           without its .gz/.Z suffix, or with ".gz" appended when it has
#           no such suffix.
# Returns : an IO::File handle positioned at the start of the data, or
#           nothing (undef in scalar context) when no name was given, the
#           file does not exist under either name, or it cannot be opened.
#           Names ending in .gz or .Z are read through a 'zcat' pipe.
# Output  : prints "Reading from '<name>'..." to STDOUT on success and
#           warns through Carp on every failure path.
sub get_filehandle {
    my ($file_name) = @_;

    # The script never loads Carp, so a bare carp() call dies with
    # "Undefined subroutine".  Load it here and call it fully qualified.
    require Carp;

    if ( !defined $file_name || $file_name eq '' ) {
        Carp::carp("No file name given");
        return;
    }

    # Work out the alternative spelling of the name.
    my $alt_file_name = $file_name;
    my $had_suffix    = ( $alt_file_name =~ s/\.(?:gz|Z)$// );
    if ( !$had_suffix ) {
        $alt_file_name .= '.gz';
    }

    if ( !-f $file_name ) {
        Carp::carp( "File '$file_name' does not exist, "
              . "will try '$alt_file_name'" );
        $file_name = $alt_file_name;

        # Give up now rather than hand back a handle on a zcat process
        # that can only fail.
        if ( !-f $file_name ) {
            Carp::carp("File '$file_name' does not exist either");
            return;
        }
    }

    my $io = IO::File->new();

    if ( $file_name =~ /\.(?:gz|Z)$/ ) {

        # Read from zcat pipe.  The list form of open passes the name to
        # zcat as one argument and never through a shell, so spaces or
        # shell metacharacters in the name are harmless.
        if ( !open( $io, '-|', 'zcat', $file_name ) ) {
            Carp::carp("Can not open file '$file_name' with 'zcat': $!");
            return;
        }
    }
    else {

        # Read file normally.  The explicit '<' mode stops characters such
        # as '>' or '|' in the name from being taken as an open mode.
        if ( !open( $io, '<', $file_name ) ) {
            Carp::carp("Can not open file '$file_name': $!");
            return;
        }
    }

    print "Reading from '$file_name'...\n";

    return $io;
}
get_filehandle
public get_filehandle()