ensembl-hive  2.7.0
DanioRerio.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package SeqStoreConverter::vega::DanioRerio;
21 
22 use strict;
23 use warnings;
24 
27 use vars qw(@ISA);
28 
30 
31 sub copy_internal_clone_names {
32  my $self = shift;
33 
34  my $target = $self->target();
35  my $source = $self->source();
36  my $dbh = $self->dbh();
37  $self->debug("Vega danio specific - copying internal clone names to seq_region_attrib");
38 
39 #get id for 'fpc_clone_id' attribute
40 
41  $dbh->do("INSERT INTO $target.attrib_type (code,name,description)".
42  "values ('fpc_clone_id','fpc clone','clone id used for linking to Zebrafish webFPC')");
43 
44  my ($attrib_id) = $dbh->selectrow_array("Select attrib_type_id from $target.attrib_type where code = 'fpc_clone_id'");
45  warn "No attrib id found\n" unless defined($attrib_id);
46 
47 #get clone details
48  my $select1_sth = $dbh->prepare
49  ("SELECT seq_region_id, name from $target.seq_region where coord_system_id = 3;");
50  $select1_sth->execute();
51  my ($seq_region_id, $embl_name);
52  $select1_sth->bind_columns(\$seq_region_id, \$embl_name);
53 
54  my $clone_name;
55  my $select2_sth = $dbh->prepare("select name from $source.clone where embl_acc= ?");
56 
57  my $insert_sth = $dbh->prepare("insert into $target.seq_region_attrib values (?,$attrib_id,?)");
58 
59  while ($select1_sth->fetch()) {
60  $embl_name =~ s/([\d\w]+).*/$1/;
61  $select2_sth->bind_param(1,$embl_name);
62  $select2_sth->execute;
63  $insert_sth->bind_param(1,$seq_region_id);
64  while (my ($clone_name) = $select2_sth->fetchrow_array()) {
65  $insert_sth->bind_param(2,$clone_name);
66  $insert_sth->execute();
67  }
68  }
69 }
70 
71 sub update_clone_info {
72  my $self = shift;
73  my $target_cs_name = shift;
74 
75  my $target = $self->target();
76  my $source = $self->source();
77  my $dbh = $self->dbh();
78 
79  # clone_info, current_clone_info
80  $self->debug("Vega Danio_specific - Transforming clone_id into seq_region_id for clone_info and current_clone_info");
81 
82  foreach my $table_name ('clone_info','current_clone_info') {
83  my $select_st1 =
84  "SELECT ctg.name, ctg.clone_id " .
85  "FROM $source.contig ctg, $source.$table_name ci " .
86  "WHERE ctg.clone_id = ci.clone_id " .
87  "AND ctg.name not like 'ctg%' " .
88  "AND ctg.name not like 'NA%'";
89 
90  my $query_results1 = $dbh->selectall_arrayref($select_st1);
91 
92  my $i = 0;
93  foreach my $contig_name (@$query_results1) {
94  my $embl_acc = $contig_name->[0];
95  my $select_st2 =
96  "SELECT sr.seq_region_id " .
97  "FROM $target.seq_region sr " .
98  "WHERE sr.name = '$embl_acc'";
99  my @query_results2 = $dbh->selectrow_array($select_st2);
100  push @{$query_results1->[$i]},@query_results2;
101  $i++;
102  }
103 
104  foreach my $clone (@$query_results1) {
105  my $seq_reg_id = $clone->[2];
106  my $clone_id = $clone->[1];
107 
108  my $update_query =
109  "UPDATE $target.$table_name " .
110  "SET clone_id = '$seq_reg_id' " .
111  "WHERE clone_id = '$clone_id'";
112  $dbh->do($update_query);
113  }
114  my $alter_struct_1 =
115  "ALTER table $target.$table_name " .
116  "CHANGE clone_id seq_region_id int(10) not null";
117  my $alter_struct_2 =
118  "ALTER table $target.$table_name " .
119  "add unique index (seq_region_id)";
120  $dbh->do($alter_struct_1);
121  $dbh->do($alter_struct_2);
122  }
123 
124  # assembly_tag
125  $self->debug("Vega Danio_specific - Transforming contig_id into seq_region_id for assembly_tag");
126 
127  # first remove orphans from assembly_tag table (i.e. entries pointing to
128  # non-existing contigs)
129  my $numrows = $dbh->do(qq(
130  DELETE at
131  FROM $source.assembly_tag at
132  LEFT JOIN $source.contig c ON c.contig_id = at.contig_id
133  WHERE c.contig_id IS NULL
134  ));
135  $self->debug(" Deleted $numrows orphans from assembly_tag");
136 
137  my $select_st3 =
138  "SELECT ctg.name, ctg.contig_id " .
139  "FROM $source.contig ctg, $source.assembly_tag at " .
140  "WHERE ctg.contig_id = at.contig_id " .
141  "AND ctg.name not like 'ctg%' " .
142  "AND ctg.name not like 'NA%'";
143 
144  my $query_results3 = $dbh->selectall_arrayref($select_st3);
145 
146  my $j = 0;
147  foreach my $contig_name (@$query_results3) {
148  my $embl_acc = $contig_name->[0];
149  my $select_st4 =
150  "SELECT sr.seq_region_id " .
151  "FROM $target.seq_region sr " .
152  "WHERE sr.name = '$embl_acc'";
153  my @query_results4 = $dbh->selectrow_array($select_st4);
154  push @{$query_results3->[$j]}, @query_results4;
155  $j++;
156  }
157 
158  foreach my $contig (@$query_results3) {
159  my $seq_reg_id = $contig->[2];
160  my $contig_id = $contig->[1];
161 
162  my $update_query =
163  "UPDATE $target.assembly_tag " .
164  "SET contig_id = '$seq_reg_id' " .
165  "WHERE contig_id = '$contig_id'";
166  $dbh->do($update_query);
167  }
168 
169  $dbh->do(" ALTER TABLE $target.assembly_tag
170  CHANGE contig_id seq_region_id int(10) UNSIGNED NOT NULL");
171 
172  $dbh->do(" ALTER TABLE $target.assembly_tag
173  CHANGE contig_start seq_region_start int(10)");
174 
175  $dbh->do(" ALTER TABLE $target.assembly_tag
176  CHANGE contig_end seq_region_end int(10)");
177 
178  $dbh->do(" ALTER TABLE $target.assembly_tag
179  CHANGE contig_strand seq_region_strand tinyint(1)");
180 
181 }
182 
183 1;
SeqStoreConverter::DanioRerio
Definition: DanioRerio.pm:3
SeqStoreConverter::vega::VBasicConverter
Definition: VBasicConverter.pm:4