ensembl-hive  2.7.0
DirectXrefs.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 package XrefMapper::DirectXrefs;
21 use strict;
22 use warnings;
23 
24 use vars '@ISA';
25 @ISA = qw{ XrefMapper::BasicMapper };
26 
28 
29 use Cwd;
30 use DBI;
31 use File::Basename;
32 use IPC::Open3;
33 
34 
# Build a DirectXrefs object that mirrors the settings of an existing mapper.
#
#   $class  - class to bless the new object into
#   $mapper - object answering core(), xref() and verbose()
#
# Each of the three values is read from $mapper and stored on the new object
# through the accessor of the same name. Returns the new object.
sub new {
    my ($class, $mapper) = @_;

    my $self = bless {}, $class;

    # Same order as always: core, then xref, then verbose.
    foreach my $accessor (qw(core xref verbose)) {
        $self->$accessor($mapper->$accessor);
    }

    return $self;
}
45 
# Prepare the statement that records an identity_xref row for an object xref.
#
#   $dbi - handle whose prepare() method is used
#
# The statement takes one placeholder (object_xref_id); query_identity and
# target_identity are fixed at 100 in the SQL itself.
# Returns whatever $dbi->prepare() returns; nothing is executed here.
sub get_ins_ix_sth {
    my ($self, $dbi) = @_;

    my $insert_identity_sql = <<'IDENTITY_XREF';
INSERT IGNORE INTO identity_xref (object_xref_id, query_identity, target_identity)
 VALUES (?, 100, 100)
IDENTITY_XREF

    my $statement = $dbi->prepare($insert_identity_sql);
    return $statement;
}
57 
58 
# Convert the direct xrefs into object xrefs, then follow their dependent xrefs.
#
# For each of the gene, transcript and translation tables this:
#   1. selects every <table>_direct_xref row, left-joined to <table>_stable_id
#      to obtain the internal id;
#   2. inserts an object_xref row with linkage type 'DIRECT';
#   3. inserts the matching identity_xref row (direct xrefs count as 100%);
#   4. hands the xref to process_dependents() so dependents are linked too.
# Finally a 'direct_xrefs_parsed' row is written to process_status.
#
# Rows whose stable id cannot be resolved are skipped and counted per source
# name; the totals are reported on STDERR at the end.
#
# Dies if a resolved internal id is 0, or if inserting an object_xref fails
# for any reason other than a duplicate entry.
# Returns nothing.
sub process {
    my $self = shift;

    my $dbi = $self->xref->dbc;

    # First get the sths needed for the processing of the direct xrefs.
    my $ins_ox_sql = (<<"IOS");
INSERT INTO object_xref (ensembl_id, xref_id, ensembl_object_type, linkage_type)
 VALUES (?, ?, ?, ?)
IOS
    my $ins_ox_sth             = $dbi->prepare($ins_ox_sql);
    my $get_object_xref_id_sth = $self->get_ox_id_sth($dbi);

    # Direct xrefs can be considered to be 100% matching.
    my $ins_ix_sth = $self->get_ins_ix_sth($dbi);

    # TYPE is replaced by gene / transcript / translation below.
    my $stable_sql = (<<"SQL");
 SELECT so.name, dx.general_xref_id, s.internal_id, dx.ensembl_stable_id , dx.linkage_xref
 FROM source so, xref x, TYPE_direct_xref dx left join TYPE_stable_id s on s.stable_id = dx.ensembl_stable_id
 WHERE x.xref_id = dx.general_xref_id and x.source_id = so.source_id
SQL

    # We want to process insert errors ourselves for greater control.
    # A duplicate-entry error means the object xref is already there, which
    # is not a problem, but we want to know how many of these there were.
    local $ins_ox_sth->{RaiseError} = 0;    # want to see duplicates and not die automatically
    local $ins_ox_sth->{PrintError} = 0;

    my %err_count;

    foreach my $table (qw(gene transcript translation)) {
        my ($dbname, $xref_id, $internal_id, $stable_id, $linkage_type);

        (my $sql = $stable_sql) =~ s/TYPE/$table/g;
        my $sth = $dbi->prepare($sql);
        $sth->execute();
        $sth->bind_columns(\$dbname, \$xref_id, \$internal_id, \$stable_id, \$linkage_type);

        my $duplicate_direct_count    = 0;
        my $duplicate_dependent_count = 0;

        while ($sth->fetch) {
            if (!defined($internal_id)) {
                # Not found: either it is an internal id already or the
                # stable_id no longer exists.
                if ($stable_id =~ /^\d+$/) {
                    $internal_id = $stable_id;
                }
                else {
                    # Only the first ten misses per source are printed.
                    if ((!defined($err_count{$dbname})) or ($err_count{$dbname} < 10)) {
                        print "Could not find stable id $stable_id in table to get the internal id hence ignoring!!! (for $dbname)\n" if ($self->verbose);
                    }
                    $err_count{$dbname}++;
                    next;
                }
            }

            if ($internal_id == 0) {
                die "Problem could not find stable id $stable_id and got past the first check for $dbname\n";
            }

            $ins_ox_sth->execute($internal_id, $xref_id, $table, 'DIRECT');

            # Check the insert before doing anything else: a duplicate needs
            # no further work, and any other failure must not be mistaken
            # for a duplicate (same policy as process_dependents).
            if ($ins_ox_sth->err) {
                my $err = $ins_ox_sth->errstr;
                if ($err =~ /Duplicate/) {
                    $duplicate_direct_count++;
                    next;
                }
                die "Problem loading error is $err\n";
            }

            # Look up the id of the row just inserted so the identity_xref
            # can point at it.
            $get_object_xref_id_sth->execute($internal_id, $xref_id, $table, 'DIRECT');
            my $object_xref_id = ($get_object_xref_id_sth->fetchrow_array())[0];
            $ins_ix_sth->execute($object_xref_id);

            # process_dependents consumes this list, so build a fresh one
            # for every direct xref.
            my @master_xref_ids = ($xref_id);
            $self->process_dependents({master_xrefs => \@master_xref_ids,
                                       dup_count    => \$duplicate_dependent_count,
                                       table        => $table,
                                       internal_id  => $internal_id,
                                       dbi          => $dbi,
                                      });
        }
        $sth->finish;

        if ($duplicate_direct_count or $duplicate_dependent_count) {
            print "duplicate entrys ignored for $duplicate_direct_count direct xrefs and $duplicate_dependent_count dependent xrefs\n" if ($self->verbose);
        }
    }

    foreach my $key (keys %err_count) {
        print STDERR "*WARNING*: ".$err_count{$key}." direct xrefs for database ".$key." could not be added as their stable_ids could not be found\n";
    }

    # Record that this stage of the pipeline has completed.
    my $sth = $dbi->prepare("insert into process_status (status, date) values('direct_xrefs_parsed',now())");
    $sth->execute();
    $sth->finish;

    return;
}
159 
160 
# Prepare the query that lists the dependents of one master xref.
#
#   $dbi - handle whose prepare() method is used
#
# The statement takes one placeholder (master_xref_id) and selects
# dependent_xref_id and linkage_annotation from dependent_xref.
# Returns whatever $dbi->prepare() returns; nothing is executed here.
sub get_dep_sth {
    my ($self, $dbi) = @_;

    my $dependents_query = <<'DEPENDENTS';
SELECT dependent_xref_id, linkage_annotation
 FROM dependent_xref
 WHERE master_xref_id = ?
DEPENDENTS

    my $statement = $dbi->prepare($dependents_query);
    return $statement;
}
173 
174 
175 
# Prepare the insert used to add an object_xref row that carries a master xref.
#
#   $dbi - handle whose prepare() method is used
#
# The statement takes five placeholders, in this order: ensembl_id, xref_id,
# ensembl_object_type, linkage_type, master_xref_id.
# Returns whatever $dbi->prepare() returns; nothing is executed here.
sub get_add_dep_ox {
    my ($self, $dbi) = @_;

    my $insert_dependent_sql = <<'DEPENDENT_OBJECT_XREF';
INSERT INTO object_xref (ensembl_id, xref_id, ensembl_object_type, linkage_type, master_xref_id)
 VALUES (?, ?, ?, ?, ?)
DEPENDENT_OBJECT_XREF

    my $statement = $dbi->prepare($insert_dependent_sql);
    return $statement;
}
187 
# Prepare the lookup that finds an object_xref_id from the row's identifying columns.
#
#   $dbi - handle whose prepare() method is used
#
# The statement takes four placeholders, in this order: ensembl_id, xref_id,
# ensembl_object_type, linkage_type.
# Returns whatever $dbi->prepare() returns; nothing is executed here.
sub get_ox_id_sth {
    my ($self, $dbi) = @_;

    # One condition per placeholder, joined into a single WHERE clause.
    my @conditions = (
        'ensembl_id = ?',
        'xref_id = ?',
        'ensembl_object_type = ?',
        'linkage_type = ?',
    );
    my $lookup_sql = 'select object_xref_id from object_xref where '
                   . join(' and ', @conditions)
                   . "\n";

    my $statement = $dbi->prepare($lookup_sql);
    return $statement;
}
198 
# Prepare the lookup that finds an object_xref_id for a row tied to a master xref.
#
#   $dbi - handle whose prepare() method is used
#
# The statement takes five placeholders, in this order: ensembl_id, xref_id,
# ensembl_object_type, linkage_type, master_xref_id.
# Returns whatever $dbi->prepare() returns; nothing is executed here.
sub get_ox_id_master_sth {
    my ($self, $dbi) = @_;

    # One condition per placeholder, joined into a single WHERE clause.
    my @conditions = (
        'ensembl_id = ?',
        'xref_id = ?',
        'ensembl_object_type = ?',
        'linkage_type = ?',
        'master_xref_id = ?',
    );
    my $lookup_sql = 'select object_xref_id from object_xref where '
                   . join(' and ', @conditions)
                   . "\n";

    my $statement = $dbi->prepare($lookup_sql);
    return $statement;
}
209 
210 
# Attach the dependents of one or more master xrefs to an Ensembl object.
#
# Takes a single hash reference with these keys:
#   master_xrefs - array ref of xref ids to start from; used as a work list
#                  and left empty on return
#   dup_count    - scalar ref, incremented once per duplicate dependent
#   table        - ensembl_object_type stored on each object_xref row
#   internal_id  - ensembl_id stored on each object_xref row
#   dbi          - handle passed to the get_*_sth helpers
#
# For each master id every dependent_xref row is inserted as a 'DEPENDENT'
# object_xref, given an identity_xref row, and then queued as a master in its
# own right so that dependents of dependents are followed as well.
#
# Dies if an object_xref insert fails for any reason other than a duplicate
# entry. Returns nothing.
sub process_dependents {
    my ($self, $arg_ref) = @_;

    my $dbi                 = $arg_ref->{dbi};
    my $master_xref_ids     = $arg_ref->{master_xrefs};
    my $duplicate_dep_count = $arg_ref->{dup_count};
    my $table               = $arg_ref->{table};
    my $internal_id         = $arg_ref->{internal_id};

    my $dep_sth                = $self->get_dep_sth($dbi);
    my $ins_ox_sth2            = $self->get_add_dep_ox($dbi);
    my $ins_ix_sth             = $self->get_ins_ix_sth($dbi);
    my $get_object_xref_id_sth = $self->get_ox_id_master_sth($dbi);

    local $ins_ox_sth2->{RaiseError} = 0;    # want to see duplicates and not die automatically
    local $ins_ox_sth2->{PrintError} = 0;

    # Loop on the list itself rather than on the popped value, so that an id
    # that happens to be false does not abandon the ids still queued.
    while (@{$master_xref_ids}) {
        my $master_xref_id = pop @{$master_xref_ids};

        my ($dep_xref_id, $link);
        $dep_sth->execute($master_xref_id);
        $dep_sth->bind_columns(\$dep_xref_id, \$link);

        while ($dep_sth->fetch) {
            $ins_ox_sth2->execute($internal_id, $dep_xref_id, $table, 'DEPENDENT', $master_xref_id);

            # Check the insert first: a duplicate needs no id lookup, and it
            # is not queued again, which also stops cycles from looping.
            if ($ins_ox_sth2->err) {
                my $err = $ins_ox_sth2->errstr;
                if ($err =~ /Duplicate/) {
                    ${$duplicate_dep_count}++;
                    next;
                }
                die "Problem loading error is $err\n";
            }

            # Look up the id of the row just inserted so the identity_xref
            # can point at it.
            $get_object_xref_id_sth->execute($internal_id, $dep_xref_id, $table, 'DEPENDENT', $master_xref_id);
            my $object_xref_id = ($get_object_xref_id_sth->fetchrow_array())[0];
            $ins_ix_sth->execute($object_xref_id);

            # Get the dependent's dependents, just in case.
            push @{$master_xref_ids}, $dep_xref_id;
        }
    }

    return;
}
255 
256 1;
transcript
public transcript()
XrefMapper::db::dbc
public dbc()
XrefMapper::BasicMapper
Definition: BasicMapper.pm:8
XrefMapper::BasicMapper::core
public XrefMapper::db core()