ensembl-hive  2.7.0
generate_xref_mindmap.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 
18 #The script generates a FreePlane mind map file of xrefs for a given species.
19 #To view the generated file, download free mind mapping software Freeplane from http://freeplane.sourceforge.net
20 
21 use strict;
22 use warnings;
23 use DBI qw( :sql_types );
24 use Getopt::Long;
27 use HTML::Entities;
28 
29 
30 use FindBin qw($Bin);
31 use vars qw($SERVERROOT);
32 
33 BEGIN {
34  $SERVERROOT = "$Bin";
35 }
36 
37 my $xhost;
38 my $xport;
39 my $xuser;
40 my $xpass;
41 my $db_version;
42 my $host;
43 my $user;
44 my $port;
45 my $species;
46 my $dontdrop;
47 
48 
49 
50 GetOptions( "xhost|xh=s" => \$xhost,
51  "xport=i" => \$xport,
52  "xuser|xu=s" => \$xuser,
53  "xpass|xp=s" => \$xpass,
54  "host|h=s",\$host,
55  "user|u=s",\$user,
56  "port=i",\$port,
57  "species|s=s",\$species,
58  "db_version=i",\$db_version,
59  "dontdrop|d", \$dontdrop,
60  "help" , \&usage,
61 
62 );
63 
64 usage() if (!defined $xhost || !defined $xuser || !defined $xpass || !defined $host || !defined $user || !defined $species);
65 
66 
67 
68 $port ||= 3306;
69 $xport ||= 3306;
70 $db_version ||= software_version();
71 
72 my $registry = "Bio::EnsEMBL::Registry";
73 
74 
75 $registry->load_registry_from_db( -host => $host, -port => $port,-user => $user, -db_version => $db_version, -species => $species);
76 
77 
78 my $dba = $registry->get_DBAdaptor($species, "core");
79 
80 if (!$dba) {
81  die("$species database version $db_version not found on host $host port $port");
82 }
83 
84 my $dbh;
85 
86 #create and connect to the xref_mindmap database
87 my $dsn = "DBI:mysql:host=$xhost;";
88 if ($xport) {
89  $dsn .= "port=$xport;";
90 }
91 my $xdbname = 'xref_mindmap_'.$species.'_'.$db_version;
92 
93 
94 $dbh = DBI->connect( $dsn, $xuser, $xpass,
95  { 'PrintError' => 1, 'RaiseError' => 1 } );
96 
97 
98 my @objects_with_xrefs = ('Gene', 'Transcript', 'Translation');
99 
100 my %object_display_names = ('Gene' => 'Gene', 'Transcript' => 'Transcript', 'Translation' => 'Protein');
101 
102 
103 print STDOUT "Creating database $xdbname\n";
104 
105 eval {
106  $dbh->do("drop database if exists $xdbname");
107  $dbh->do("create database $xdbname");
108 
109  my $cmd = "mysql -h $xhost";
110  if ($xport) {
111  $cmd .= " -P $xport";
112  }
113  $cmd .= " -u $xuser --password=$xpass $xdbname < $SERVERROOT/sql/tables.sql";
114  system($cmd) == 0 or die("error encountered when creating schema for database $xdbname\n");
115 
116  $dbh->do("use $xdbname");
117 
118 };
119 
120 if ($@) {
121  die("An SQL error occured while creating database $xdbname:\n$@");
122 }
123 
124 
125 
126 #copy the external_db information
127 
128 my $external_db_info_sth = $dba->dbc()->prepare("select external_db_id, db_name, db_display_name from external_db");
129 my $insert_external_db_info_sth = $dbh->prepare("insert into external_db_type (external_db_id, db_name, db_display_name) values (?,?,?)");
130 
131 
132 $external_db_info_sth->execute();
133 
134 while ( my ($external_db_id, $db_name, $db_display_name) = $external_db_info_sth->fetchrow_array() ) {
135 
136  $insert_external_db_info_sth->execute($external_db_id, $db_name, $db_display_name);
137 
138 }
139 $external_db_info_sth->finish();
140 $insert_external_db_info_sth->finish();
141 
142 #populate db types in external_db_type
143 
144 my $cmd = "mysql -h $xhost";
145 if ($xport) {
146  $cmd .= " -P $xport";
147 }
148 $cmd .= " -u $xuser --password=$xpass $xdbname < $SERVERROOT/sql/update.sql";
149 system($cmd) == 0 or die("error encountered when updating database $xdbname\n");
150 
151 
152 #get distinct db_names for an object type
153 my $external_db_sth = $dba->dbc()->prepare("select distinct external_db_id from object_xref join xref using(xref_id) join external_db using(external_db_id) where ensembl_object_type = ? and info_type != 'UNMAPPED' order by db_name");
154 
155 #get xref types for an object type and external db
156 my $xref_types_sth = $dba->dbc()->prepare("select distinct info_type from object_xref join xref using(xref_id) where ensembl_object_type = ? and external_db_id = ? and info_type != 'UNMAPPED'");
157 
158 #get master xref db_names
159 my $master_db_sth = $dba->dbc()->prepare("select distinct e.external_db_id from object_xref ox join xref x1 on (ox.xref_id = x1.xref_id) join dependent_xref on (x1.xref_id = dependent_xref_id) join xref x2 on (master_xref_id = x2.xref_id) join external_db e on (x2.external_db_id = e.external_db_id) where ensembl_object_type = ? and x1.external_db_id = ? and x1.info_type != 'UNMAPPED'");
160 
161 #get species from which xrefs were projected
162 my $projected_from_species_sth = $dba->dbc()->prepare("select distinct substr(info_text,1,locate(' ', info_text, locate(' ', info_text)+1 )) from object_xref join xref using(xref_id) where info_text like 'from%' and ensembl_object_type = ? and external_db_id = ? and info_type = 'PROJECTION'");
163 
164 
165 #xref_mindmap db statements
166 
167 my $insert_object_xref_linkage = $dbh->prepare("insert into object_xref_linkage values(?,?,?,?,?)");
168 
169 my $insert_object_external_db_node = $dbh->prepare("insert into object_external_db_node values (?,?,?)");
170 
171 my $link_type_sth = $dbh->prepare("select link_type_id from link_type where link_type = ?");
172 
173 
174 foreach my $object (@objects_with_xrefs) {
175 
176  $external_db_sth->execute($object);
177  my @db_ids;
178  while (my ($db_id) = $external_db_sth->fetchrow_array() ) {
179  push @db_ids, $db_id;
180  }
181 
182  foreach my $db_id (@db_ids) {
183  $xref_types_sth->execute($object, $db_id);
184 
185  #store the xref db node
186  $insert_object_external_db_node->execute($object,$db_id, $object.$db_id);
187 
188  while (my ($link_type) = $xref_types_sth->fetchrow_array() ) {
189 
190  #get link_type_id
191  $link_type_sth->execute($link_type);
192  my ($link_type_id) = $link_type_sth->fetchrow_array();
193 
194  if ($link_type_id) {
195 
196  if ($link_type eq 'DEPENDENT') {
197 
198  $master_db_sth->execute($object,$db_id);
199  while (my ($master_db_id) = $master_db_sth->fetchrow_array() ) {
200 
201  $insert_object_xref_linkage->execute($db_id,$object,$link_type_id, $master_db_id, undef);
202  }
203 
204  } elsif ($link_type eq 'PROJECTION') {
205 
206  $projected_from_species_sth->execute($object, $db_id);
207  while (my ($linked_node_text) = $projected_from_species_sth->fetchrow_array()) {
208  $insert_object_xref_linkage->execute($db_id,$object,$link_type_id, undef, $linked_node_text);
209  }
210 
211  } else {
212 
213  $insert_object_xref_linkage->execute($db_id,$object,$link_type_id, undef, undef);
214 
215  }
216 
217  }
218 
219  }
220 
221  }
222 
223 }
224 
225 
226 #populate 'generated_from' link types in object_xref_linkage - this is for xrefs to do with gene and transcript naming
227 
228 my $naming_dbs_sth = $dbh->prepare("select external_db_id, db_name from external_db_type where db_name like ?");
229 my $db_id_sth = $dbh->prepare("select external_db_id from external_db_type where db_name = ?");
230 
231 $link_type_sth->execute('GENERATED_FROM');
232 my ($link_type_id) = $link_type_sth->fetchrow_array();
233 
234 foreach my $object (@objects_with_xrefs) {
235 
236  $naming_dbs_sth->execute('%'.$object.'\_name');
237  while (my ($db_id, $db_name) = $naming_dbs_sth->fetchrow_array() ) {
238 
239  if ($db_name =~ m/(.*)\_$object\_name/i) {
240 
241  my $linked_db_name = $1;
242  if ($linked_db_name) {
243 
244  #find the linked db id
245  $db_id_sth->execute($linked_db_name);
246  my ($linked_db_id) = $db_id_sth->fetchrow_array();
247 
248  if ($linked_db_id) {
249  $insert_object_xref_linkage->execute($db_id,$object,$link_type_id, $linked_db_id, undef);
250  }
251 
252  }
253  }
254  }
255 
256  foreach my $name ( ('Vega','Ensembl')) {
257  $naming_dbs_sth->execute('Clone\_based\_'.$name.'\_'.$object);
258  my ($db_id, $db_name) = $naming_dbs_sth->fetchrow_array();
259  if ($db_id) {
260  $insert_object_xref_linkage->execute($db_id,$object,$link_type_id, undef, "$name clone name");
261  }
262  }
263 
264 }
265 $naming_dbs_sth->finish();
266 
267 
268 
269 $db_id_sth->finish();
270 
271 $external_db_sth->finish();
272 $xref_types_sth->finish();
273 $master_db_sth->finish();
274 $projected_from_species_sth->finish();
275 
276 $insert_object_xref_linkage->finish();
277 $insert_object_external_db_node->finish();
278 $link_type_sth->finish();
279 
280 
281 
282 #create the .mm file based on info from xref_mindmap db
283 
284 my $file_name = $species .'_xrefs_'. $db_version .'.mm';
285 
286 open( FH, ">$file_name" ) or die("Can't open $file_name\n");
287 
288 print STDOUT "Writing to file $file_name\n";
289 
290 
291 my $header = <<HEADER;
292 <map version='0.9.0'>
293 <!--To view this file, download free mind mapping software Freeplane from http://freeplane.sourceforge.net -->
294 <node TEXT='External References' ID='ID_000000001' COLOR='#18898b' STYLE='fork'>
295 <font NAME='Liberation Sans' SIZE='12' BOLD='true'/>
296 <hook NAME='MapStyle' max_node_width='600'/>
297 <edge STYLE='bezier' COLOR='#808080' WIDTH='thin'/>
298 HEADER
299 
300 print FH $header;
301 
302 
303 my $category_and_db_name_nodes_sth = $dbh->prepare("select db_type, external_db_id, db_display_name, mindmap_tag_id from object_external_db_node join external_db_type using(external_db_id) join db_type using(db_type_id) where ensembl_object_type = ? order by db_type, db_display_name;");
304 
305 my $db_linkage_types_sth = $dbh->prepare("select link_description, linked_external_db_id, linked_node_text from object_xref_linkage join link_type using(link_type_id) where ensembl_object_type = ? and external_db_id = ? and link_description is not null order by link_description, linked_external_db_id, linked_node_text");
306 
307 #this is used to link to dbs from the closest level (e.g. if Orphanet xrefs for a Gene are dependent on HGNC xrefs
308 #we will try to link to the Gene HGNC node then transcript etc..
309 
310 my $object_distance_sth = $dbh->prepare("select to_object from object_distance where from_object = ? order by distance");
311 
312 my $node_id_sth = $dbh->prepare("select mindmap_tag_id from object_external_db_node where ensembl_object_type = ? and external_db_id = ?");
313 
314 
315 my $node_id_count = 1;
316 
317 foreach my $object (@objects_with_xrefs) {
318  $node_id_count++;
319  my $node_id = 'ID_'.$node_id_count;
320  #write out the object node
321  my $object_node = "<node TEXT='".$object_display_names{$object}."' POSITION='right' ID='".$node_id."' COLOR='#cc3300' STYLE='fork'>
322 <font NAME='Liberation Sans' SIZE='12' BOLD='true'/>
323 <edge STYLE='bezier' COLOR='#808080' WIDTH='thin'/>
324 ";
325  print FH $object_node;
326 
327  $category_and_db_name_nodes_sth->execute($object);
328 
329  my $last_type = 'none';
330  while (my ($db_type, $db_id, $db_display_name, $tag_id) = $category_and_db_name_nodes_sth->fetchrow_array() ) {
331 
332  if ($db_type ne $last_type) {
333 
334  if ($last_type ne 'none') {
335  print FH "</node>\n"; #db type node end tag
336  }
337  $node_id_count++;
338  $node_id = 'ID_'.$node_id_count;
339  #write out new db type node
340  my $db_type_node = "<node TEXT='".encode_entities($db_type)."' ID='".$node_id."' COLOR='#669900'>
341 <font NAME='Liberation Sans' SIZE='12' BOLD='true'/>
342 ";
343  print FH $db_type_node;
344  }
345 
346  #write out the db name node
347  $node_id_sth->execute($object,$db_id);
348  ($node_id) = $node_id_sth->fetchrow_array();
349  my $db_name_node = "<node TEXT='".encode_entities($db_display_name)."' ID='".$node_id."'>
350 ";
351  print FH $db_name_node;
352 
353  $db_linkage_types_sth->execute($object,$db_id);
354 
355  my $last_link_desc = 'none';
356  while (my ($link_description, $linked_db_id, $linked_node_text)
357  = $db_linkage_types_sth->fetchrow_array() ){
358 
359  if ($link_description ne $last_link_desc) {
360  if ($last_link_desc ne 'none') {
361  print FH "</node>\n"; #link type node end tag
362  }
363  #write out the link node
364  $node_id_count++;
365  $node_id = 'ID_'.$node_id_count;
366  my $link_node = "<node TEXT='".encode_entities($link_description)."' ID='".$node_id."'>
367 ";
368  print FH $link_node;
369 
370  }
371 
372  if ($linked_db_id) {
373 
374  #find the node id to link to
375  my @closest_level;
376  $object_distance_sth->execute($object);
377 
378  while (my ($object_type) = $object_distance_sth->fetchrow_array() ) {
379  push @closest_level, $object_type;
380  }
381 
382  my $linked_node_id;
383  foreach my $object_type (@closest_level) {
384 
385  $node_id_sth->execute($object_type,$linked_db_id);
386  ($linked_node_id) = $node_id_sth->fetchrow_array();
387  last if ($linked_node_id);
388  }
389 
390  if ($linked_node_id) {
391  #write out arrow link to the node
392  my $arrow_link_node = "<arrowlink DESTINATION='".$linked_node_id."' STARTARROW='NONE' ENDARROW='DEFAULT'/>
393 ";
394  print FH $arrow_link_node;
395  }
396 
397  } elsif ($linked_node_text) {
398 
399  #write out the node
400  $node_id_count++;
401  $node_id = 'ID_'.$node_id_count;
402  my $node = "<node TEXT='".encode_entities($linked_node_text)."' ID='".$node_id."'/>
403 ";
404  print FH $node;
405 
406  }
407 
408  $last_link_desc = $link_description;
409 
410 
411  }
412  if ($last_link_desc ne 'none') {
413  print FH "</node>\n"; #link type node end tag
414  }
415 
416 
417  print FH "</node>\n"; #db name node end tag
418 
419  $last_type = $db_type;
420  }
421  if ($last_type ne 'none') {
422  print FH "</node>\n"; #db type node end tag
423  }
424 
425 
426  print FH "</node>\n" #object node end tag;
427 
428 }
429 
430 $category_and_db_name_nodes_sth->finish();
431 $db_linkage_types_sth->finish();
432 $object_distance_sth->finish();
433 $node_id_sth->finish();
434 
435 
436 #external references end tag and map end tag
437 my $footer = <<FOOTER;
438 </node>
439 </map>
440 FOOTER
441 
442 print FH $footer;
443 
444 close(FH);
445 
446 if (!$dontdrop) {
447 
448  print STDOUT "Dropping database $xdbname\n";
449  eval {
450  $dbh->do("drop database if exists $xdbname");
451  };
452  if ($@) {
453  die("An SQL error occured while dropping database $xdbname:\n$@");
454  }
455 }
456 
457 $dbh->disconnect();
458 
459 
460 
461 
462 sub usage {
463  my $indent = ' ' x length($0);
464  print <<EOF; exit(0);
465 
466 The script populates a stable_id lookup database with all stable ids found in databases
467 on a specified server (or servers) for a specified db release.
468 Stable ids are copied for objects listed in hash %group_objects
469 
470 Options -lhost -luser -lpass are mandatory and specify the credentials for the server on which a stable id lookup database exists or is to be created (if using option -create). If an argument for option -ldbname is not provided, the default name for the database wil be used: 'ensembl_stable_id_lookup_xx', where xx is the database release (option -db_version).
471 
472 Options -host -user -port specify the credentials of the server(s) where stable ids are to be copied from.
473 
474 To run the script cd into the directory where the script lives eg:
475 cd ensembl/misc-scripts/stable_id_lookup/
476 
477 
478 This command will create database ensembl_stable_id_lookup_67 on server ens-staging1 and will copy stable ids from databases for release 67 found on ens-staging1 and ens-staging2:
479 
480 populate_stable_id_lookup.pl -lhost ens-staging1 -luser ensadmin -lpass xxxx -create -db_version 67 -host ens-staging1 -host ens-staging2 -user ensro
481 
482 
483 Usage:
484 
485  $0 -xhost host_name -xuser user_name -xpass password
486  $indent [-xport port_number]
487  $indent -host host_name -user user_name
488  $indent [-port port_number] [-db_version]
489  $indent -species species_name
490  $indent [-dontdrop]
491  $indent [-help]
492 
493 
494 
495  -h|host Database host where the species db lives
496 
497  -u|user Database user where the species db lives
498 
499  -port Database port the species db lives
500 
501  -xh|xhost Database host where the xref mindmap db is to be created
502 
503  -xu|xuser Database user where the xref mindmap db is to be created
504 
505  -xp|xpass Database password where the xref mindmap db is to be created
506 
507  -xport Database port where the xref mindmap db is to be created
508 
509  -s|species Species name to generate xref mindmap for
510 
511  -d|dontdrop Don\'t drop the xref_mindmap db at the end of the script
512 
513  -db_version If not specified, software_version() returned by the ApiVersion module will be used
514 
515  -help This message
516 
517 
518 EOF
519 
520 }
EnsEMBL
Definition: Filter.pm:1
map
public map()
Bio::EnsEMBL::Registry
Definition: Registry.pm:113
BEGIN
public BEGIN()
run
public run()
Bio
Definition: AltAlleleGroup.pm:4
usage
public usage()
Bio::EnsEMBL::ApiVersion
Definition: ApiVersion.pm:17