ensembl-hive  2.7.0
sql2rst.pl
Go to the documentation of this file.
1 #!/usr/bin/env perl
2 # See the NOTICE file distributed with this work for additional information
3 # regarding copyright ownership.
4 #
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
8 #
9 # http://www.apache.org/licenses/LICENSE-2.0
10 #
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
16 
17 # 1st Feb 2011
18 # Generate an HTML documentation page from an SQL file.
19 #
20 # It needs to have a "javascript like" documentation above each table.
21 # See the content of the method sql_documentation_format();
22 ####################################################################################
23 
24 
25 use strict;
26 #use warnings; # commented out because this script is a repeat offender
27 
28 use File::Basename ();
29 use Getopt::Long;
30 use List::Util qw(max sum);
31 
34 
35 
36 ###############
37 ### Options ###
38 ###############
39 
40 my ($sql_file,$fk_sql_file,$html_file,$db_team,$show_colour,$version,$header_flag,$sort_headers,$sort_tables,$intro_file,$embed_diagrams,$help,$help_format);
41 my ($url,$skip_conn,$db_connection);
42 
43 usage() if (!scalar(@ARGV));
44 
45 GetOptions(
46  'i=s' => \$sql_file,
47  'fk=s' => \$fk_sql_file,
48  'o=s' => \$html_file,
49  'd=s' => \$db_team,
50  'c!' => \$show_colour,
51  'v=i' => \$version,
52  'embed_diagrams!' => \$embed_diagrams,
53  'show_header!' => \$header_flag,
54  'sort_headers=i' => \$sort_headers,
55  'sort_tables=i' => \$sort_tables,
56  'url=s' => \$url,
57  'skip_connection' => \$skip_conn,
58  'intro=s' => \$intro_file,
59  'help!' => \$help,
60  'help_format' => \$help_format,
61 );
62 
63 usage() if ($help);
64 sql_documentation_format() if ($help_format);
65 
66 
67 if (!$sql_file) {
68  print "> Error! Please give a sql file using the option '-i' \n";
69  usage();
70 }
71 
72 $show_colour = 1 if (!defined($show_colour));
73 $header_flag = 1 if (!defined($header_flag));
74 $sort_headers = 1 if (!defined($sort_headers));
75 $sort_tables = 1 if (!defined($sort_tables));
76 
77 $skip_conn = undef if ($skip_conn == 0);
78 
79 # Dababase connection (optional)
80 if (defined($url) && !defined($skip_conn)) {
81  $db_connection = new Bio::EnsEMBL::Hive::DBSQL::DBConnection(
82  -url => $url,
83  ) or die("DATABASE CONNECTION ERROR: Could not get a database adaptor for $url\n");
84 }
85 
86 
87 
88 
89 ################
90 ### Settings ##
91 ################
92 
93 my $default_colour = '#000'; # Black
94 
95 my $documentation = {};
96 my $tables_names = {'default' => []};
97 my @header_names = ('default');
98 my @colours = ($default_colour);
99 my %legend;
100 
101 my $in_doc = 0;
102 my $in_table = 0;
103 my $header = 'default';
104 my $table = '';
105 my $info = '';
106 my $nb_by_col = 15;
107 my $count_sql_col = 0;
108 my $tag_content = '';
109 my $tag = '';
110 my $parenth_count = 0;
111 my $header_colour;
112 my $pk = [];
113 
114 my $SQL_LIMIT = 50;
115 
116 
117 #############
118 ## Parser ##
119 #############
120 
121 # Create a complex hash "%$documentation" to store all the documentation content
122 
123 open my $sql_fh, '<', $sql_file or die "Can't open $sql_file : $!";
124 while (<$sql_fh>) {
125  chomp $_;
126  next if ($_ eq '');
127  next if ($_ =~ /^\s*(DROP|PARTITION)/i);
128  next if ($_ =~ /^\s*(#|--)/); # Escape characters
129 
130  # Verifications
131  if ($_ =~ /^\/\*\*/) { $in_doc=1; next; } # start of a table documentation
132  if ($_ =~ /^\s*create\s+table\s+(if\s+not\s+exists\s+)?`?(\w+)`?/i) { # start to parse the content of the table
133  $pk = [];
134  if ($table eq $2) {
135  $in_table=1;
136  $parenth_count++;
137  }
138  else {
139  print STDERR "The documentation of the table $2 has not be found!\n";
140  }
141  next;
142  }
143  next if ($in_doc==0 and $in_table==0);
144 
145  my $doc = remove_char($_);
146 
147  #================================================#
148  # Parsing the documentation part of the SQL file #
149  #================================================#
150  if ($in_doc==1) {
151  # Header name
152  if ($doc =~ /^\@header\s*(.+)$/i and $header_flag == 1) {
153  $header = $1;
154  unless (exists $tables_names->{$header}) {
155  push (@header_names,$header);
156  $tables_names->{$header} = [];
157  }
158  next;
159  }
160  # Table name
161  elsif ($doc =~ /^\@table\s*(\w+)/i) {
162  $table = $1;
163  push(@{$tables_names->{$header}},$table);
164  $documentation->{$header}{'tables'}{$table} = { 'desc' => '', 'colour' => '', 'column' => [], 'example' => [], 'see' => [], 'info' => [] };
165  $tag = $tag_content = '';
166  }
167  # Description (used for both set, table and info tags)
168  elsif ($doc =~ /^\@(desc)\s*(.+)$/i) {
169  fill_documentation ($1,$2);
170  }
171  # Colour of the table header (used for both set, table) (optional)
172  elsif ($doc =~ /^\@(colour)\s*(.+)$/i) {
173  fill_documentation ($1,$2) if ($show_colour);
174  }
175  # Column
176  elsif ($doc =~ /^\@(column)\s*(.+)$/i) {
177  fill_documentation ($1,$2);
178  }
179  # Example
180  elsif ($doc =~ /^\@(example)\s*(.+)$/i) {
181  fill_documentation ($1,$2);
182  }
183  # See other tables
184  elsif ($doc =~ /^\@(see)\s*(\w+)\s*$/i) {
185  fill_documentation ($1,$2);
186  }
187  # Addtional information block
188  elsif ($doc =~ /^\@(info)\s*(.+)$/i) {
190  $info = $2;
191  next;
192  }
193  # End of documentation
194  elsif ($doc =~ /^\*\//) { # End of the documentation block
195  fill_documentation (); # Add the last tag content to the documentation hash
196  $in_doc=0;
197  next;
198  }
199  # Add legend colour description
200  elsif ($doc =~ /^\@(legend)\s*(#\S+)\s+(.+)$/i) {
201  $legend{$2} = $3;
202  }
203  elsif ($doc =~ /^\s*(.+)$/) { # If a tag content is split in several lines
204  $tag_content .= " $1";
205  }
206  }
207 
208  #=====================================================#
209  # Parsing of the SQL table to fetch the columns types #
210  #=====================================================#
211  elsif ($in_table==1) {
212 
213  # END OF TABLE DEFINITION
214  # Can't do this easily with a simply regex as there are varying valid formats
215  # The end of the table definition is actually defined by 2nd enclosing bracket
216 
217  # Regex counting VOODOO!
218  # This basically puts the regex in a list context
219  # before inc/dec'ing with it in a scalar context.
220  $parenth_count +=()= $doc =~ /\(/gi;
221  $parenth_count -=()= $doc =~ /\)/gi;
222 
223 
224  if ($parenth_count == 0) { # End of the sql table definition
225  if (scalar(@$pk)) {
226  add_column_index('primary key', join(',', @$pk));
227  }
228  if (scalar @{$documentation->{$header}{'tables'}{$table}{column}} > $count_sql_col) {
229  print STDERR "Description of a non existant column in the table $table!\n";
230  }
231 
232  $in_table=0;
233  $count_sql_col = 0;
234  $table='';
235  $parenth_count = 0;
236  }
237  else {
238 
239  #---------#
240  # INDEXES #
241  #---------#
242 
243  # Remove the comments
244  $doc =~ s/--\s.*$//;
245 
246  # Skip the blank lines
247  next if ($doc =~ /^\s+$/);
248 
249  if ($doc =~ /FOREIGN\s+KEY\s+\((\S+)\)\s+REFERENCES\s+(\S+)\s*\((\S+)\)/i) { # foreign key
250  push @{$documentation->{$header}{'tables'}{$table}->{foreign_keys}}, [$1,$2,$3];
251  next;
252  }
253  elsif ($doc =~ /^\s*(primary\s+key)\s*\w*\s*\((.+)\)/i or $doc =~ /^\s*(unique)\s*\((.+)\)/i){ # Primary or unique;
254  add_column_index($1,$2);
255  next;
256  }
257  elsif ($doc =~ /^\s*(unique\s+)?(key|index)\s+([^\s\(]+)\s*\((.+)\)/i) { # Keys and indexes
258  add_column_index("$1$2",$4,$3);
259  next;
260  }
261  elsif ($doc =~ /^\s*(unique)\s+(\S*)\s*\((.+)\)/i) { # Unique
262  add_column_index("$1",$3,$2);
263  next;
264  }
265  elsif ($doc =~ /^\s*(key|index)\s+\((.+)\)/i) { # Keys
266  add_column_index("$1",$2);
267  next;
268  }
269 
270  #----------------------------------#
271  # COLUMNS & TYPES & DEFAULT VALUES #
272  #----------------------------------#
273  my $col_name = '';
274  my $col_type = '';
275  my $col_def = '';
276 
277  # All the type is contained in the same line (type followed by parenthesis)
278  if ($doc =~ /^\W*(\w+)\W+(\w+\s?\(.*\))/ ){
279  $col_name = $1;
280  $col_type = $2;
281  if ($doc =~ /default\s+([^,\s]+)\s*.*(,|#).*/i) { $col_def = $1; } # Default value
282  }
283 
284  # The type is written in several lines
285  elsif ($doc =~ /^\W*(\w+)\W+(enum|set)(\s?\(.*)/i){ # The content is split in several lines
286  $col_name= $1;
287  $col_type="$2$3<br />";
288  my $end_type = 0;
289  while ($end_type != 1){
290  my $line = <$sql_fh>;
291  chomp $line;
292  $line = remove_char($line);
293 
294  # Regex counting VOODOO again
295  $parenth_count +=()= $line =~ /\(/gi;
296  $parenth_count -=()= $line =~ /\)/gi;
297 
298  if ($line =~ /\)/) { # Close parenthesis
299  $end_type=1;
300  $line =~ /^\s*(.+)\)/;
301  $col_type .= "$1)";
302  }
303  else { # Add the content of the line
304  $line =~ /^\s*(.+)/;
305  $col_type .= $1.'<br />';
306  }
307  if ($line =~ /default\s+([^,\s]+)\s*.*(,|#).*/i) { $col_def = $1; } # Default value
308  }
309  }
310 
311  # All the type is contained in the same line (type without parenthesis)
312  elsif ($doc =~ /^\s*\W*(\w+)\W+(\w+)/ ){
313  $col_name = $1;
314  $col_type = $2;
315  if ($doc =~ /default\s*([^,\s]+)\s*.*(,|#).*/i) { $col_def = $1; } # Default value
316  }
317 
318  # Default value
319  if (!defined($col_def) || $col_def eq '') {
320  $col_def = ($doc =~ /not\s+null/i ) ? '*not set*' : 'NULL';
321  }
322 
323  add_column_type_and_default_value($col_name,$col_type,$col_def);
324 
325  if ($doc =~ /\bprimary\s+key\b/i) {
326  push @$pk, $col_name;
327  }
328  }
329  }
330 }
331 close($sql_fh);
332 
333 my %table_documentation;
334 foreach my $c (keys %$documentation) {
335  my $h = $documentation->{$c};
336  foreach my $table_name (keys %{$h->{tables}}) {
337  $table_documentation{$table_name} = $h->{tables}->{$table_name};
338  $h->{tables}->{$table_name}->{category} = $c;
339  }
340 }
341 
342 if ($fk_sql_file) {
343  open $sql_fh, '<', $fk_sql_file or die "Can't open $fk_sql_file : $!";
344  while (<$sql_fh>) {
345  chomp $_;
346  next if ($_ eq '');
347  my $doc = remove_char($_);
348  if ($doc =~ /ALTER\s+TABLE\s+(\S+)\s+ADD.*FOREIGN\s+KEY\s+\((\S+)\)\s+REFERENCES\s+(\S+)\((\S+)\)/i) {
349  push @{$table_documentation{$1}->{foreign_keys}}, [$2,$3,$4];
350  } elsif ($doc =~ /ALTER.*FOREIGN/i) {
351  die "Unrecognized: $doc";
352  }
353  }
354  close($sql_fh);
355 }
356 
357 sub table_box {
358  my ($graph, $table_name) = @_;
359  my $table_doc = $table_documentation{$table_name};
360  my @rows = map {sprintf('<tr><td bgcolor="white" port="port%s">%s%s</td></tr>', $_->{name}, ($_->{index} =~ /\bprimary\b/i ? '<B>PK</B>&nbsp;&nbsp;' : ''), $_->{name})} @{$table_doc->{column}};
361  $graph->add_node($table_name,
362  'shape' => 'box',
363  'style' => 'filled,rounded',
364  'fillcolor' => $table_doc->{colour},
365  'label' => sprintf('<<table border="0"><th><td><font point-size="16">%s</font></td></th><hr/>%s</table>>', $table_name, join('', @rows)),
366  );
367 }
368 
370  my ($show_clusters, $column_links) = @_;
372  'label' => "$db_team schema diagram",
373  'fontsize' => 20,
374  $column_links
375  ? ( 'rankdir' => 'LR', 'concentrate' => 'true', )
376  : ( 'splines' => 'ortho', ),
377  );
378  foreach my $table_name (sort keys %table_documentation) {
379  table_box($graph, $table_name);
380  }
381  foreach my $table_name (sort keys %table_documentation) {
382  foreach my $fk (@{$table_documentation{$table_name}->{foreign_keys}}) {
383  $graph->add_edge($table_name => $fk->[1],
384  'style' => 'dashed',
385  $column_links ? (
386  'from_port' => "$fk->[0]:e",
387  'to_port' => "$fk->[2]:w",
388  ) : (),
389  );
390  }
391  }
392 
393  if ($show_clusters) {
394  foreach my $h (sort keys %$documentation) {
395  my $cluster_id = clean_name($h);
396  my $c = blend_colors($documentation->{$h}->{colour}, '#FFFFFF', 0.8);
397  $graph->cluster_2_attributes()->{$cluster_id} = {
398  'cluster_label' => $h,
399  'style' => 'rounded,filled,noborder',
400  'fill_colour_pair' => ["#$c"],
401  };
402  my @cluster_nodes;
403  $graph->cluster_2_nodes()->{$cluster_id} = \@cluster_nodes;
404  foreach my $t (sort keys %{$documentation->{$h}->{tables}}) {
405  push @cluster_nodes, $t;
406  }
407  }
408  }
409  return $graph;
410 }
411 
412 
413 sub blend_colors {
414  my ($color1, $color2, $alpha) = @_;
415  my @rgb1 = map {hex($_)} unpack("(A2)*", substr $color1, 1);
416  my @rgb2 = map {hex($_)} unpack("(A2)*", substr $color2, 1);
417  my @rgb = map {int($rgb1[$_] + $alpha * ($rgb2[$_]-$rgb1[$_]))} 0..2;
418  my $c = join('', map {sprintf('%X', $_)} @rgb);
419  return $c;
420 }
421 
422 sub sub_table_box {
423  my ($graph, $table_name, $fields) = @_;
424  my $table_doc = $table_documentation{$table_name};
425  my @rows;
426  my $has_ellipsis;
427  foreach my $c (@{$table_doc->{column}}) {
428  if ($fields->{$c->{name}}) {
429  push @rows, sprintf('<tr><td bgcolor="white" port="port%s">%s%s</td></tr>', $c->{name}, ($c->{index} =~ /\bprimary\b/i ? '<B>PK</B>&nbsp;&nbsp;' : ''), $c->{name});
430  $has_ellipsis = 0;
431  } elsif (!$has_ellipsis) {
432  push @rows, '<tr><td bgcolor="white"><i>...</i></td></tr>';
433  $has_ellipsis = 1;
434  }
435  }
436  $graph->add_node($table_name,
437  'shape' => 'box',
438  'style' => 'filled,rounded',
439  'fillcolor' => $table_doc->{colour},
440  'label' => sprintf('<<table border="0"><th><td><font point-size="16">%s</font></td></th><hr/>%s</table>>', $table_name, join('', @rows)),
441  );
442 }
443 
445  my ($cluster, $column_links) = @_;
447  'label' => "$db_team schema diagram: $cluster tables",
448  'fontsize' => 20,
449  $column_links
450  ? ( 'rankdir' => 'LR', 'concentrate' => 'true', )
451  : ( 'splines' => 'ortho', ),
452  );
453  foreach my $table_name (sort keys %{$documentation->{$cluster}->{tables}}) {
454  table_box($graph, $table_name);
455  }
456  my %clusters_to_draw = ($cluster => 1);
457  my %other_table_fields;
458  my @drawn_fks;
459  foreach my $table_name (sort keys %table_documentation) {
460  foreach my $fk (@{$table_documentation{$table_name}->{foreign_keys}}) {
461  if ($table_documentation{$table_name}->{category} eq $cluster) {
462  $other_table_fields{$fk->[1]}->{$fk->[2]} = 1;
463  $clusters_to_draw{ $table_documentation{$fk->[1]}->{category} } = 1;
464  push @drawn_fks, [$table_name, @$fk];
465  } elsif ($table_documentation{$fk->[1]}->{category} eq $cluster) {
466  $other_table_fields{$table_name}->{$fk->[0]} = 1;
467  $clusters_to_draw{ $table_documentation{$table_name}->{category} } = 1;
468  push @drawn_fks, [$table_name, @$fk];
469  }
470  }
471  }
472  foreach my $table_name (sort keys %other_table_fields) {
473  sub_table_box($graph, $table_name, $other_table_fields{$table_name} || {}) if $cluster ne $table_documentation{$table_name}->{category};
474  }
475  foreach my $fk (@drawn_fks) {
476  my $table_name = shift @$fk;
477  $graph->add_edge($table_name => $fk->[1],
478  'style' => 'dashed',
479  $column_links ? (
480  'from_port' => $fk->[0].':e',
481  'to_port' => $fk->[2].':w',
482  ) : (),
483  );
484  }
485 
486  foreach my $h (sort keys %clusters_to_draw) {
487  #next unless $h eq $cluster;
488  my $cluster_id = clean_name($h);
489  my $c = blend_colors($documentation->{$h}->{colour}, '#FFFFFF', 0.8);
490  $graph->cluster_2_attributes()->{$cluster_id} = {
491  'cluster_label' => $h,
492  ($h eq $cluster) ?
493  ( 'style' => 'rounded,filled,noborder', 'fill_colour_pair' => ["#$c"], )
494  : ( 'style' => 'filled,noborder', 'fill_colour_pair' => ['white'] ),
495  };
496  my @cluster_nodes;
497  $graph->cluster_2_nodes()->{$cluster_id} = \@cluster_nodes;
498  foreach my $t (sort keys %{$documentation->{$h}->{tables}}) {
499  push @cluster_nodes, $t if (($h eq $cluster) || $other_table_fields{$t});
500  }
501  }
502  return $graph;
503 }
504 
505 sub clean_name {
506  my $n = shift;
507  $n =~ s/\s+/_/g;
508  $n =~ s/[\-\/]+/_/g;
509  return lc $n;
510 }
511 
512 # Sort the headers names by alphabetic order
513 if ($sort_headers == 1) {
514  @header_names = sort(@header_names);
515 }
516 # Sort the tables names by alphabetic order
517 if ($sort_tables == 1) {
518  while ( my($header_name,$tables) = each (%{$tables_names})) {
519  @{$tables} = sort(@{$tables});
520  }
521 }
522 
523 # Remove the empty headers (e.g. "default")
524 @header_names = grep {scalar(@{$tables_names->{$_}})} @header_names;
525 
526 
527 #####################
528 ## Schema diagrams ##
529 #####################
530 my %diagram_dotcode;
531 if ($embed_diagrams) {
532  my $graph = generate_whole_diagram('show_clusters', 'column_links');
533  $diagram_dotcode{''} = $graph->as_debug();
534  foreach my $c (@header_names) {
535  my $graph = generate_sub_diagram($c, 'column_links');
536  $diagram_dotcode{$c} = $graph->as_debug();
537  }
538 }
539 
540 
541 #################
542 ## RST content ##
543 #################
544 
545 my $html_content = '';
546 
547 # Legend link
548 if ($show_colour and scalar @colours > 1 and $header_flag != 1) {
549  $html_content .= qq{A colour legend is available at the <a href="#legend">bottom of the page</a>.\n<br /><br />};
550 }
551 
552 #=============================================#
553 # List of tables by header (i.e. like a menu) #
554 #=============================================#
555 $html_content .= display_tables_list();
556 my $table_count = 1;
557 my $col_count = 1;
558 
559 $html_content .= ".. raw:: latex\n\n \\begin{landscape}\n\n";
560 
561 #========================================#
562 # Display the detailled tables by header #
563 #========================================#
564 my $header_id = 1;
565 foreach my $header_name (@header_names) {
566  my $tables = $tables_names->{$header_name};
567 
568  #----------------#
569  # Header display #
570  #----------------#
571  my $category_title = $documentation->{$header_name}->{'colour'} ? sprintf(':schema_table_header:`<%s,square>%s`', $documentation->{$header_name}->{'colour'}, $header_name) : $header_name;
572  $html_content .= rst_title($category_title, '~') . "\n";
573 
574  #------------------------#
575  # Additional information #
576  #------------------------#
577  $html_content .= rst_add_indent_to_block($documentation->{$header_name}{'desc'}, " ") . "\n\n" if $documentation->{$header_name}{'desc'};
578  if ($embed_diagrams) {
579  my $l = clean_name($header_name);
580  $html_content .= ".. schema_diagram::\n\n" . rst_add_indent_to_block($diagram_dotcode{$header_name}, ' ') . "\n\n";
581  }
582 
583  #----------------#
584  # Tables display #
585  #----------------#
586  foreach my $t_name (@{$tables}) {
587  my $data = $documentation->{$header_name}{'tables'}{$t_name};
588 
589  my $table_title = $documentation->{$header_name}->{'colour'} ? sprintf(':schema_table_header:`<%s,round>%s`', $documentation->{$header_name}->{'colour'}, $t_name) : $t_name;
590  $html_content .= rst_title($table_title, '+');
591  $html_content .= add_description($data) . "\n";
592  $html_content .= add_columns($t_name,$data);
593  $html_content .= add_examples($t_name,$data);
594  }
595 }
596 
597 
598 $html_content .= ".. raw:: latex\n\n \\end{landscape}\n\n";
599 
600 
601 ######################
602 ## HTML/output file ##
603 ######################
604 my $output_fh;
605 if ($html_file) {
606  open $output_fh, '>', $html_file or die "Can't open $html_file : $!";
607 } else {
608  $output_fh = \*STDOUT;
609 }
610 print $output_fh slurp_intro($intro_file)."\n";
611 print $output_fh $html_content."\n";
612 close($output_fh);
613 
614 
615 sub rst_title {
616  my ($title, $underscore_symbol) = @_;
617  return $title . "\n" . ($underscore_symbol x length($title)) . "\n";
618 }
619 
620 sub block_width {
621  my ($block) = @_;
622  my @lines = split /\n/, $block;
623  return max(map {length($_)} @lines);
624 }
625 
626 sub column_width {
627  my ($data, $i) = @_;
628  return max(map {block_width($_->[$i])} @$data);
629 }
630 
631 sub rst_list_table {
632  my ($data, $class) = @_;
633 
634  my @widths = map {column_width($data, $_)} 0..(scalar(@{$data->[0]})-1);
635  my $w = ":widths: ".join(" ", @widths);
636 
637  my $table_content = join("\n", map {rst_list_table_row($_)} @$data);
638  return ".. list-table::\n" . rst_add_indent_to_block(":header-rows: 1\n$w\n" . ($class ? ":class: $class\n" : "") . "\n" . $table_content, " ") . "\n\n";
639 }
640 
641 sub rst_list_table_row {
642  my ($row) = @_;
643  my $first_cell = shift @$row;
644  return rst_add_indent_to_block($first_cell, " ", "* - ") . "\n" . join("\n", map {rst_add_indent_to_block($_, " ", " - ")} @$row);
645 }
646 
648  my ($block, $indent, $first_indent) = @_;
649  $first_indent //= $indent;
650  my @lines = split /\n/, $block;
651  my $first_line = shift @lines;
652  if (@lines) {
653  return $first_indent . $first_line . "\n" . join("\n", map {$indent . $_} @lines);
654  }
655  return $first_indent . $first_line;
656 }
657 
658 sub rst_bullet_list {
659  my ($data, $list_symbol) = @_;
660  $list_symbol //= '-';
661  return join("\n", map {$list_symbol . " " . $_} @$data);
662 }
663 
664 ###############
665 ## Methods ##
666 ###############
667 
668 # List the table names. Group them if possible
669 # By default there is one group, named "default" and it contains all the tables.
671 
672 
673  my $rest = '';
674 
675  if ($embed_diagrams) {
676  $rest .= rst_title('Schema diagram', '=') . "\n";
677  $rest .= "The $db_team schema diagrams are automatically generated as PNG images with Graphviz, and show the links between columns of each table.\n";
678  $rest .= "Here follows the overall schema diagram, while the individual diagrams of each category are available below, together with the table descriptions.\n\n";
679  $rest .= ".. schema_diagram::\n\n" . rst_add_indent_to_block($diagram_dotcode{''}, ' ') . "\n\n";
680  }
681 
682  $rest .= rst_title('Table list', '=') . "\n";
683  $rest .= "Here is the list of all the tables found in a typical $db_team database, grouped by categories.\n\n";
684 
685  if (scalar(@header_names) == 1) {
686  return rst_bullet_list($tables_names->{$header_names[0]}) . "\n";
687  }
688 
689  # Remove the empty headers (e.g. "default")
690  my @useful_header_names = grep {scalar(@{$tables_names->{$_}})} @header_names;
691 
692  # No more than 3 categories at a time
693  my $max_headers_per_line = 3;
694  while (scalar(@useful_header_names)) {
695  my @headers_this_time = splice(@useful_header_names, 0, $max_headers_per_line);
696 
697  my @first_row = map {$documentation->{$_}->{'colour'} ? sprintf(':schema_table_header:`<%s,square>%s`', $documentation->{$_}->{'colour'}, $_) : $_} @headers_this_time;
698  my @second_row = map {rst_bullet_list([map {$_.'_'} @{$tables_names->{$_}}])} @headers_this_time;
699  my @data = (\@first_row, \@second_row);
700 
701  $rest .= rst_list_table(\@data, 'sql-schema-table');
702  }
703 
704  return $rest;
705 }
706 
707 
708 
709 # Method to pick up the documentation information contained in the SQL file.
710 # If the line starts by a @<tag>, the previous tag content is added to the documentation hash.
711 # This method allows to describe the content of a tag in several lines.
712 sub fill_documentation {
713  my $t1 = shift;
714  my $t2 = shift;
715 
716  if ($tag ne '') {
717  # Description tag (info, table or header)
718  if ($tag eq 'desc') {
719  # Additional description
720  if ($info ne '') {
721  $tag_content = $info.'@info@'.$tag_content;
722  # Table: additional information
723  if ($table ne '') {
724  push(@{$documentation->{$header}{'tables'}{$table}{'info'}},$tag_content);
725  }
726  # Header: additional information
727  else {
728  if (!$documentation->{$header}{'info'}) {
729  $documentation->{$header}{'info'} = [];
730  }
731  push(@{$documentation->{$header}{'info'}},$tag_content);
732  }
733  $info = '';
734  }
735  # Header description
736  elsif(!$documentation->{$header}{'tables'}) {
737  $documentation->{$header}{'desc'} = $tag_content;
738  }
739  # Table description
740  else {
741  $documentation->{$header}{'tables'}{$table}{$tag} = $tag_content;
742  }
743  }
744  elsif ($tag eq 'colour') {
745  if(!$documentation->{$header}{'tables'}) {
746  $documentation->{$header}{'colour'} = $tag_content;
747  $header_colour = 1;
748  }
749  elsif ($table ne '') {
750  $documentation->{$header}{'tables'}{$table}{$tag} = $tag_content;
751  if (! grep {$tag_content eq $_} @colours) {
752  push (@colours,$tag_content);
753  }
754  }
755  }
756  elsif ($tag eq 'column') {
757  $tag_content =~ /(\w+)[\s\t]+(.*)/;
758 
759  my $column = { 'name' => $1,
760  'type' => '',
761  'default' => '',
762  'index' => '',
763  'desc' => $2
764  };
765  if ($2 eq '') {
766  print STDERR "COLUMN: The description content of the column '$1' is missing in the table $table!\n";
767  }
768  push(@{$documentation->{$header}{'tables'}{$table}{$tag}},$column);
769  }
770  else{
771  push(@{$documentation->{$header}{'tables'}{$table}{$tag}},$tag_content);
772  }
773  }
774  # New tag initialised
775  if ($t1) {
776  $tag = $t1;
777  $tag_content = $t2;
778  }
779  else {
780  $tag = $tag_content = '';
781  }
782 }
783 
784 
785 # Method generating the HTML code to display the description content
786 sub add_description {
787  my $data = shift;
788 
789  # Search if there are some @link tags in the description text.
790  my $desc = add_internal_link($data->{desc},$data);
791 
792  return $desc . "\n";
793 }
794 
795 
796 # Method generating the HTML code of the table listing the columns of the given SQL table.
797 sub add_columns {
798  my $table = shift;
799  my $data = shift;
800  my $cols = $data->{column};
801 
802  my @data;
803  my @header_row = ('Column', 'Type', 'Default value', 'Description', 'Index');
804  push @data, \@header_row;
805 
806  foreach my $col (@$cols) {
807  my $name = $col->{name};
808  my $type = $col->{type};
809  my $default = $col->{default};
810  my $desc = $col->{desc};
811  my $index = $col->{index};
812 
813  # links
814  $desc = add_internal_link($desc,$data);
815 
816  $type = parse_column_type($type);
817 
818  my @row = ("**$name**", $type, $default, $desc, $index);
819  push @data, \@row;
820  }
821 
822  return rst_list_table(\@data);
823 }
824 
825 
826 # Method generating the HTML code to display the content of the tags @example (description + SQL query + Table of SQL query results)
827 sub add_examples {
828  my $table = shift;
829  my $data = shift;
830  my $examples = $data->{example};
831  my $html;
832 
833  my $nb = (scalar(@$examples) > 1) ? 1 : '';
834 
835  foreach my $ex (@$examples) {
836  my @lines = split("\n",$ex);
837  my $nb_display = ($nb ne '') ? " $nb" : $nb;
838  $html .= rst_title("Example$nb_display:", '-') . "\n";
839  my $has_desc = 0;
840  my $sql;
841 
842  # Parse the example lines
843  foreach my $line (@lines) {
844  chomp($line);
845 
846  # Pick up the SQL query if it exists
847  if ($line =~ /(.*)\s*\@sql\s*(.+)/) {
848  $html .= $1;
849  $sql = $2;
850  } elsif (!defined($sql)){
851  $html .= $line;
852  $has_desc = 1;
853  }
854  else {
855  $sql .= $line;
856  }
857  }
858  $html .= "\n\n";
859 
860  # Search if there are some @link tags in the example description.
861  $html = add_internal_link($html,$data);
862 
863  # Add a table of examples
864  if (defined($sql)) {
865  my $sql_table = '';
866  if (!defined($skip_conn) && defined($url)) {
867  $sql_table = get_example_table($sql,$table,$nb);
868  }
869 
870  $html .= ".. code-block:: sql\n\n" . rst_add_indent_to_block($sql, " ") . "\n\n" . $sql_table . "\n";
871  }
872  $nb ++;
873  }
874 
875  return $html;
876 }
877 
878 
879 # Method generating the HTML code to display the content of the tags @see
880 sub add_see {
881  my $sees = shift;
882  my $html = '';
883 
884  if (scalar @$sees) {
885  $html .= qq{ <td class="sql_schema_extra_left">\ <p style="font-weight:bold">See also:</p>\n <ul>\n};
886  foreach my $see (@$sees) {
887  $html .= qq{ <li><a href="#$see">$see</a></li>\n};
888  }
889  $html .= qq{ </ul>\n </td>\n};
890  }
891 
892  return $html;
893 }
894 
895 
896 # Method searching the tag @link into the string given as argument and replace it by an internal HTML link
897 sub add_internal_link {
898  my $desc = shift;
899  my $data = shift;
900  while ($desc =~ /\@link\s?(\w+)/) {
901  my $link = $1;
902  if ((!grep {$link eq $_} @{$data->{see}}) and defined($link)) {
903  push @{$data->{see}}, $link;
904  }
905  my $table_to_link = $link;
906  $desc =~ s/\@link\s?\w+/$table_to_link/;
907  }
908  return $desc;
909 }
910 
911 
912 # Method parsing the index information from the SQL table description in order to display it in the
913 # HTML table listing the columns of the corresponding SQL table.
914 sub add_column_index {
915  my $idx_type = shift;
916  my $idx_col = shift;
917  my $idx_name = shift;
918 
919  my $index = $idx_type;
920  if (!defined($idx_name)) {
921  $idx_name = $idx_col;
922  }
923  if ($idx_type !~ /primary/i) {
924  $index .= ": *$idx_name*";
925  }
926  my @idx_cols = split(',',$idx_col); # The index can involve several columns
927 
928  my %is_found = ();
929  foreach my $i_col (@idx_cols) {
930  $i_col =~ s/\s//g; # Remove white spaces
931  # In case of index using a number characters for a column
932  if ($i_col =~ /(.+)\(\d+\)/) {
933  $i_col = $1;
934  }
935  $is_found{$i_col} = 0;
936  foreach my $col (@{$documentation->{$header}{tables}{$table}{column}}) {
937  if ($col->{name} eq $i_col) {
938  if ($col->{index} ne '') {
939  $col->{index} .= "\n";
940  }
941  $col->{index} .= lc($index);
942  $is_found{$i_col} = 1;
943  last;
944  }
945  }
946  }
947  # Description missing
948  while (my ($k,$v) = each(%is_found)) {
949  if ($v==0) {
950  print STDERR "INDEX: The description of the column '$k' is missing in the table $table!\n";
951  }
952  }
953 }
954 
955 
956 # Method parsing the column type and default value from the SQL table description, in order to display them in the
957 # HTML table listing the columns of the corresponding SQL table.
959  my $c_name = shift;
960  my $c_type = shift;
961  my $c_default = shift;
962  $count_sql_col ++;
963 
964  my $is_found = 0;
965  foreach my $col (@{$documentation->{$header}{'tables'}{$table}{column}}) {
966  if ($col->{name} eq $c_name) {
967  $col->{type} = $c_type;
968  $col->{default} = $c_default if ($c_default ne '');
969  $is_found = 1;
970  last;
971  }
972  }
973  # Description missing
974  if ($is_found==0) {
975  print STDERR "COLUMN: The description of the column '$c_name' is missing in the table $table!\n";
976  }
977 }
978 
979 
980 # Display the types "enum" and "set" as an HTML list (<ul><li>)
981 sub parse_column_type {
982  my $type = shift;
983  $type =~ /^\s*(enum|set)\s*\((.*)\)/i;
984  my $c_type = uc($1);
985  my $c_data = $2;
986  return $type unless ($c_data);
987 
988  $c_data =~ s/'//g;
989  $c_data =~ s/"//g;
990  $c_data =~ s/\s//g;
991  $c_data =~ s/<br \/>//g;
992 
993  my @items_list = split(',',$c_data);
994 
995  return $type unless (scalar(@items_list) > 1);
996 
997  return $c_type . ":\n\n" . rst_bullet_list(\@items_list);
998 }
999 
1000 
1001 # Method to query the database with the SQL query example, get the result and display it
1002 # in an HTML table.
1003 sub get_example_table {
1004  my $sql = shift;
1005  my $table = shift;
1006  my $nb = shift;
1007  my $html;
1008 
1009  $sql =~ /select\s+(.+)\s+from/i;
1010  my $cols = $1;
1011  $cols = '*' if $cols =~ /^\*\s/;
1012  my @tcols;
1013 
1014  foreach my $col (split(',',$cols)) {
1015 
1016  # Columns selection like the expressions "table.*" or "*"
1017  my $table_name;
1018  $table_name = $table if ($cols eq '*');
1019  $table_name = $1 if ($col =~ /(\S+)\.\*/ and !defined($table_name));
1020  if (defined($table_name)) {
1021  my $table_cols = $db_connection->selectall_arrayref(qq{SHOW COLUMNS FROM $table_name});
1022  foreach my $col (@$table_cols) {
1023  push(@tcols,$col->[0]);
1024  }
1025  next;
1026  }
1027 
1028  # Check for alias
1029  $col = $1 if ($col =~ /\s+as\s+(\S+)$/i);
1030 
1031  $col =~ s/ //g;
1032  push(@tcols,$col);
1033  }
1034 
1035  my $results = $db_connection->selectall_arrayref($sql);
1036  if (scalar(@$results)) {
1037  my @data;
1038  push @data, \@tcols;
1039 
1040  my $count = 0;
1041  foreach my $result (@$results) {
1042  last if ($count >= $SQL_LIMIT);
1043  push @data, [map {$_ // 'NULL'} @$result];
1044  $count ++;
1045  }
1046  return rst_list_table(\@data);
1047  } else {
1048  my $msg = qq{The SQL query displayed above returned no results!};
1049  print STDERR qq{SQL: $sql\n$msg\n};
1050  return ".. error::\n\n $msg\n\n";
1051  }
1052 }
1053 
1054 
1055 
1056 # Removed the character(s) ` from the read line.
1057 sub remove_char {
1058  my $text = shift;
1059  $text =~ s/`//g;
1060  return $text;
1061 }
1062 
1063 
1064 # Insert the introduction text of the web page
1065 sub slurp_intro {
1066  my $intro_file = shift;
1067  return qq{This document describes the tables that make up the $db_team schema. Tables are grouped into categories, and the purpose of each table is explained.\n} if (!defined $intro_file);
1068 
1069  local $/=undef;
1070  open my $fh, '<', $intro_file or die "Can't open $intro_file: $!";
1071  my $intro_html = <$fh>;
1072  close $fh;
1073 
1074  $intro_html =~ s/####DB_VERSION####/$version/g if (defined($version));
1075 
1076  return $intro_html;
1077 }
1078 
1079 
1080 ##################
1081 ## Help methods ##
1082 ##################
1083 
1084 sub sql_documentation_format {
1085  print q{
1086 
1087 #--------------------------#
1088 # Example of documentation #
1089 #--------------------------#
1090 
1120 create table variation (
1121  variation_id int(10) unsigned not null auto_increment, # PK
1122  source_id int(10) unsigned not null,
1123  name varchar(255),
1124  validation_status SET('cluster','freq','submitter','doublehit','hapmap','1000Genome','failed','precious'),
1125  ancestral_allele text,
1126  flipped tinyint(1) unsigned NULL DEFAULT NULL,
1127  class_so_id ENUM('SO:0001483','SO:1000002','SO:0000667') DEFAULT 'SO:0001059', # default to sequence_alteration
1128 
1129  primary key( variation_id ),
1130  unique ( name ),
1131  key source_idx (source_id)
1132 );
1133 
1139 #========================================================================================================================#
1140 
1141 
1142 #------------------#
1143 # Tags description #
1144 #------------------#
1145  : begin and end of the document block
1147  @header : tag to create a group of tables
1148  @table : name of the sql table
1149  @desc : description of the role/content of the table, set or info tags
1150  @colour : tag to colour the header of the table (e.g. if the tables are coloured in the graphic SQL schema and you want to reproduce it in the HTML version)
1151  @column : column_name [tab(s)] Column description. Note: 1 ligne = 1 column
1152  @see : tables names linked to the described table
1153  @link : Internal link to an other table description. The format is ... @link table_name ...
1154  @info : tag to describe additional information about a table or a set of tables
1155  @legend : tag to fill the colour legend table at the end of the HTML page
1156  @example : tag to add some examples, like examples of SQL queries
1157  @sql : tag inside the @example tag, used to delimit a SQL query
1158 
1159 };
1160  exit(0);
1161 }
1162 
1163 
1164 sub usage {
1165 
1166  print q{
1167  Usage: perl sql2html.pl [OPTION]
1168 
1169  Convert the SQL documentation into an HTML document.
1170 
1171  Options:
1172 
1173  -help Print this message
1174  -help_format Print the description of the documentation format in the SQL files
1175 
1176  An input file must be specified. This file must be a SQL file, with the "Java-doc like documentation".
1177  For more information, please visit the following page:
1178  http://www.ebi.ac.uk/seqdb/confluence/display/EV/SQL+documentation
1179 
1180  -i A SQL file name (Required)
1181  -fk An external SQL file name with foreign keys statements (Optional)
1182  -o An HTML output file name (Required)
1183  -d The name of the database (e.g Core, Variation, Functional Genomics, ...)
1184  -c A flag to display the colours associated with the tables (1) or not (0). By default, the value is set to 1.
1185  -v Version of the schema. Replace the string ####DB_VERSION#### by the value of the parameter "-v", in the introduction text. (Optional)
1186  -intro A html/text file to include in the Introduction section (Optional. If not provided a default text will be inserted)
1187  -html_head A html/text file to include extra text inside the html <head></head> tags. (Optional)
1188  -show_header A flag to display headers for a group of tables (1) or not (0). By default, the value is set to 1.
1189  -embed_diagrams A flag to include schema diagrams as dot graphs
1190  -sort_headers A flag to sort (1) or not (0) the headers by alphabetic order. By default, the value is set to 1.
1191  -sort_tables A flag to sort (1) or not (0) the tables by alphabetic order. By default, the value is set to 1.
1192 
1193  Other optional options:
1194 
1195  # If you want to add some SQL query results as examples:
1196  -url URL of the database that has some data
1197  -skip_connection Avoid to run the MySQL queries contained in the "@example" tags.
1198 
1199  } . "\n";
1200  exit(0);
1201 }
column_width
public column_width()
rst_bullet_list
public rst_bullet_list()
slurp_intro
public slurp_intro()
map
public map()
add_internal_link
public add_internal_link()
add_columns
public add_columns()
add_column_type_and_default_value
public add_column_type_and_default_value()
block_width
public block_width()
fill_documentation
public fill_documentation()
usage
public usage()
rst_list_table_row
public rst_list_table_row()
add_see
public add_see()
Bio::EnsEMBL::Hive::Utils::GraphViz::new
public new()
display_tables_list
public display_tables_list()
Bio::EnsEMBL::Hive::DBSQL::DBConnection
Definition: DBConnection.pm:20
parse_column_type
public parse_column_type()
get_example_table
public get_example_table()
add_description
public add_description()
remove_char
public remove_char()
generate_whole_diagram
public generate_whole_diagram()
rst_title
public rst_title()
sub_table_box
public sub_table_box()
table_box
public table_box()
blend_colors
public blend_colors()
sql_documentation_format
public sql_documentation_format()
Bio::EnsEMBL::Hive::Utils::GraphViz
Definition: GraphViz.pm:16
add_examples
public add_examples()
clean_name
public clean_name()
rst_list_table
public rst_list_table()
add_column_index
public add_column_index()
rst_add_indent_to_block
public rst_add_indent_to_block()
info
public info()
generate_sub_diagram
public generate_sub_diagram()