ensembl-hive  2.7.0
PerlFilter.pm
Go to the documentation of this file.
1 =head1 LICENSE
2 
3 See the NOTICE file distributed with this work for additional information
4 regarding copyright ownership.
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 =cut
19 
20 =head1 LICENSE
21 
22  Doxygen Pre-Processor for Perl
23  Copyright (C) 2002 Bart Schuller
24  Copyright (C) 2006 Phinex Informatik AG
25  All Rights Reserved
26 
27  Doxygen Filter is free software; you can redistribute it and/or modify
28  it under the same terms as Perl itself.
29 
30  Larry Wall's 'Artistic License' for perl can be found in
31  http://www.perl.com/pub/a/language/misc/Artistic.html
32  ------------------------------------------------
33  Author: Aeby Thomas, Phinex Informatik AG,
34  Based on DoxygenFilter from Bart Schuller
35  E-Mail: tom.aeby@phinex.ch
36 
37  Phinex Informatik AG
38  Thomas Aeby
39  Kirchweg 52
40  1735 Giffers
41 
42  ------------------------------------------------
43  This completely rewritten version of Doxygen::PerlFilter
44 
45 =head1 CONTACT
46 
47  Please email comments or questions to the public Ensembl
48  developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
49 
50  Questions may also be sent to the Ensembl help desk at
51  <http://www.ensembl.org/Help/Contact>.
52 
53 =cut
54 
55 =head1 NAME
56 
57  EnsEMBL::PerlFilter
58 
59 =head1 DESCRIPTION
60 
61  Implementation of EnsEMBL::PerlFilter.
62  Derived from http://www.bigsister.ch/doxygenfilter by Bart Schuller and Thomas Aeby
63  Original distributed under Perl artistic license, see: http://www.bigsister.ch/doxygenfilter/license.html
64 
65  Adaptations by Kieron Taylor (ktaylor@ebi.ac.uk), 2011
66 
67  State Machine rewrite of existing filter.
68  Was going to use DFA::Command to handle the logic, but actually it won't work well for parsing Perl,
69  hence own simplified state machine. Intentionally not using PPI package to parse Perl, too complex.
70  This is a 80/20 EnsEMBL specific POD->Doxygen converter, however it should work somewhat with other code.
71 
72 
73 =cut
74 
75 package EnsEMBL::PerlFilter;
76 
77 use warnings;
78 use strict;
79 
80 use base qw(EnsEMBL::Filter);
81 
82 # Possible states
83 use constant {
84  NORMAL => 0,
85  INHERIT => 1,
86  PODTOP => 2,
87  PODSECTION => 3,
88  PODMETHOD => 4,
89  SEEALSO => 5,
90  TERMINAL => 6, #would have been END, but that's a reserved word
91  CODE => 7,
92 };
93 
94 # State-determining functions
95 my @parse = ( \&normal_parser, \&inheritance_parser, \&pod_top_parser, \&pod_section_parser, \&pod_method_parser, \&see_also_parser, \&finish, \&code_parser );
96 # State-reactive functions
97 my @act = ( \&normal_action, \&inheritance_action, \&pod_top_action, \&pod_section_action, \&pod_method_action, \&see_also_action, \&finish, \&code_action );
98 
99 my @buffer;
100 my $state; # state of state machine, see?
101 
102 my @big_buffer; # to absorb everything we want to print right up until we know whom we inherit from.
103 my $class_declaration;
104 my @inheritance;
105 my @leading_text;
106 my $method_description;
107 my $previous_doc_header;
108 my $brackets =0;
109 
110 my $id = __PACKAGE__;
111 
112 sub filter {
113  my($self, $infile) = @_;
114  open(my $infh, $infile);
115  my $current_class = "";
116  $state = NORMAL;
117  my $line;
118  # Read file, using lookup table to run correct parser on each line.
119  # Parsing is done in two stages:
120  # 1) Parsing based on the previous mode. The methods are called "_parse"
121  # 2) Parsing after any mode switch in stage 1. The methods are called "_action"
122  # Strictly speaking actions are also performed in "parse" code, but that's what was needed in the end.
123  while( defined($line = <$infh>) && $state != TERMINAL) {
124  my $sub_ref = $parse[$state];
125  $parse[$state]->([$self,$line]);
126  $act[$state]->([$self,$line]);
127  }
128  # Create the filtered file:
129  # beware, #include declarations are coming from elsewhere ( inheritance_action() ).
130 
131  my @namespaces;
132  my $class_name;
133  if (defined($class_declaration)) {
134  @namespaces = split(/::/,$class_declaration);
135  $class_name = pop @namespaces;
136  foreach (@namespaces) {
137  $self->print("namespace ".$_." {\n");
138  }
139  $self->more(@leading_text);
140  $self->print("class ".$class_name);
141  }
142  else {
143  $self->print("# No class definition in this file.");
144  warn "No package line found in $infile\n";
145  }
146  if (scalar @inheritance > 0) {
147  my $string = shift @inheritance;
148  $self->print(" : public ".$string);
149  foreach my $parent (@inheritance) {
150  $self->print(", public ".$parent);
151  }
152  }
153  $self->print(" {\n");
154  $self->print("public: \n");
155  $self->more(@big_buffer);
156  $self->print("};\n");
157  foreach (@namespaces) {
158  print("}\n");
159  }
160 }
161 
162 
163 my $packaged;
164 sub normal_parser {
165  my $args = $_[0];
166  my $self = $args->[0];
167  my $line = $args->[1];
168 
169  chomp($line);
170 
171  if ($line =~ /^=head1/) {
172  $state = PODTOP;
173  }
174  elsif ($line =~/^=head2/) { # head2 usually signifies a doc-block just before a method
175  $state = PODMETHOD;
176  }
177  elsif ($line =~/^1;/) {
178  $state = TERMINAL;
179  warn "Reached end of code: 1;\n";
180  }
181  elsif ($line =~ /^\s*package\s+(.*);/) {
182  if ($packaged) {$state = TERMINAL} else {
183  $class_declaration = $1;
184  $packaged = 1;
185  }
186  }
187  elsif ($line =~/^\s*use\s/ || $line =~/^(our|my)?\s*\@ISA/ || $line =~(/^\s*.*::ISA/) ){
188  $state = INHERIT;
189  }
190 
191 }
192 
193 sub normal_action {
194  my $args = $_[0];
195  my $self = $args->[0];
196  my $line = $args->[1];
197  # for catching undocumented subroutines and adding code blocks to documentation
198  if ($line =~ /^\s*sub\s+([\w:]+)/) {
199  $state = CODE;
200  my $method_name = $1;
201  if ($line =~ /{/) {$brackets = 1;} else {$brackets = 0;}
202  #warn "Previous: $previous_doc_header. Present: $method_name\n";
203  if (defined($previous_doc_header) && $previous_doc_header =~ /$method_name/){
204  # We've found the corresponding sub to go with the documentation.
205  $previous_doc_header = "";
206  }
207  else {
208  # Create an undocumented entry
209  my $scope = "public";
210  if ($method_name =~ /^_/) {$scope = "protected";}
211  push @big_buffer,"/** \@fn $scope $method_name( ) \n Undocumented method\n\n";
212  $method_description = $scope." ".$method_name;
213  warn "Found undocumented method $method_name\n";
214  }
215  my $html_lump = "
216 <div id='codesection-$method_name' class='dynheader closed' style='cursor:pointer;' onclick='return toggleVisibility(this)'>
217 \@htmlonly
218  <img id='codesection-$method_name-trigger' src='closed.png' style='display:inline'><b>Code:</b>
219 </div>
220 <div id='codesection-$method_name-summary' class='dyncontent' style='display:block;font-size:small;'>click to view</div>
221 <div id='codesection-$method_name-content' class='dyncontent' style='display: none;'>
222 \@endhtmlonly
223 \@code\n";
224  #push @big_buffer,"\@par Code:\n\@code\n";
225  push @big_buffer,$html_lump;
226  push @big_buffer,$line;
227  if ($line =~ /sub.*{.*}/ || $line =~ /^\s*1;/) {$brackets = 0;} #one-line subroutines must be catered for, but only after the magic <Div> is created.
228  }
229 }
230 
231 sub inheritance_parser {
232  # We only get here if dealing with multiple inheritance over comma-terminated multiple lines
233  my $args = $_[0];
234  my $self = $args->[0];
235  my $line = $args->[1];
236 
237  chomp($line);
238  $line =~ s/;//;
239  my @parents = $line =~ /Bio::EnsEMBL::[\w:]+/g;
240  push @inheritance,@parents;
241 }
242 sub inheritance_action {
243  my ($include, $inherit);
244 
245  my $args = $_[0];
246  my $self = $args->[0];
247  my $line = $args->[1];
248  chomp($line);
249  $line =~ s/;//;
250  # simple inheritance suited only to Ensembl code. Multiple inheritance from one line possible
251  # There are a few ignored cases of Bio::PrimarySeqI and other things from BioPerl(?)
252  if ($line =~ /\@ISA/) {
253  my @parents = $line =~ /Bio::EnsEMBL::[\w:]+/g;
254  push @inheritance,@parents;
255  }
256  elsif ($line =~ /use base/ or $line =~ /use parent/) {
257  my @parents = $line =~ /Bio::EnsEMBL::[\w:]+/g;
258  push @inheritance,@parents;
259  }
260  elsif ($line =~ /use/) {
261  $line =~ /use\s+([\w:]+)/;
262  $include = $1;
263  if (defined($include)) {
264  unless ($include eq "strict" || $include eq "warnings" || $include eq "vars" || $include eq "Exporter" || $include eq "base") {
265  $include =~ s/::/\//g; # allows doxygen to know where to look for other packages
266  $self->print("#include \"".$include.".pm\"\n");
267  }
268  }
269  else {
270  warn "Inheritance issue with: $line";
271  }
272  }
273 
274  $line =~ s/\s*#.*$//; # strip trailing comments
275  if ($line =~ /,$/) {
276  $state = INHERIT; # technically redundant, but easier to follow state
277  } else {
278  $state = NORMAL;
279  }
280 }
281 
282 sub pod_top_parser {
283  my $args = $_[0];
284  my $self = $args->[0];
285  my $line = $args->[1];
286  #secondary switching logic found elsewhere in pod_section_parser, due to unterminated pod sections.
287  if ($line =~ /DESCRIPTION|SYNOPSIS/) {$state = PODSECTION;}
288  elsif ($line =~ /SEE ALSO|OPTIONS/) {$state = SEEALSO;}
289  elsif ($line =~ /^=cut/) {$state = NORMAL;}
290 }
291 sub pod_top_action {
292  my $args = $_[0];
293  my $self = $args->[0];
294  my $line = $args->[1];
295 
296 }
297 
298 sub pod_section_parser {
299  my $args = $_[0];
300  my $self = $args->[0];
301  my $line = $args->[1];
302  if ($line =~ /^=head1\s+(.+)|^(=cut)/) {
303  #end of section. Flush out, otherwise keep on slurping through pod_section_action
304  my $header = $1;
305  if ($buffer[0] =~ /DESCRIPTION/) {
306  push @leading_text,"/** \@section Description\n\@htmlonly<pre>";
307  shift @buffer; #discard the description pod header
308  foreach (@buffer) {
309  $_ =~ s/\@/\\@/g; # escape @array references but only in descriptions.
310  $_ =~ s/=head(\d)\s*(.*)/<\/pre>\n<h$1>$2<\/h$1>\n<pre>/; # replace in-block head commands with formatting
311  }
312  push @leading_text,@buffer;
313  push @leading_text,"</pre>\@endhtmlonly*/ \n";
314  @buffer = ();
315  }
316  elsif ($buffer[0] =~ /SYNOPSIS/) {
317  push @leading_text,"/** \@section Synopsis\n\@code\n";
318  shift @buffer;
319  push @leading_text,@buffer;
320  push @leading_text,"\@endcode */ \n";
321  @buffer = ();
322  }
323  if (defined($header) && ( $header eq "DESCRIPTION" || $header eq "SYNOPSIS") ) {
324  $state = PODSECTION;
325  }
326  elsif (defined($header) && $header eq "SEE ALSO") {
327  $state = SEEALSO;
328  }
329  elsif (not defined($header)) {
330  $state = NORMAL; # this fires when the =cut pattern matches.
331  }
332  else {
333  $state = PODTOP;
334  }
335  }
336 }
337 sub pod_section_action {
338  my $args = $_[0];
339  my $self = $args->[0];
340  my $line = $args->[1];
341  $line =~ s/[BICLFS]<(.+?)>/$1/g; # remove POD formatting commands
342  #$line =~ s/(<|>)/\\$1/g; #protect HTML-like stuff that isn't HTML
343  push @buffer,$line;
344 }
345 
346 sub pod_method_parser {
347  my $args = $_[0];
348  my $self = $args->[0];
349  my $line = $args->[1];
350  # optional extraction of parameters by guesswork?
351  my $proto = "";
352  if ($line =~ /^=cut/) {
353  #flush out method
354  $method_description = shift @buffer;
355  chomp $method_description;
356  $method_description =~ s/retval//; # remove any still unassigned return types
357  # trim trailing brackety stuff off method header. It is upsetting Doxygen
358  $method_description =~ s/\s*\(.+\).*$//;
359  # Protect the code examples with a @code block
360  my $code;
361  foreach (@buffer) {
362  if ($code and /\w+\s*:(?!:)/) { #closes code section at end of Example:
363  $_ = '@endcode <pre>'.$_;
364  $code = undef;
365  }
366  if (/Example\s+:(.*)/) { #starts code section at Example : in POD
367  $_ =~ s#(Examples?\s+:)(.*)#$1</pre>\@code$2#;
368  $code = 1;
369  }
370  }
371  if ($code) {push @buffer,'\@endcode'};
372  push @big_buffer,"\n /** \@fn ".$method_description."\n<pre>";
373  push @big_buffer,@buffer;
374  push @big_buffer," </pre>\n "; # this comment block is still open. To be finished in code_parser
375  @buffer = ();
376  $previous_doc_header = $method_description;
377  $state = NORMAL;
378  }
379  my $return_type;
380  if ($line =~ /^\s*returns?\s*(type)?\s*:/i) { #picking up "Returns : ", "Return type:"
381  $return_type = $self->sanitize_return_values($line);
382  if (not defined($return_type)) {$return_type = "";}
383  $buffer[0] =~ s/retval/$return_type/;
384  }
385 
386 }
387 
388 sub pod_method_action {
389  my $args = $_[0];
390  my $self = $args->[0];
391  my $line = $args->[1];
392  my ($protection,$method_name);
393  # extract method name from header, including methods with spaces in names, unnecessary brackets on the ends and so on
394  if ($line =~ /^=head2\s+([\w_\-\&\s]+)/) {
395  $method_name = $1;
396  chomp $method_name;
397  # use _method coding convention to identify scope of method
398  if( substr( $method_name, 0, 1 ) eq "_" ) {
399  $protection = "protected";
400  }
401  else {
402  $protection = "public";
403  }
404  $method_description = "$protection retval $method_name";
405  push @buffer,$method_description."( );\n";
406  }
407  else {
408  $line =~ s/[BICLFS]<(.+?)>/$1/g; # remove POD formatting commands
409  #$line =~ s/(\@|&|<|>|\\|\%|#)/\\$1/g; #sanitising the oddities that will bewilder Doxygen
410  $line =~ s/(?<!isn't\s)DEPRECATED/\@deprecated/i; #make use of Doxygen's deprecated list features
411  push @buffer,$line;
412  }
413 }
414 
415 sub see_also_parser {
416  my $args = $_[0];
417  my $self = $args->[0];
418  my $line = $args->[1];
419 
420  if ($line =~ /^=cut/ || $line =~ /^=head1/) {
421  $buffer[0] =~ s/=head1/\';
424  push @leading_text,@buffer;
425  @buffer = ();
426  $state = NORMAL;
427  }
428 }
429 sub see_also_action {
430  my $args = $_[0];
431  my $self = $args->[0];
432  my $line = $args->[1];
433  push @buffer,$line;
434 
435 }
436 
437 sub finish {
438  my $args = $_[0];
439  my $self = $args->[0];
440  my $line = $args->[1];
441 }
442 
443 
444 sub code_parser {
445  my $args = $_[0];
446  my $self = $args->[0];
447  my $line = $args->[1];
448 
449  #count the brackets in the code to know when we've run out of subroutine
450  $line =~ s/\s#.*?$//; #rips comments off the end of a line prior to counting
451  $line =~ s#/.*/##g; #remove any conventional regexp from the line, as they can contain brackets
452  my $open_brackets = () = $line =~ /{/g;
453  my $close_brackets = () = $line =~ /}/g;
454  $brackets = $brackets + $open_brackets - $close_brackets;
455 }
456 
457 sub code_action {
458  my $args = $_[0];
459  my $self = $args->[0];
460  my $line = $args->[1];
461 
462  push @big_buffer,$line;
463  # When we run out of open brackets, or we hit weird unpaired brackets in strings or comments
464  if ($brackets <=0 || $line =~ /^=/ || $line =~ /^\s*sub[\s{]/ || $line =~ /^\s*1;/) {
465  $state = NORMAL;
466  push @big_buffer,"\@endcode\n </div>*/\n";
467  #Add fake function for doxygen to find after the comment.
468  push @big_buffer,$method_description."( );\n";
469  }
470 }
471 
472 # Methods sometimes have well-specified return values, these must be dressed up for Doxygen.
473 sub sanitize_return_values {
474  my ( $self, $line ) = @_;
475  my $return_value;
476  my $type;
477  $line =~ /^\s*returns?\s*(type)?\s*:\s*([\w:]+)/i;
478  if ($2) {
479  my $type = $2;
480  if ($type =~ /^int/i) {$return_value = "Int";}
481  elsif ($type eq "undef") {$return_value = "Undef";}
482  elsif ($type =~ /^tri/i) {$return_value = "Boolean Or Undef";}
483  elsif ($type =~ /none/i) {$return_value = "void";}
484  elsif ($type eq "1" || $type eq "0" || $type =~ /TRUE/i || $type =~ /FALSE/i) {$return_value = "Boolean";}
485  elsif ($line =~ /\sSQL\s/) {$return_value = "SQLStatement";}
486  elsif ($line =~ /subclass type/) {$return_value = "\$this";}
487  else {$return_value = ucfirst($type);}
488  }
489  else {
490  $type = "void";
491  }
492  return $return_value;
493 }
494 
495 1;
EnsEMBL::Filter
Definition: Filter.pm:2
Bio::EnsEMBL
Definition: AltAlleleGroup.pm:5
run
public run()