3 See the NOTICE file distributed with
this work
for additional information
4 regarding copyright ownership.
6 Licensed under the Apache License, Version 2.0 (the
"License");
7 you may not use
this file except in compliance with the License.
8 You may obtain a copy of the License at
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an
"AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License
for the specific language governing permissions and
16 limitations under the License.
22 Doxygen Pre-Processor
for Perl
23 Copyright (C) 2002 Bart Schuller
24 Copyright (C) 2006 Phinex Informatik AG
27 Doxygen
Filter is free software; you can redistribute it and/or modify
28 it under the same terms as Perl itself.
30 Larry Wall
's 'Artistic License
' for perl can be found in
31 http://www.perl.com/pub/a/language/misc/Artistic.html
32 ------------------------------------------------
33 Author: Aeby Thomas, Phinex Informatik AG,
34 Based on DoxygenFilter from Bart Schuller
35 E-Mail: tom.aeby@phinex.ch
42 ------------------------------------------------
43 This completely rewritten version of Doxygen::PerlFilter
47 Please email comments or questions to the public Ensembl
48 developers list at <http://lists.ensembl.org/mailman/listinfo/dev>.
50 Questions may also be sent to the Ensembl help desk at
51 <http://www.ensembl.org/Help/Contact>.
61 Implementation of EnsEMBL::PerlFilter.
62 Derived from http://www.bigsister.ch/doxygenfilter by Bart Schuller and Thomas Aeby
63 Original distributed under Perl artistic license, see: http://www.bigsister.ch/doxygenfilter/license.html
65 Adaptations by Kieron Taylor (ktaylor@ebi.ac.uk), 2011
67 State Machine rewrite of existing filter.
68 Was going to use DFA::Command to handle the logic, but actually it won't work well
for parsing Perl,
69 hence own simplified state machine. Intentionally not
using PPI
package to parse Perl, too complex.
70 This is a 80/20 EnsEMBL specific POD->Doxygen converter, however it should work somewhat with other code.
75 package EnsEMBL::PerlFilter;
90 TERMINAL => 6, #would have been END, but that
's a reserved word
94 # State-determining functions
95 my @parse = ( \&normal_parser, \&inheritance_parser, \&pod_top_parser, \&pod_section_parser, \&pod_method_parser, \&see_also_parser, \&finish, \&code_parser );
96 # State-reactive functions
97 my @act = ( \&normal_action, \&inheritance_action, \&pod_top_action, \&pod_section_action, \&pod_method_action, \&see_also_action, \&finish, \&code_action );
100 my $state; # state of state machine, see?
102 my @big_buffer; # to absorb everything we want to print right up until we know whom we inherit from.
103 my $class_declaration;
106 my $method_description;
107 my $previous_doc_header;
110 my $id = __PACKAGE__;
113 my($self, $infile) = @_;
114 open(my $infh, $infile);
115 my $current_class = "";
118 # Read file, using lookup table to run correct parser on each line.
119 # Parsing is done in two stages:
120 # 1) Parsing based on the previous mode. The methods are called "_parse"
121 # 2) Parsing after any mode switch in stage 1. The methods are called "_action"
122 # Strictly speaking actions are also performed in "parse" code, but that's what was needed in the end.
123 while( defined($line = <$infh>) && $state != TERMINAL) {
124 my $sub_ref = $parse[$state];
125 $parse[$state]->([$self,$line]);
126 $act[$state]->([$self,$line]);
128 # Create the filtered file:
129 # beware, #include declarations are coming from elsewhere ( inheritance_action() ).
133 if (defined($class_declaration)) {
134 @namespaces = split(/::/,$class_declaration);
135 $class_name = pop @namespaces;
136 foreach (@namespaces) {
137 $self->print(
"namespace ".$_.
" {\n");
139 $self->more(@leading_text);
140 $self->print(
"class ".$class_name);
143 $self->print(
"# No class definition in this file.");
144 warn
"No package line found in $infile\n";
146 if (scalar @inheritance > 0) {
147 my $string = shift @inheritance;
148 $self->print(
" : public ".$string);
149 foreach my $parent (@inheritance) {
150 $self->print(
", public ".$parent);
153 $self->print(
" {\n");
154 $self->print(
"public: \n");
155 $self->more(@big_buffer);
156 $self->print(
"};\n");
157 foreach (@namespaces) {
166 my $self = $args->[0];
167 my $line = $args->[1];
171 if ($line =~ /^=head1/) {
174 elsif ($line =~/^=head2/) { # head2 usually signifies a doc-block just before a method
177 elsif ($line =~/^1;/) {
179 warn
"Reached end of code: 1;\n";
181 elsif ($line =~ /^\s*package\s+(.*);/) {
182 if ($packaged) {$state = TERMINAL}
else {
183 $class_declaration = $1;
187 elsif ($line =~/^\s*use\s/ || $line =~/^(our|my)?\s*\@ISA/ || $line =~(/^\s*.*::ISA/) ){
195 my $self = $args->[0];
196 my $line = $args->[1];
197 # for catching undocumented subroutines and adding code blocks to documentation
198 if ($line =~ /^\s*sub\s+([\w:]+)/) {
200 my $method_name = $1;
201 if ($line =~ /{/) {$brackets = 1;}
else {$brackets = 0;}
202 #warn "Previous: $previous_doc_header. Present: $method_name\n";
203 if (defined($previous_doc_header) && $previous_doc_header =~ /$method_name/){
204 # We've found the corresponding sub to go with the documentation.
205 $previous_doc_header =
"";
208 # Create an undocumented entry
209 my $scope =
"public";
210 if ($method_name =~ /^_/) {$scope =
"protected";}
211 push @big_buffer,
"/** \@fn $scope $method_name( ) \n Undocumented method\n\n";
212 $method_description = $scope.
" ".$method_name;
213 warn
"Found undocumented method $method_name\n";
216 <div id='codesection-$method_name' class='dynheader closed' style='cursor:pointer;' onclick='return toggleVisibility(this)'>
218 <img id='codesection-$method_name-trigger' src='closed.png' style='display:inline'><b>Code:</b>
220 <div id='codesection-$method_name-summary' class='dyncontent' style='display:block;font-size:small;'>click to view</div>
221 <div id='codesection-$method_name-content' class='dyncontent' style='display: none;'>
224 #push @big_buffer,"\@par Code:\n\@code\n";
225 push @big_buffer,$html_lump;
226 push @big_buffer,$line;
227 if ($line =~ /sub.*{.*}/ || $line =~ /^\s*1;/) {$brackets = 0;} #one-line subroutines must be catered
for, but only after the magic <Div> is created.
231 sub inheritance_parser {
232 # We only get here if dealing with multiple inheritance over comma-terminated multiple lines
234 my $self = $args->[0];
235 my $line = $args->[1];
240 push @inheritance,@parents;
242 sub inheritance_action {
243 my ($include, $inherit);
246 my $self = $args->[0];
247 my $line = $args->[1];
250 # simple inheritance suited only to Ensembl code. Multiple inheritance from one line possible
251 # There are a few ignored cases of Bio::PrimarySeqI and other things from BioPerl(?)
252 if ($line =~ /\@ISA/) {
254 push @inheritance,@parents;
256 elsif ($line =~ /use base/ or $line =~ /use parent/) {
258 push @inheritance,@parents;
260 elsif ($line =~ /use/) {
261 $line =~ /use\s+([\w:]+)/;
263 if (defined($include)) {
264 unless ($include eq
"strict" || $include eq
"warnings" || $include eq
"vars" || $include eq
"Exporter" || $include eq
"base") {
266 $self->print(
"#include \"".$include.
".pm\"\n");
270 warn
"Inheritance issue with: $line";
276 $state = INHERIT; # technically redundant, but easier to follow state
284 my $self = $args->[0];
285 my $line = $args->[1];
286 #secondary switching logic found elsewhere in pod_section_parser, due to unterminated pod sections.
287 if ($line =~ /DESCRIPTION|SYNOPSIS/) {$state = PODSECTION;}
288 elsif ($line =~ /SEE ALSO|OPTIONS/) {$state = SEEALSO;}
289 elsif ($line =~ /^=cut/) {$state = NORMAL;}
293 my $self = $args->[0];
294 my $line = $args->[1];
298 sub pod_section_parser {
300 my $self = $args->[0];
301 my $line = $args->[1];
302 if ($line =~ /^=head1\s+(.+)|^(=cut)/) {
303 #end of section. Flush out, otherwise keep on slurping through pod_section_action
305 if ($buffer[0] =~ /DESCRIPTION/) {
306 push @leading_text,
"/** \@section Description\n\@htmlonly<pre>";
307 shift @buffer; #discard the description pod header
309 $_ =~ s/\@/\\@/g; # escape @array references but only in descriptions.
310 $_ =~ s/=head(\d)\s*(.*)/<\/pre>\n<h$1>$2<\/h$1>\n<pre>/; # replace in-block head commands with formatting
312 push @leading_text,@buffer;
313 push @leading_text,
"</pre>\@endhtmlonly*/ \n";
316 elsif ($buffer[0] =~ /SYNOPSIS/) {
317 push @leading_text,
"/** \@section Synopsis\n\@code\n";
319 push @leading_text,@buffer;
320 push @leading_text,
"\@endcode */ \n";
323 if (defined($header) && ( $header eq
"DESCRIPTION" || $header eq
"SYNOPSIS") ) {
326 elsif (defined($header) && $header eq
"SEE ALSO") {
329 elsif (not defined($header)) {
330 $state = NORMAL; #
this fires when the =cut pattern matches.
337 sub pod_section_action {
339 my $self = $args->[0];
340 my $line = $args->[1];
341 $line =~ s/[BICLFS]<(.+?)>/$1/g; # remove POD formatting commands
342 #$line =~ s/(<|>)/\\$1/g; #protect HTML-like stuff that isn't HTML
346 sub pod_method_parser {
348 my $self = $args->[0];
349 my $line = $args->[1];
350 # optional extraction of parameters by guesswork?
352 if ($line =~ /^=cut/) {
354 $method_description = shift @buffer;
355 chomp $method_description;
356 $method_description =~ s/retval
357 # trim trailing brackety stuff off method header. It is upsetting Doxygen
358 $method_description =~ s/\s*\(.+\).*$
359 # Protect the code examples with a @code block
362 if ($code and /\w+\s*:(?!:)/) { #closes code section at end of Example:
363 $_ =
'@endcode <pre>'.$_;
366 if (/Example\s+:(.*)/) { #starts code section at Example : in POD
367 $_ =~ s#(Examples?\s+:)(.*)
#$1</pre>\@code$2#;
371 if ($code) {push @buffer,
'\@endcode'};
372 push @big_buffer,
"\n /** \@fn ".$method_description.
"\n<pre>";
373 push @big_buffer,@buffer;
374 push @big_buffer,
" </pre>\n "; #
this comment block is still open. To be finished in code_parser
376 $previous_doc_header = $method_description;
380 if ($line =~ /^\s*returns?\s*(type)?\s*:/i) { #picking up
"Returns : ",
"Return type:"
381 $return_type = $self->sanitize_return_values($line);
382 if (not defined($return_type)) {$return_type =
"";}
383 $buffer[0] =~ s/retval/$return_type/;
388 sub pod_method_action {
390 my $self = $args->[0];
391 my $line = $args->[1];
392 my ($protection,$method_name);
393 # extract method name from header, including methods with spaces in names, unnecessary brackets on the ends and so on
394 if ($line =~ /^=head2\s+([\w_\-\&\s]+)/) {
397 # use _method coding convention to identify scope of method
398 if( substr( $method_name, 0, 1 ) eq
"_" ) {
399 $protection =
"protected";
402 $protection =
"public";
404 $method_description =
"$protection retval $method_name";
405 push @buffer,$method_description.
"( );\n";
408 $line =~ s/[BICLFS]<(.+?)>/$1/g; # remove POD formatting commands
409 #$line =~ s/(\@|&|<|>|\\|\%|#)/\\$1/g; #sanitising the oddities that will bewilder Doxygen
410 $line =~ s/(?<!isn
't\s)DEPRECATED/\@deprecated/i; #make use of Doxygen's deprecated list features
415 sub see_also_parser {
417 my $self = $args->[0];
418 my $line = $args->[1];
420 if ($line =~ /^=cut/ || $line =~ /^=head1/) {
421 $buffer[0] =~ s/=head1/\
';
424 push @leading_text,@buffer;
429 sub see_also_action {
431 my $self = $args->[0];
432 my $line = $args->[1];
439 my $self = $args->[0];
440 my $line = $args->[1];
446 my $self = $args->[0];
447 my $line = $args->[1];
449 #count the brackets in the code to know when we've
run out of subroutine
451 $line =~ s#/.*/##g; #remove any conventional regexp from the line, as they can contain brackets
452 my $open_brackets = () = $line =~ /{/g;
453 my $close_brackets = () = $line =~ /}/g;
454 $brackets = $brackets + $open_brackets - $close_brackets;
459 my $self = $args->[0];
460 my $line = $args->[1];
462 push @big_buffer,$line;
463 # When we run out of open brackets, or we hit weird unpaired brackets in strings or comments
464 if ($brackets <=0 || $line =~ /^=/ || $line =~ /^\s*sub[\s{]/ || $line =~ /^\s*1;/) {
466 push @big_buffer,
"\@endcode\n </div>*/\n";
467 #Add fake function for doxygen to find after the comment.
468 push @big_buffer,$method_description.
"( );\n";
472 # Methods sometimes have well-specified return values, these must be dressed up for Doxygen.
473 sub sanitize_return_values {
474 my ( $self, $line ) = @_;
477 $line =~ /^\s*returns?\s*(type)?\s*:\s*([\w:]+)/i;
480 if ($type =~ /^
int/i) {$return_value =
"Int";}
481 elsif ($type eq
"undef") {$return_value =
"Undef";}
482 elsif ($type =~ /^tri/i) {$return_value =
"Boolean Or Undef";}
483 elsif ($type =~ /none/i) {$return_value =
"void";}
484 elsif ($type eq
"1" || $type eq
"0" || $type =~ /TRUE/i || $type =~ /FALSE/i) {$return_value =
"Boolean";}
485 elsif ($line =~ /\sSQL\s/) {$return_value =
"SQLStatement";}
486 elsif ($line =~ /subclass type/) {$return_value =
"\$this";}
487 else {$return_value = ucfirst($type);}
492 return $return_value;