Merge branch 'kerneldoc2' into docs-next

So once upon a time I set out to fix the problem reported by Tobin wherein a literal block within a kerneldoc comment would be corrupted in processing. On the way, though, I got annoyed at the way I have to learn how kernel-doc works from the beginning every time I tear into it. As a result, seven of the following eight patches just get rid of some dead code and reorganize the rest - mostly turning the 500-line process_file() function into something a bit more rational. Sphinx output is unchanged after these are applied. Then, at the end, there's a tweak to stop messing with literal blocks. If anybody was unaware that I've not done any serious Perl since the 1990s, they will certainly understand that fact now.

Merge branch 'kerneldoc2' into docs-next
So once upon a time I set out to fix the problem reported by Tobin wherein a literal block within a kerneldoc comment would be corrupted in processing. On the way, though, I got annoyed at the way I have to learn how kernel-doc works from the beginning every time I tear into it. As a result, seven of the following eight patches just get rid of some dead code and reorganize the rest - mostly turning the 500-line process_file() function into something a bit more rational. Sphinx output is unchanged after these are applied. Then, at the end, there's a tweak to stop messing with literal blocks. If anybody was unaware that I've not done any serious Perl since the 1990s, they will certainly understand that fact now.
fcdf1df2 · Jonathan Corbet · 281a7af8 · 38476378 · fcdf1df2
Commit fcdf1df2 authored Feb 20, 2018 by Jonathan Corbet
Show whitespace changes
Inline Side-by-side

Showing with 365 additions and 301 deletions

scripts/kernel-doc scripts/kernel-doc +365 -301

No files found.
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
 #!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
 use warnings;
 use strict;
@@ -328,13 +329,15 @@ my $lineprefix="";
 use constant {
    STATE_NORMAL        => 0, # normal code
    STATE_NAME          => 1, # looking for function name
-    STATE_FIELD         => 2, # scanning field start
+    STATE_BODY_MAYBE    => 2, # body - or maybe a continuation of the one-line description
-    STATE_PROTO         => 3, # scanning prototype
+    STATE_BODY          => 3, # the body of the comment
-    STATE_DOCBLOCK      => 4, # documentation block
+    STATE_PROTO         => 4, # scanning prototype
-    STATE_INLINE        => 5, # gathering documentation outside main block
+    STATE_DOCBLOCK      => 5, # documentation block
+    STATE_INLINE        => 6, # gathering documentation outside main block
 };
 my $state;
 my $in_doc_sect;
+my $leading_space;
 # Inline documentation state
 use constant {
@@ -553,10 +556,9 @@ sub output_highlight {
 	}
 	if ($line eq ""){
 	    if (! $output_preformatted) {
-		print $lineprefix, local_unescape($blankline);
+		print $lineprefix, $blankline;
 	    }
 	} else {
-	    $line =~ s/\\\\\\/\&/g;
 	    if ($output_mode eq "man" && substr($line, 0, 1) eq ".") {
 		print "\\&$line";
 	    } else {
@@ -747,17 +749,73 @@ sub output_blockhead_rst(%) {
    }
 }
+#
+# Apply the RST highlights to a sub-block of text.
+# Takes the text as its only argument and returns it after $dohighlight has run.
+sub highlight_block($) {
+    # The dohighlight kludge requires the text be called $contents
+    my $contents = shift;
+    eval $dohighlight;     # string eval: the code held in $dohighlight (built elsewhere) sees the lexical $contents
+    die $@ if $@;          # abort if the eval'd highlight code failed
+    return $contents;
+}
+#
+# Regexes used only here.
+# output_highlight_rst() prints its joined arguments line by line, each prefixed with $lineprefix; only text outside literal blocks goes through highlight_block().
+my $sphinx_literal = '^[^.].*::$';           # a line that does not start with a dot and ends in ::
+my $sphinx_cblock = '^\.\.\ +code-block::';  # an explicit code-block directive at the start of the line
 sub output_highlight_rst {
-    my $contents = join "\n",@_;
+    my $input = join "\n",@_;
+    my $output = "";
    my $line;
+    my $in_literal = 0;   # true while the current line belongs to a literal block
+    my $litprefix;        # anchored regex for the literal block indent; empty until its first non-blank line is seen
+    my $block = "";       # non-literal text collected for the next highlight_block() call
-    # undo the evil effects of xml_escape() earlier
+    foreach $line (split "\n",$input) {
-    $contents = xml_unescape($contents);
+	#
+	# If we're in a literal block, see if we should drop out
-    eval $dohighlight;
+	# of it.  Otherwise pass the line straight through unmunged.
-    die $@ if $@;
+	#
+	if ($in_literal) {
+	    if (! ($line =~ /^\s*$/)) {
+		#
+		# If this is the first non-blank line in a literal
+		# block we need to figure out what the proper indent is.
+		#
+		if ($litprefix eq "") {
+		    $line =~ /^(\s*)/;
+		    $litprefix = '^' . $1;
+		    $output .= $line . "\n";
+		} elsif (! ($line =~ /$litprefix/)) {
+		    $in_literal = 0;    # indent no longer matches: this line is handled by the non-literal branch below
+		} else {
+		    $output .= $line . "\n";
+		}
+	    } else {
+		$output .= $line . "\n";    # blank lines inside a literal block are copied through as they are
+	    }
+	}
+	#
+	# Not in a literal block (or just dropped out)
+	#
+	if (! $in_literal) {
+	    $block .= $line . "\n";
+	    if (($line =~ /$sphinx_literal/) || ($line =~ /$sphinx_cblock/)) {
+		$in_literal = 1;
+		$litprefix = "";
+		$output .= highlight_block($block);    # flush the pending text, including the line that opened the literal block
+		$block = ""
+	    }
+	}
+    }
-    foreach $line (split "\n", $contents) {
+    if ($block) {    # highlight whatever non-literal text is still pending
+	$output .= highlight_block($block);
+    }
+    foreach $line (split "\n", $output) {
 	print $lineprefix . $line . "\n";
    }
 }
@@ -1422,8 +1480,6 @@ sub push_parameter($$$$) {
 		}
 	}
-	$param = xml_escape($param);
 	# strip spaces from $param so that it is one continuous string
 	# on @parameterlist;
 	# this fixes a problem where check_sections() cannot find
@@ -1748,47 +1804,6 @@ sub process_proto_type($$) {
    }
 }
-# xml_escape: replace <, >, and & in the text stream;
-#
-# however, formatting controls that are generated internally/locally in the
-# kernel-doc script are not escaped here; instead, they begin life like
-# $blankline_html (4 of '\' followed by a mnemonic + ':'), then these strings
-# are converted to their mnemonic-expected output, without the 4 * '\' & ':',
-# just before actual output; (this is done by local_unescape())
-sub xml_escape($) {
-	my $text = shift;
-	if ($output_mode eq "man") {
-		return $text;
-	}
-	$text =~ s/\&/\\\\\\amp;/g;
-	$text =~ s/\</\\\\\\lt;/g;
-	$text =~ s/\>/\\\\\\gt;/g;
-	return $text;
-}
-# xml_unescape: reverse the effects of xml_escape
-sub xml_unescape($) {
-	my $text = shift;
-	if ($output_mode eq "man") {
-		return $text;
-	}
-	$text =~ s/\\\\\\amp;/\&/g;
-	$text =~ s/\\\\\\lt;/</g;
-	$text =~ s/\\\\\\gt;/>/g;
-	return $text;
-}
-# convert local escape strings to html
-# local escape strings look like:  '\\\\menmonic:' (that's 4 backslashes)
-sub local_unescape($) {
-	my $text = shift;
-	if ($output_mode eq "man") {
-		return $text;
-	}
-	$text =~ s/\\\\\\\\lt:/</g;
-	$text =~ s/\\\\\\\\gt:/>/g;
-	return $text;
-}
 sub map_filename($) {
    my $file;
@@ -1826,40 +1841,27 @@ sub process_export_file($) {
    close(IN);
 }
-sub process_file($) {
+#
-    my $file;
+# Parsers for the various processing states.
-    my $identifier;
+#
-    my $func;
+# STATE_NORMAL: looking for the /** to begin everything.
-    my $descr;
+#
-    my $in_purpose = 0;
+sub process_normal() {
-    my $initial_section_counter = $section_counter;
-    my ($orig_file) = @_;
-    my $leading_space;
-    $file = map_filename($orig_file);
-    if (!open(IN,"<$file")) {
-	print STDERR "Error: Cannot open file $file\n";
-	++$errors;
-	return;
-    }
-    $. = 1;
-    $section_counter = 0;
-    while (<IN>) {
-	while (s/\\\s*$//) {
-	    $_ .= <IN>;
-	}
-	# Replace tabs by spaces
-        while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
-	if ($state == STATE_NORMAL) {
    if (/$doc_start/o) {
 	$state = STATE_NAME;	# next line is always the function name
 	$in_doc_sect = 0;
 	$declaration_start_line = $. + 1;
    }
-	} elsif ($state == STATE_NAME) {# this line is the function name (always)
+}
+#
+# STATE_NAME: Looking for the "name - description" line
+#
+sub process_name($$) {
+    my $file = shift;
+    my $identifier;
+    my $descr;
    if (/$doc_block/o) {
 	$state = STATE_DOCBLOCK;
 	$contents = "";
@@ -1877,7 +1879,7 @@ sub process_file($) {
 	    $identifier = $1;
 	}
-		$state = STATE_FIELD;
+	$state = STATE_BODY;
 	# if there's no @param blocks need to set up default section
 	# here
 	$contents = "";
@@ -1889,8 +1891,8 @@ sub process_file($) {
 	    $descr =~ s/^\s*//;
 	    $descr =~ s/\s*$//;
 	    $descr =~ s/\s+/ /g;
-		    $declaration_purpose = xml_escape($descr);
+	    $declaration_purpose = $descr;
-		    $in_purpose = 1;
+	    $state = STATE_BODY_MAYBE;
 	} else {
 	    $declaration_purpose = "";
 	}
@@ -1922,7 +1924,15 @@ sub process_file($) {
 	++$warnings;
 	$state = STATE_NORMAL;
    }
-	} elsif ($state == STATE_FIELD) {	# look for head: lines, and include content
+}
+#
+# STATE_BODY and STATE_BODY_MAYBE: the bulk of a kerneldoc comment.
+#
+sub process_body($$) {
+    my $file = shift;
    if (/$doc_sect/i) { # case insensitive for supported section names
 	$newsection = $1;
 	$newcontents = $2;
@@ -1944,12 +1954,12 @@ sub process_file($) {
 		print STDERR "${file}:$.: warning: contents before sections\n";
 		++$warnings;
 	    }
-		    dump_section($file, $section, xml_escape($contents));
+	    dump_section($file, $section, $contents);
 	    $section = $section_default;
 	}
 	$in_doc_sect = 1;
-		$in_purpose = 0;
+	$state = STATE_BODY;
 	$contents = $newcontents;
 	$new_start_line = $.;
 	while (substr($contents, 0, 1) eq " ") {
@@ -1962,7 +1972,7 @@ sub process_file($) {
 	$leading_space = undef;
    } elsif (/$doc_end/) {
 	if (($contents ne "") && ($contents ne "\n")) {
-		    dump_section($file, $section, xml_escape($contents));
+	    dump_section($file, $section, $contents);
 	    $section = $section_default;
 	    $contents = "";
 	}
@@ -1975,24 +1985,23 @@ sub process_file($) {
 	$prototype = "";
 	$state = STATE_PROTO;
 	$brcount = 0;
-#		print STDERR "end of doc comment, looking for prototype\n";
    } elsif (/$doc_content/) {
 	# miguel-style comment kludge, look for blank lines after
 	# @parameter line to signify start of description
 	if ($1 eq "") {
 	    if ($section =~ m/^@/ || $section eq $section_context) {
-			dump_section($file, $section, xml_escape($contents));
+		dump_section($file, $section, $contents);
 		$section = $section_default;
 		$contents = "";
 		$new_start_line = $.;
 	    } else {
 		$contents .= "\n";
 	    }
-		    $in_purpose = 0;
+	    $state = STATE_BODY;
-		} elsif ($in_purpose == 1) {
+	} elsif ($state == STATE_BODY_MAYBE) {
 	    # Continued declaration purpose
 	    chomp($declaration_purpose);
-		    $declaration_purpose .= " " . xml_escape($1);
+	    $declaration_purpose .= " " . $1;
 	    $declaration_purpose =~ s/\s+/ /g;
 	} else {
 	    my $cont = $1;
@@ -2004,7 +2013,6 @@ sub process_file($) {
 			$leading_space = "";
 		    }
 		}
 		$cont =~ s/^$leading_space//;
 	    }
 	    $contents .= $cont . "\n";
@@ -2014,7 +2022,67 @@ sub process_file($) {
 	print STDERR "${file}:$.: warning: bad line: $_";
 	++$warnings;
    }
-	} elsif ($state == STATE_INLINE) { # scanning for inline parameters
+}
+#
+# STATE_PROTO: reading a function/whatever prototype.
+# Takes the file name as its first argument; the current line is read from $_.
+sub process_proto($$) {
+    my $file = shift;    # the second argument is never unpacked; the patterns below match against $_
+    if (/$doc_inline_oneline/) {    # one-line inline comment: $1 is the section name, $2 its contents
+	$section = $1;
+	$contents = $2;
+	if ($contents ne "") {    # only dump a section when the one-liner actually has text
+	    $contents .= "\n";
+	    dump_section($file, $section, $contents);
+	    $section = $section_default;
+	    $contents = "";
+	}
+    } elsif (/$doc_inline_start/) {    # start of an inline comment: switch to STATE_INLINE
+	$state = STATE_INLINE;
+	$inline_doc_state = STATE_INLINE_NAME;
+    } elsif ($decl_type eq 'function') {    # anything else is prototype text, routed by declaration type
+	process_proto_function($_, $file);
+    } else {
+	process_proto_type($_, $file);
+    }
+}
+#
+# STATE_DOCBLOCK: within a DOC: block.
+# Takes the file name as its first argument; the current line is read from $_.
+sub process_docblock($$) {
+    my $file = shift;    # the second argument is never unpacked; the patterns below match against $_
+    if (/$doc_end/) {    # end of the comment: emit the DOC: section, then reset the collected state
+	dump_doc_section($file, $section, $contents);
+	$section = $section_default;
+	$contents = "";
+	$function = "";
+	%parameterdescs = ();
+	%parametertypes = ();
+	@parameterlist = ();
+	%sections = ();
+	@sectionlist = ();
+	$prototype = "";
+	$state = STATE_NORMAL;
+    } elsif (/$doc_content/) {    # content line: $1 is the text captured by $doc_content
+	if ( $1 eq "" )	{
+	    $contents .= $blankline;    # empty content line: append $blankline rather than the captured text
+	} else {
+	    $contents .= $1 . "\n";
+	}
+    }
+}
+#
+# STATE_INLINE: docbook comments within a prototype.
+#
+sub process_inline($$) {
+    my $file = shift;
    # First line (state 1) needs to be a @parameter
    if ($inline_doc_state == STATE_INLINE_NAME && /$doc_inline_sect/o) {
 	$section = $1;
@@ -2030,7 +2098,7 @@ sub process_file($) {
 	# Documentation block end */
    } elsif (/$doc_inline_end/) {
 	if (($contents ne "") && ($contents ne "\n")) {
-		    dump_section($file, $section, xml_escape($contents));
+	    dump_section($file, $section, $contents);
 	    $section = $section_default;
 	    $contents = "";
 	}
@@ -2051,52 +2119,48 @@ sub process_file($) {
 	    ++$warnings;
 	}
    }
-	} elsif ($state == STATE_PROTO) {	# scanning for function '{' (end of prototype)
+}
-	    if (/$doc_inline_oneline/) {
-		$section = $1;
-		$contents = $2;
+sub process_file($) {
-		if ($contents ne "") {
+    my $file;
-		    $contents .= "\n";
+    my $initial_section_counter = $section_counter;
-		    dump_section($file, $section, xml_escape($contents));
+    my ($orig_file) = @_;
-		    $section = $section_default;
-		    $contents = "";
+    $file = map_filename($orig_file);
+    if (!open(IN,"<$file")) {
+	print STDERR "Error: Cannot open file $file\n";
+	++$errors;
+	return;
    }
-	    } elsif (/$doc_inline_start/) {
-		$state = STATE_INLINE;
+    $. = 1;
-		$inline_doc_state = STATE_INLINE_NAME;
-	    } elsif ($decl_type eq 'function') {
+    $section_counter = 0;
-		process_proto_function($_, $file);
+    while (<IN>) {
-	    } else {
+	while (s/\\\s*$//) {
-		process_proto_type($_, $file);
+	    $_ .= <IN>;
 	}
+	# Replace tabs by spaces
+        while ($_ =~ s/\t+/' ' x (length($&) * 8 - length($`) % 8)/e) {};
+	# Hand this line to the appropriate state handler
+	if ($state == STATE_NORMAL) {
+	    process_normal();
+	} elsif ($state == STATE_NAME) {
+	    process_name($file, $_);
+	} elsif ($state == STATE_BODY || $state == STATE_BODY_MAYBE) {
+	    process_body($file, $_);
+	} elsif ($state == STATE_INLINE) { # scanning for inline parameters
+	    process_inline($file, $_);
+	} elsif ($state == STATE_PROTO) {
+	    process_proto($file, $_);
 	} elsif ($state == STATE_DOCBLOCK) {
-		if (/$doc_end/)
+	    process_docblock($file, $_);
-		{
-			dump_doc_section($file, $section, xml_escape($contents));
-			$section = $section_default;
-			$contents = "";
-			$function = "";
-			%parameterdescs = ();
-			%parametertypes = ();
-			@parameterlist = ();
-			%sections = ();
-			@sectionlist = ();
-			$prototype = "";
-			$state = STATE_NORMAL;
-		}
-		elsif (/$doc_content/)
-		{
-			if ( $1 eq "" )
-			{
-				$contents .= $blankline;
-			}
-			else
-			{
-				$contents .= $1 . "\n";
-			}
-		}
 	}
    }
+    # Make sure we got something interesting.
    if ($initial_section_counter == $section_counter) {
 	if ($output_mode ne "none") {
 	    print STDERR "${file}:1: warning: no structured comments found\n";