Commit 90f72095 authored by Robert Griesemer

godoc: improved comment formatting: recognize URLs

and highlight special words, if provided. Also:

- related cleanups in src/pkg/go/doc/comment.go
- fix typos in src/cmd/goinstall/doc.go

Fixes #672.

R=rsc
CC=adg, golang-dev
https://golang.org/cl/601042
parent 17e03514
...@@ -665,7 +665,9 @@ func htmlEscFmt(w io.Writer, x interface{}, format string) { ...@@ -665,7 +665,9 @@ func htmlEscFmt(w io.Writer, x interface{}, format string) {
func htmlCommentFmt(w io.Writer, x interface{}, format string) { func htmlCommentFmt(w io.Writer, x interface{}, format string) {
var buf bytes.Buffer var buf bytes.Buffer
writeAny(&buf, x, false) writeAny(&buf, x, false)
doc.ToHTML(w, buf.Bytes()) // does html-escaping // TODO(gri) Provide list of words (e.g. function parameters)
// to be emphasized by ToHTML.
doc.ToHTML(w, buf.Bytes(), nil) // does html-escaping
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
Goinstall is an experiment in automatic package installation. Goinstall is an experiment in automatic package installation.
It installs packages, possibly downloading them from the internet. It installs packages, possibly downloading them from the internet.
It maintains a list of public Go packages at http://godashboard.appspot.com/packages. It maintains a list of public Go packages at http://godashboard.appspot.com/package.
Usage: Usage:
goinstall [flags] importpath... goinstall [flags] importpath...
......
...@@ -8,12 +8,25 @@ package doc ...@@ -8,12 +8,25 @@ package doc
import ( import (
"go/ast" "go/ast"
"http" // for URLEscape
"io" "io"
"regexp"
"strings" "strings"
"template" // for htmlEscape "template" // for htmlEscape
) )
// Comment extraction
// isWhitespace reports whether ch is a space, tab, newline, or
// carriage return.
func isWhitespace(ch byte) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}

// stripTrailingWhitespace returns s with its trailing run of whitespace
// bytes (as classified by isWhitespace) removed. Leading and interior
// whitespace is left untouched.
func stripTrailingWhitespace(s string) string {
	end := len(s)
	for ; end > 0; end-- {
		if !isWhitespace(s[end-1]) {
			break
		}
	}
	return s[:end]
}
// CommentText returns the text of comment, // CommentText returns the text of comment,
// with the comment markers - //, /*, and */ - removed. // with the comment markers - //, /*, and */ - removed.
...@@ -26,19 +39,23 @@ func CommentText(comment *ast.CommentGroup) string { ...@@ -26,19 +39,23 @@ func CommentText(comment *ast.CommentGroup) string {
comments[i] = string(c.Text) comments[i] = string(c.Text)
} }
lines := make([]string, 0, 20) lines := make([]string, 0, 10) // most comments are less than 10 lines
for _, c := range comments { for _, c := range comments {
// Remove comment markers. // Remove comment markers.
// The parser has given us exactly the comment text. // The parser has given us exactly the comment text.
switch n := len(c); { switch c[1] {
case n >= 4 && c[0:2] == "/*" && c[n-2:n] == "*/": case '/':
c = c[2 : n-2] //-style comment
case n >= 2 && c[0:2] == "//": c = c[2:]
c = c[2:n]
// Remove leading space after //, if there is one. // Remove leading space after //, if there is one.
// TODO(gri) This appears to be necessary in isolated
// cases (bignum.RatFromString) - why?
if len(c) > 0 && c[0] == ' ' { if len(c) > 0 && c[0] == ' ' {
c = c[1:] c = c[1:]
} }
case '*':
/*-style comment */
c = c[2 : len(c)-2]
} }
// Split on newlines. // Split on newlines.
...@@ -46,20 +63,12 @@ func CommentText(comment *ast.CommentGroup) string { ...@@ -46,20 +63,12 @@ func CommentText(comment *ast.CommentGroup) string {
// Walk lines, stripping trailing white space and adding to list. // Walk lines, stripping trailing white space and adding to list.
for _, l := range cl { for _, l := range cl {
// Strip trailing white space l = stripTrailingWhitespace(l)
m := len(l)
for m > 0 && (l[m-1] == ' ' || l[m-1] == '\n' || l[m-1] == '\t' || l[m-1] == '\r') {
m--
}
l = l[0:m]
// Add to list. // Add to list.
n := len(lines) n := len(lines)
if n+1 >= cap(lines) { if n+1 >= cap(lines) {
newlines := make([]string, n, 2*cap(lines)) newlines := make([]string, n, 2*cap(lines))
for k := range newlines { copy(newlines, lines)
newlines[k] = lines[k]
}
lines = newlines lines = newlines
} }
lines = lines[0 : n+1] lines = lines[0 : n+1]
...@@ -88,6 +97,7 @@ func CommentText(comment *ast.CommentGroup) string { ...@@ -88,6 +97,7 @@ func CommentText(comment *ast.CommentGroup) string {
return strings.Join(lines, "\n") return strings.Join(lines, "\n")
} }
// Split bytes into lines. // Split bytes into lines.
func split(text []byte) [][]byte { func split(text []byte) [][]byte {
// count lines // count lines
...@@ -127,28 +137,51 @@ var ( ...@@ -127,28 +137,51 @@ var (
rdquo = []byte("”") rdquo = []byte("”")
) )
// Escape comment text for HTML. // Escape comment text for HTML. If nice is set,
// Also, turn `` into “ and '' into ”. // also turn `` into “ and '' into ”.
func commentEscape(w io.Writer, s []byte) { func commentEscape(w io.Writer, s []byte, nice bool) {
last := 0 last := 0
for i := 0; i < len(s)-1; i++ { if nice {
if s[i] == s[i+1] && (s[i] == '`' || s[i] == '\'') { for i := 0; i < len(s)-1; i++ {
template.HTMLEscape(w, s[last:i]) ch := s[i]
last = i + 2 if ch == s[i+1] && (ch == '`' || ch == '\'') {
switch s[i] { template.HTMLEscape(w, s[last:i])
case '`': last = i + 2
w.Write(ldquo) switch ch {
case '\'': case '`':
w.Write(rdquo) w.Write(ldquo)
case '\'':
w.Write(rdquo)
}
i++ // loop will add one more
} }
i++ // loop will add one more
} }
} }
template.HTMLEscape(w, s[last:]) template.HTMLEscape(w, s[last:])
} }
// Regular expression source strings used to find Go identifiers and URLs
// in comment text. They are plain string constants; urlRx is assembled
// from the smaller pieces by concatenation.
const (
// Regexp for Go identifiers: a letter or underscore followed by any
// number of letters, digits, or underscores.
identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
// Regexp for URLs
// protocol: one of the listed scheme names followed by a colon.
protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
// hostPart: a non-empty run of ASCII letters, digits, '_', '@', or '-'.
hostPart = `[a-zA-Z0-9_@\-]+`
// filePart: a non-empty run of ASCII letters, digits, or one of
// _ ? % # ~ & / - + =
filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
// urlRx: protocol, "//", one or more hostParts joined by '.' or ':',
// an optional '/', then one or more fileParts joined by ':', '.', or ','.
// Note that at least one filePart is required after the host portion.
urlRx = protocol + `//` + // http://
hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
filePart + `([:.,]` + filePart + `)*`
)
// matchRx matches either a Go identifier or a URL. The first parenthesized
// group wraps identRx; the URL alternative is wrapped in the top-level group
// that follows it.
// NOTE(review): identRx is the first alternative and also matches a URL's
// scheme name (e.g. "http"), so recognizing whole URLs presumably relies on
// the regexp engine preferring the longest match at a position - confirm
// against the regexp package in use.
var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
var ( var (
html_a = []byte(`<a href="`)
html_aq = []byte(`">`)
html_enda = []byte("</a>")
html_i = []byte("<i>")
html_endi = []byte("</i>")
html_p = []byte("<p>\n") html_p = []byte("<p>\n")
html_endp = []byte("</p>\n") html_endp = []byte("</p>\n")
html_pre = []byte("<pre>") html_pre = []byte("<pre>")
...@@ -156,6 +189,66 @@ var ( ...@@ -156,6 +189,66 @@ var (
) )
// Emphasize and escape a line of text for HTML. URLs are converted into links;
// if the URL also appears in the words map, the link is taken from the map (if
// the corresponding map value is the empty string, the URL is not converted
// into a link). Go identifiers that appear in the words map are italicized; if
// the corresponding map value is not the empty string, it is considered a URL
// and the word is converted into a link. If nice is set, the remaining text's
// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
// and '' into &rdquo;).
//
// The line is consumed left to right: each iteration handles the next match
// of matchRx, and whatever follows the last match is escaped and written as is.
// A nil words map is permitted; then nothing is italicized and URLs link to
// themselves.
func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
for {
m := matchRx.Execute(line)
if len(m) == 0 {
break
}
// len(m) >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx);
// as used below, m[0]:m[1] delimits the whole match and m[2] is negative
// when the first sub-regexp did not take part in the match.
// write text before match
commentEscape(w, line[0:m[0]], nice)
// analyze match
match := line[m[0]:m[1]]
url := ""
italics := false
if words != nil {
// italics doubles as the "found in words" flag; url is the mapped value
url, italics = words[string(match)]
}
if m[2] < 0 {
// didn't match against first parenthesized sub-regexp; must be match against urlRx
if !italics {
// no alternative URL in words list, use match instead
url = string(match)
}
italics = false // don't italicize URLs
}
// write match
if len(url) > 0 {
// NOTE(review): the link target is passed through http.URLEscape before
// being written into the href attribute - confirm this does not mangle
// reserved characters such as '?', '&', or '#' in matched URLs.
w.Write(html_a)
w.Write([]byte(http.URLEscape(url)))
w.Write(html_aq)
}
if italics {
w.Write(html_i)
}
commentEscape(w, match, nice)
if italics {
w.Write(html_endi)
}
if len(url) > 0 {
w.Write(html_enda)
}
// advance past the match and continue with the rest of the line
line = line[m[1]:]
}
// write the remaining text after the last match (or the whole line if none)
commentEscape(w, line, nice)
}
func indentLen(s []byte) int { func indentLen(s []byte) int {
i := 0 i := 0
for i < len(s) && (s[i] == ' ' || s[i] == '\t') { for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
...@@ -207,11 +300,16 @@ func unindent(block [][]byte) { ...@@ -207,11 +300,16 @@ func unindent(block [][]byte) {
// The comment markers have already been removed. // The comment markers have already been removed.
// //
// Turn each run of multiple \n into </p><p> // Turn each run of multiple \n into </p><p>
// Turn each run of indented lines into <pre> without indent. // Turn each run of indented lines into a <pre> block without indent.
//
// URLs in the comment text are converted into links; if the URL also appears
// in the words map, the link is taken from the map (if the corresponding map
// value is the empty string, the URL is not converted into a link).
// //
// TODO(rsc): I'd like to pass in an array of variable names []string // Go identifiers that appear in the words map are italicized; if the corresponding
// and then italicize those strings when they appear as words. // map value is not the empty string, it is considered a URL and the word is converted
func ToHTML(w io.Writer, s []byte) { // into a link.
func ToHTML(w io.Writer, s []byte, words map[string]string) {
inpara := false inpara := false
close := func() { close := func() {
...@@ -255,19 +353,17 @@ func ToHTML(w io.Writer, s []byte) { ...@@ -255,19 +353,17 @@ func ToHTML(w io.Writer, s []byte) {
unindent(block) unindent(block)
// put those lines in a pre block. // put those lines in a pre block
// they don't get the nice text formatting,
// just html escaping
w.Write(html_pre) w.Write(html_pre)
for _, line := range block { for _, line := range block {
template.HTMLEscape(w, line) emphasize(w, line, nil, false) // no nice text formatting
} }
w.Write(html_endpre) w.Write(html_endpre)
continue continue
} }
// open paragraph // open paragraph
open() open()
commentEscape(w, lines[i]) emphasize(w, lines[i], words, true) // nice text formatting
i++ i++
} }
close() close()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment