Commit 055650da authored by Robert Griesemer's avatar Robert Griesemer

godoc: first cut at textual search

To enable it, use the -fulltext flag; e.g.: godoc -v -fulltext -http=:7777

Enabling the fulltext index will use significantly more memory, as
the text of all source code, the respective suffix array, and the
file set data structure are all kept in memory. At the moment there
is about 6MB of source code (~1400 files) indexed under GOROOT.
Source code + suffix array together consume 5*(size of source), or
about 30MB. The file set data structure consumes about 4 bytes per
source line. By default, only up to 5000 results are shown for now.

The presentation of the results needs tuning. In particular,
if a string is found, clicking on the respective file does not
yet highlight the occurrences of that string.

At the moment, only Go source files are indexed. Eventually,
the full text index should encompass other files as well.

R=rsc, adg
CC=golang-dev
https://golang.org/cl/3182043
parent 9282a768
...@@ -66,3 +66,27 @@ ...@@ -66,3 +66,27 @@
or a qualified identifier (such as <a href="search?q=math.Sin">math.Sin</a>). or a qualified identifier (such as <a href="search?q=math.Sin">math.Sin</a>).
</p> </p>
{.end} {.end}
{.section Textual}
<h2 id="Textual">Textual occurrences</h2>
<table class="layout">
<tr>
<th align=left>File</th>
<th align=left>Occurrences</th>
<th align=left>Lines</th>
</tr>
{.repeated section @}
<tr>
<td>
<a href="/{Filename|url-src}?h={Query|html-esc}">{Filename|url-src}</a>:
</td>
{Lines|linelist}
</tr>
{.end}
</table>
{.end}
{.section Complete}
{.or}
<p>
<span class="alert" style="font-size:120%">Incomplete list of results</span>
</p>
{.end}
...@@ -47,6 +47,8 @@ The flags are: ...@@ -47,6 +47,8 @@ The flags are:
width of tabs in units of spaces width of tabs in units of spaces
-timestamps=true -timestamps=true
show timestamps with directory listings show timestamps with directory listings
-fulltext=false
build full text index for string search results
-path="" -path=""
additional package directories (colon-separated) additional package directories (colon-separated)
-html -html
......
...@@ -63,6 +63,7 @@ var ( ...@@ -63,6 +63,7 @@ var (
// layout control // layout control
tabwidth = flag.Int("tabwidth", 4, "tab width") tabwidth = flag.Int("tabwidth", 4, "tab width")
showTimestamps = flag.Bool("timestamps", true, "show timestamps with directory listings") showTimestamps = flag.Bool("timestamps", true, "show timestamps with directory listings")
fulltextIndex = flag.Bool("fulltext", false, "build full text index for string search results")
// file system mapping // file system mapping
fsMap Mapping // user-defined mapping fsMap Mapping // user-defined mapping
...@@ -736,6 +737,25 @@ func localnameFmt(w io.Writer, format string, x ...interface{}) { ...@@ -736,6 +737,25 @@ func localnameFmt(w io.Writer, format string, x ...interface{}) {
} }
// linelistFmt is the template formatter for the "linelist" format.
//
// x[0] must be a []int of line numbers (the type assertion panics
// otherwise). The formatter writes one table cell holding the total number
// of entries, followed by one table cell per line number. At most maxLines
// line numbers are written; when the list is longer, a single "..." cell
// stands in for the remainder. The format argument is not used.
func linelistFmt(w io.Writer, format string, x ...interface{}) {
	const maxLines = 20 // upper bound on the number of line cells written

	lines := x[0].([]int)

	// The first cell reports the full count, including lines cut off below.
	fmt.Fprintf(w, "<td>%d</td>", len(lines))

	// Decide up front which prefix of the list is shown.
	// TODO(gri) should sort them
	shown := lines
	truncated := len(lines) > maxLines
	if truncated {
		shown = lines[:maxLines]
	}

	for _, n := range shown {
		fmt.Fprintf(w, "<td>%d</td>", n)
	}

	// Signal that further lines exist but were omitted.
	if truncated {
		fmt.Fprint(w, "<td>...</td>")
	}
}
var fmap = template.FormatterMap{ var fmap = template.FormatterMap{
"": textFmt, "": textFmt,
"html": htmlFmt, "html": htmlFmt,
...@@ -751,6 +771,7 @@ var fmap = template.FormatterMap{ ...@@ -751,6 +771,7 @@ var fmap = template.FormatterMap{
"time": timeFmt, "time": timeFmt,
"dir/": dirslashFmt, "dir/": dirslashFmt,
"localname": localnameFmt, "localname": localnameFmt,
"linelist": linelistFmt,
} }
...@@ -1309,17 +1330,23 @@ var searchIndex RWValue ...@@ -1309,17 +1330,23 @@ var searchIndex RWValue
type SearchResult struct { type SearchResult struct {
Query string Query string
Hit *LookupResult Hit *LookupResult // identifier occurences of Query
Alt *AltWords Alt *AltWords // alternative identifiers to look for
Illegal bool Illegal bool // true if Query for identifier search has incorrect syntax
Accurate bool Textual []Positions // textual occurences of Query
Complete bool // true if all textual occurences of Query are reported
Accurate bool // true if the index is not older than the indexed files
} }
func lookup(query string) (result SearchResult) { func lookup(query string) (result SearchResult) {
result.Query = query result.Query = query
if index, timestamp := searchIndex.get(); index != nil { if index, timestamp := searchIndex.get(); index != nil {
result.Hit, result.Alt, result.Illegal = index.(*Index).Lookup(query) index := index.(*Index)
result.Hit, result.Alt, result.Illegal = index.Lookup(query)
// TODO(gri) should max be a flag?
const max = 5000 // show at most this many fulltext results
result.Textual, result.Complete = index.LookupString(query, max)
_, ts := fsModified.get() _, ts := fsModified.get()
result.Accurate = timestamp >= ts result.Accurate = timestamp >= ts
} }
...@@ -1338,7 +1365,7 @@ func search(w http.ResponseWriter, r *http.Request) { ...@@ -1338,7 +1365,7 @@ func search(w http.ResponseWriter, r *http.Request) {
} }
var title string var title string
if result.Hit != nil { if result.Hit != nil || len(result.Textual) > 0 {
title = fmt.Sprintf(`Results for query %q`, query) title = fmt.Sprintf(`Results for query %q`, query)
} else { } else {
title = fmt.Sprintf(`No results found for query %q`, query) title = fmt.Sprintf(`No results found for query %q`, query)
...@@ -1407,17 +1434,18 @@ func indexer() { ...@@ -1407,17 +1434,18 @@ func indexer() {
log.Printf("updating index...") log.Printf("updating index...")
} }
start := time.Nanoseconds() start := time.Nanoseconds()
index := NewIndex(fsDirnames()) index := NewIndex(fsDirnames(), *fulltextIndex)
stop := time.Nanoseconds() stop := time.Nanoseconds()
searchIndex.set(index) searchIndex.set(index)
if *verbose { if *verbose {
secs := float64((stop-start)/1e6) / 1e3 secs := float64((stop-start)/1e6) / 1e3
nwords, nspots := index.Size() stats := index.Stats()
log.Printf("index updated (%gs, %d unique words, %d spots)", secs, nwords, nspots) log.Printf("index updated (%gs, %d bytes of source, %d files, %d unique words, %d spots)",
secs, stats.Bytes, stats.Files, stats.Words, stats.Spots)
} }
log.Printf("bytes=%d footprint=%d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) log.Printf("before GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
runtime.GC() runtime.GC()
log.Printf("bytes=%d footprint=%d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) log.Printf("after GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys)
} }
time.Sleep(1 * 60e9) // try once a minute time.Sleep(1 * 60e9) // try once a minute
} }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment