Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
go
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
go
Commits
5a75ac88
Commit
5a75ac88
authored
Oct 26, 2009
by
Robert Griesemer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
- indexing component for godoc
R=rsc
http://go/go-review/1015014
parent
569a1cd5
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
664 additions
and
0 deletions
+664
-0
src/cmd/godoc/index.go
src/cmd/godoc/index.go
+664
-0
No files found.
src/cmd/godoc/index.go
0 → 100644
View file @
5a75ac88
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file contains the infrastructure to create an
// (identifier) index for a set of Go files.
//
// Basic indexing algorithm:
// - traverse all .go files of the file tree specified by root
// - for each word (identifier) encountered, collect all occurences (spots)
// into a list; this produces a list of spots for each word
// - reduce the lists: from a list of spots to a list of FileRuns,
// and from a list of FileRuns into a list of PakRuns
// - make a HitList from the PakRuns
//
// Details:
// - keep two lists per word: one containing package-level declarations
// that have snippets, and one containing all other spots
// - keep the snippets in a separate table indexed by snippet index
// and store the snippet index in place of the line number in a SpotInfo
// (the line number for spots with snippets is stored in the snippet)
// - at the end, create lists of alternative spellings for a given
// word
package
main
import
(
"container/vector"
;
"go/ast"
;
"go/parser"
;
"go/token"
;
"os"
;
pathutil
"path"
;
"sort"
;
"strings"
;
)
// ----------------------------------------------------------------------------
// Data structures used during indexing
// A RunList is a vector of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort critera) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
type
RunList
struct
{
vector
.
Vector
;
less
func
(
x
,
y
interface
{})
bool
;
}
func
(
h
*
RunList
)
Less
(
i
,
j
int
)
bool
{
return
h
.
less
(
h
.
At
(
i
),
h
.
At
(
j
));
}
func
(
h
*
RunList
)
sort
(
less
func
(
x
,
y
interface
{})
bool
)
{
h
.
less
=
less
;
sort
.
Sort
(
h
);
}
// Compress entries which are the same according to a sort criteria
// (specified by less) into "runs".
func
(
h
*
RunList
)
reduce
(
less
func
(
x
,
y
interface
{})
bool
,
newRun
func
(
h
*
RunList
,
i
,
j
int
)
interface
{})
*
RunList
{
// create runs of entries with equal values
h
.
sort
(
less
);
// for each run, make a new run object and collect them in a new RunList
var
hh
RunList
;
i
:=
0
;
for
j
:=
0
;
j
<
h
.
Len
();
j
++
{
if
less
(
h
.
At
(
i
),
h
.
At
(
j
))
{
hh
.
Push
(
newRun
(
h
,
i
,
j
));
i
=
j
;
// start a new run
}
}
// add final run, if any
if
i
<
h
.
Len
()
{
hh
.
Push
(
newRun
(
h
,
i
,
h
.
Len
()));
}
return
&
hh
;
}
// A SpotInfo value describes a particular identifier spot in a given file;
// It encodes three values: the SpotKind (declaration or use), a line or
// snippet index "lori", and whether it's a line or index.
//
// The following encoding is used:
//
// bits 32 4 1 0
// value [lori|kind|isIndex]
//
type
SpotInfo
uint32
// SpotKind describes whether an identifier is declared (and what kind of
// declaration) or used.
type
SpotKind
uint32
const
(
ImportDecl
SpotKind
=
iota
;
ConstDecl
;
TypeDecl
;
VarDecl
;
FuncDecl
;
MethodDecl
;
Use
;
nKinds
;
)
// makeSpotInfo makes a SpotInfo.
func
makeSpotInfo
(
kind
SpotKind
,
lori
int
,
isIndex
bool
)
SpotInfo
{
// encode lori: bits [4..32)
x
:=
SpotInfo
(
lori
)
<<
4
;
if
int
(
x
>>
4
)
!=
lori
{
// lori value doesn't fit - since snippet indices are
// most certainly always smaller then 1<<28, this can
// only happen for line numbers; give it no line number (= 0)
x
=
0
;
}
// encode kind: bits [1..4)
x
|=
SpotInfo
(
kind
)
<<
1
;
// encode isIndex: bit 0
if
isIndex
{
x
|=
1
;
}
return
x
;
}
func
(
x
SpotInfo
)
less
(
y
SpotInfo
)
bool
{
return
x
.
Lori
()
<
y
.
Lori
();
}
func
(
x
SpotInfo
)
Kind
()
SpotKind
{
return
SpotKind
(
x
>>
1
&
7
);
}
func
(
x
SpotInfo
)
Lori
()
int
{
return
int
(
x
>>
4
);
}
func
(
x
SpotInfo
)
IsIndex
()
bool
{
return
x
&
1
!=
0
;
}
// A Pak describes a Go package.
type
Pak
struct
{
Path
string
;
// directory name containing the package
Name
string
;
// package name as declared by package clause
}
func
(
p
*
Pak
)
less
(
q
*
Pak
)
bool
{
return
p
.
Path
<
q
.
Path
||
p
.
Name
<
q
.
Name
;
}
// A File describes a Go file.
type
File
struct
{
Path
string
;
// complete file name
Pak
Pak
;
// the package to which the file belongs
}
func
(
f
*
File
)
less
(
g
*
File
)
bool
{
return
f
.
Path
<
g
.
Path
;
}
// A Spot describes a single occurence of a word.
type
Spot
struct
{
File
*
File
;
Info
SpotInfo
;
}
// Spots are sorted by filename.
func
lessSpot
(
x
,
y
interface
{})
bool
{
return
x
.
(
Spot
)
.
File
.
less
(
y
.
(
Spot
)
.
File
);
}
// A FileRun describes a run of Spots of a word in a single file.
type
FileRun
struct
{
File
*
File
;
Infos
[]
SpotInfo
;
}
// newFileRun allocates a new *FileRun from the Spot run [i, j) in h.
func
newFileRun
(
h
*
RunList
,
i
,
j
int
)
interface
{}
{
file
:=
h
.
At
(
i
)
.
(
Spot
)
.
File
;
lines
:=
make
([]
SpotInfo
,
j
-
i
);
prev
:=
0
;
k
:=
0
;
for
;
i
<
j
;
i
++
{
info
:=
h
.
At
(
i
)
.
(
Spot
)
.
Info
;
// ignore line duplicates
// (if lori is a snippet index it is unique - no need to check IsIndex())
lori
:=
info
.
Lori
();
if
lori
!=
prev
{
lines
[
k
]
=
info
;
prev
=
lori
;
k
++
;
}
}
return
&
FileRun
{
file
,
lines
[
0
:
k
]};
}
// FileRuns are sorted by package.
func
lessFileRun
(
x
,
y
interface
{})
bool
{
return
x
.
(
*
FileRun
)
.
File
.
Pak
.
less
(
&
y
.
(
*
FileRun
)
.
File
.
Pak
);
}
// A PakRun describes a run of *FileRuns of a package.
type
PakRun
struct
{
Pak
Pak
;
Files
[]
*
FileRun
;
}
// Sorting support for files within a PakRun.
func
(
p
*
PakRun
)
Len
()
int
{
return
len
(
p
.
Files
);
}
func
(
p
*
PakRun
)
Less
(
i
,
j
int
)
bool
{
return
p
.
Files
[
i
]
.
File
.
less
(
p
.
Files
[
j
]
.
File
);
}
func
(
p
*
PakRun
)
Swap
(
i
,
j
int
)
{
p
.
Files
[
i
],
p
.
Files
[
j
]
=
p
.
Files
[
j
],
p
.
Files
[
i
];
}
// newPakRun allocates a new *PakRun from the *FileRun run [i, j) in h.
func
newPakRun
(
h
*
RunList
,
i
,
j
int
)
interface
{}
{
pak
:=
h
.
At
(
i
)
.
(
*
FileRun
)
.
File
.
Pak
;
files
:=
make
([]
*
FileRun
,
j
-
i
);
k
:=
0
;
for
;
i
<
j
;
i
++
{
files
[
k
]
=
h
.
At
(
i
)
.
(
*
FileRun
);
k
++
;
}
run
:=
&
PakRun
{
pak
,
files
};
sort
.
Sort
(
run
);
// files were sorted by package; sort them by file now
return
run
;
}
// PakRuns are sorted by package.
func
lessPakRun
(
x
,
y
interface
{})
bool
{
return
x
.
(
*
PakRun
)
.
Pak
.
less
(
&
y
.
(
*
PakRun
)
.
Pak
);
}
// A HitList describes a list of PakRuns.
type
HitList
[]
*
PakRun
func
reduce
(
h0
*
RunList
)
HitList
{
// reduce a list of Spots into a list of FileRuns
h1
:=
h0
.
reduce
(
lessSpot
,
newFileRun
);
// reduce a list of FileRuns into a list of PakRuns
h2
:=
h1
.
reduce
(
lessFileRun
,
newPakRun
);
// sort the list of PakRuns by package
h2
.
sort
(
lessPakRun
);
// create a HitList
h
:=
make
(
HitList
,
h2
.
Len
());
for
i
:=
0
;
i
<
h2
.
Len
();
i
++
{
h
[
i
]
=
h2
.
At
(
i
)
.
(
*
PakRun
);
}
return
h
;
}
func
(
h
HitList
)
filter
(
pakname
string
)
HitList
{
// determine number of matching packages (most of the time just one)
n
:=
0
;
for
_
,
p
:=
range
h
{
if
p
.
Pak
.
Name
==
pakname
{
n
++
;
}
}
// create filtered HitList
hh
:=
make
(
HitList
,
n
);
i
:=
0
;
for
_
,
p
:=
range
h
{
if
p
.
Pak
.
Name
==
pakname
{
hh
[
i
]
=
p
;
i
++
;
}
}
return
hh
;
}
type
wordPair
struct
{
canon
string
;
// canonical word spelling (all lowercase)
alt
string
;
// alternative spelling
}
// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type
AltWords
struct
{
Canon
string
;
// canonical word spelling (all lowercase)
Alts
[]
string
;
// alternative spelling for the same word
}
func
lessWordPair
(
x
,
y
interface
{})
bool
{
return
x
.
(
*
wordPair
)
.
canon
<
y
.
(
*
wordPair
)
.
canon
;
}
// newAltWords allocates a new *AltWords from the *wordPair run [i, j) in h.
func
newAltWords
(
h
*
RunList
,
i
,
j
int
)
interface
{}
{
canon
:=
h
.
At
(
i
)
.
(
*
wordPair
)
.
canon
;
alts
:=
make
([]
string
,
j
-
i
);
k
:=
0
;
for
;
i
<
j
;
i
++
{
alts
[
k
]
=
h
.
At
(
i
)
.
(
*
wordPair
)
.
alt
;
k
++
;
}
return
&
AltWords
{
canon
,
alts
};
}
func
(
a
*
AltWords
)
filter
(
s
string
)
*
AltWords
{
if
len
(
a
.
Alts
)
==
1
&&
a
.
Alts
[
0
]
==
s
{
// there are no different alternatives
return
nil
;
}
// make a new AltWords with the current spelling removed
alts
:=
make
([]
string
,
len
(
a
.
Alts
));
i
:=
0
;
for
_
,
w
:=
range
a
.
Alts
{
if
w
!=
s
{
alts
[
i
]
=
w
;
i
++
;
}
}
return
&
AltWords
{
a
.
Canon
,
alts
[
0
:
i
]};
}
// ----------------------------------------------------------------------------
// Indexer
type
IndexResult
struct
{
Decls
RunList
;
// package-level declarations (with snippets)
Others
RunList
;
// all other occurences
}
// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type
Indexer
struct
{
words
map
[
string
]
*
IndexResult
;
// RunLists of Spots
snippets
vector
.
Vector
;
// vector of *Snippets, indexed by snippet indices
file
*
File
;
// current file
decl
ast
.
Decl
;
// current decl
nspots
int
;
// number of spots encountered
}
func
(
x
*
Indexer
)
addSnippet
(
s
*
Snippet
)
int
{
index
:=
x
.
snippets
.
Len
();
x
.
snippets
.
Push
(
s
);
return
index
;
}
func
(
x
*
Indexer
)
visitComment
(
c
*
ast
.
CommentGroup
)
{
if
c
!=
nil
{
ast
.
Walk
(
x
,
c
);
}
}
func
(
x
*
Indexer
)
visitIdent
(
kind
SpotKind
,
id
*
ast
.
Ident
)
{
if
id
!=
nil
{
lists
,
found
:=
x
.
words
[
id
.
Value
];
if
!
found
{
lists
=
new
(
IndexResult
);
x
.
words
[
id
.
Value
]
=
lists
;
}
if
kind
==
Use
||
x
.
decl
==
nil
{
// not a declaration or no snippet required
info
:=
makeSpotInfo
(
kind
,
id
.
Pos
()
.
Line
,
false
);
lists
.
Others
.
Push
(
Spot
{
x
.
file
,
info
});
}
else
{
// a declaration with snippet
index
:=
x
.
addSnippet
(
NewSnippet
(
x
.
decl
,
id
));
info
:=
makeSpotInfo
(
kind
,
index
,
true
);
lists
.
Decls
.
Push
(
Spot
{
x
.
file
,
info
});
}
x
.
nspots
++
;
}
}
func
(
x
*
Indexer
)
visitSpec
(
spec
ast
.
Spec
,
isVarDecl
bool
)
{
switch
n
:=
spec
.
(
type
)
{
case
*
ast
.
ImportSpec
:
x
.
visitComment
(
n
.
Doc
);
x
.
visitIdent
(
ImportDecl
,
n
.
Name
);
for
_
,
s
:=
range
n
.
Path
{
ast
.
Walk
(
x
,
s
);
}
x
.
visitComment
(
n
.
Comment
);
case
*
ast
.
ValueSpec
:
x
.
visitComment
(
n
.
Doc
);
kind
:=
ConstDecl
;
if
isVarDecl
{
kind
=
VarDecl
;
}
for
_
,
n
:=
range
n
.
Names
{
x
.
visitIdent
(
kind
,
n
);
}
ast
.
Walk
(
x
,
n
.
Type
);
for
_
,
v
:=
range
n
.
Values
{
ast
.
Walk
(
x
,
v
);
}
x
.
visitComment
(
n
.
Comment
);
case
*
ast
.
TypeSpec
:
x
.
visitComment
(
n
.
Doc
);
x
.
visitIdent
(
TypeDecl
,
n
.
Name
);
ast
.
Walk
(
x
,
n
.
Type
);
x
.
visitComment
(
n
.
Comment
);
}
}
func
(
x
*
Indexer
)
Visit
(
node
interface
{})
bool
{
// TODO(gri): methods in interface types are categorized as VarDecl
switch
n
:=
node
.
(
type
)
{
case
*
ast
.
Ident
:
x
.
visitIdent
(
Use
,
n
);
case
*
ast
.
Field
:
x
.
decl
=
nil
;
// no snippets for fields
x
.
visitComment
(
n
.
Doc
);
for
_
,
m
:=
range
n
.
Names
{
x
.
visitIdent
(
VarDecl
,
m
);
}
ast
.
Walk
(
x
,
n
.
Type
);
for
_
,
s
:=
range
n
.
Tag
{
ast
.
Walk
(
x
,
s
);
}
x
.
visitComment
(
n
.
Comment
);
case
*
ast
.
DeclStmt
:
if
decl
,
ok
:=
n
.
Decl
.
(
*
ast
.
GenDecl
);
ok
{
// local declarations can only be *ast.GenDecls
x
.
decl
=
nil
;
// no snippets for local declarations
x
.
visitComment
(
decl
.
Doc
);
for
_
,
s
:=
range
decl
.
Specs
{
x
.
visitSpec
(
s
,
decl
.
Tok
==
token
.
VAR
);
}
}
else
{
// handle error case gracefully
ast
.
Walk
(
x
,
n
.
Decl
);
}
case
*
ast
.
GenDecl
:
x
.
decl
=
n
;
x
.
visitComment
(
n
.
Doc
);
for
_
,
s
:=
range
n
.
Specs
{
x
.
visitSpec
(
s
,
n
.
Tok
==
token
.
VAR
);
}
case
*
ast
.
FuncDecl
:
x
.
visitComment
(
n
.
Doc
);
kind
:=
FuncDecl
;
if
n
.
Recv
!=
nil
{
kind
=
MethodDecl
;
ast
.
Walk
(
x
,
n
.
Recv
);
}
x
.
decl
=
n
;
x
.
visitIdent
(
kind
,
n
.
Name
);
ast
.
Walk
(
x
,
n
.
Type
);
if
n
.
Body
!=
nil
{
ast
.
Walk
(
x
,
n
.
Type
);
}
default
:
return
true
;
}
return
false
;
}
func
(
x
*
Indexer
)
VisitDir
(
path
string
,
d
*
os
.
Dir
)
bool
{
return
true
;
}
func
(
x
*
Indexer
)
VisitFile
(
path
string
,
d
*
os
.
Dir
)
{
if
!
isGoFile
(
d
)
{
return
;
}
file
,
err
:=
parser
.
ParseFile
(
path
,
nil
,
parser
.
ParseComments
);
if
err
!=
nil
{
return
;
// ignore files with (parse) errors
}
dir
,
_
:=
pathutil
.
Split
(
path
);
pak
:=
Pak
{
dir
,
file
.
Name
.
Value
};
x
.
file
=
&
File
{
path
,
pak
};
ast
.
Walk
(
x
,
file
);
}
// ----------------------------------------------------------------------------
// Index
type
LookupResult
struct
{
Decls
HitList
;
// package-level declarations (with snippets)
Others
HitList
;
// all other occurences
}
type
Index
struct
{
words
map
[
string
]
*
LookupResult
;
// maps words to hit lists
alts
map
[
string
]
*
AltWords
;
// maps canonical(words) to lists of alternative spellings
snippets
[]
*
Snippet
;
// all snippets, indexed by snippet index
nspots
int
;
// number of spots indexed (a measure of the index size)
}
func
canonical
(
w
string
)
string
{
return
strings
.
ToLower
(
w
);
}
// NewIndex creates a new index for the file tree rooted at root.
func
NewIndex
(
root
string
)
*
Index
{
var
x
Indexer
;
// initialize Indexer
x
.
words
=
make
(
map
[
string
]
*
IndexResult
);
// collect all Spots
pathutil
.
Walk
(
root
,
&
x
,
nil
);
// for each word, reduce the RunLists into a LookupResult;
// also collect the word with its canonical spelling in a
// word list for later computation of alternative spellings
words
:=
make
(
map
[
string
]
*
LookupResult
);
var
wlist
RunList
;
for
w
,
h
:=
range
x
.
words
{
decls
:=
reduce
(
&
h
.
Decls
);
others
:=
reduce
(
&
h
.
Others
);
words
[
w
]
=
&
LookupResult
{
Decls
:
decls
,
Others
:
others
,
};
wlist
.
Push
(
&
wordPair
{
canonical
(
w
),
w
});
}
// reduce the word list {canonical(w), w} into
// a list of AltWords runs {canonical(w), {w}}
alist
:=
wlist
.
reduce
(
lessWordPair
,
newAltWords
);
// convert alist into a map of alternative spellings
alts
:=
make
(
map
[
string
]
*
AltWords
);
for
i
:=
0
;
i
<
alist
.
Len
();
i
++
{
a
:=
alist
.
At
(
i
)
.
(
*
AltWords
);
alts
[
a
.
Canon
]
=
a
;
}
// convert snippet vector into a list
snippets
:=
make
([]
*
Snippet
,
x
.
snippets
.
Len
());
for
i
:=
0
;
i
<
x
.
snippets
.
Len
();
i
++
{
snippets
[
i
]
=
x
.
snippets
.
At
(
i
)
.
(
*
Snippet
);
}
return
&
Index
{
words
,
alts
,
snippets
,
x
.
nspots
};
}
// Size returns the number of different words and
// spots indexed as a measure for the index size.
func
(
x
*
Index
)
Size
()
(
nwords
int
,
nspots
int
)
{
return
len
(
x
.
words
),
x
.
nspots
;
}
func
(
x
*
Index
)
LookupWord
(
w
string
)
(
match
*
LookupResult
,
alt
*
AltWords
)
{
match
,
_
=
x
.
words
[
w
];
alt
,
_
=
x
.
alts
[
canonical
(
w
)];
// remove current spelling from alternatives
// (if there is no match, the alternatives do
// not contain the current spelling)
if
match
!=
nil
&&
alt
!=
nil
{
alt
=
alt
.
filter
(
w
);
}
return
;
}
// For a given string s, which is either a single identifier or a qualified
// identifier, Lookup returns a LookupResult, and a list of alternative
// spellings, if any.
func
(
x
*
Index
)
Lookup
(
s
string
)
(
match
*
LookupResult
,
alt
*
AltWords
)
{
ss
:=
strings
.
Split
(
s
,
"."
,
0
);
switch
len
(
ss
)
{
case
1
:
match
,
alt
=
x
.
LookupWord
(
ss
[
0
]);
case
2
:
pakname
:=
ss
[
0
];
match
,
alt
=
x
.
LookupWord
(
ss
[
1
]);
if
match
!=
nil
{
// found a match - filter by package name
decls
:=
match
.
Decls
.
filter
(
pakname
);
others
:=
match
.
Others
.
filter
(
pakname
);
match
=
&
LookupResult
{
decls
,
others
};
}
if
alt
!=
nil
{
// alternative spellings found - add package name
// TODO(gri): At the moment this is not very smart
// and likely will produce suggestions that have
// no match. Should filter incorrect alternatives.
canon
:=
pakname
+
"."
+
alt
.
Canon
;
// for completeness (currently not used)
alts
:=
make
([]
string
,
len
(
alt
.
Alts
));
for
i
,
a
:=
range
alt
.
Alts
{
alts
[
i
]
=
pakname
+
"."
+
a
;
}
alt
=
&
AltWords
{
canon
,
alts
};
}
}
return
;
}
func
(
x
*
Index
)
Snippet
(
i
int
)
*
Snippet
{
// handle illegal snippet indices gracefully
if
0
<=
i
&&
i
<
len
(
x
.
snippets
)
{
return
x
.
snippets
[
i
];
}
return
nil
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment