Commit c945f77f authored by Russ Cox

exp/norm: use rune

Nothing terribly interesting here. (!)

Since the public APIs are all in terms of UTF-8,
the changes are all internal only.

R=mpvl, gri, r
CC=golang-dev
https://golang.org/cl/5309042
parent b50a847c
......@@ -126,26 +126,26 @@ func (rb *reorderBuffer) insert(src input, i int, info runeInfo) bool {
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(rune uint32) {
func (rb *reorderBuffer) appendRune(r uint32) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, rune uint32) {
func (rb *reorderBuffer) assignRune(pos int, r uint32) {
bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) uint32 {
inf := rb.rune[n]
rune, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return uint32(rune)
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return uint32(r)
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
......@@ -237,17 +237,17 @@ func isHangulWithoutJamoT(b []byte) bool {
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(rune uint32) bool {
func (rb *reorderBuffer) decomposeHangul(r uint32) bool {
b := rb.rune[:]
n := rb.nrune
if n+3 > len(b) {
return false
}
rune -= hangulBase
x := rune % jamoTCount
rune /= jamoTCount
rb.appendRune(jamoLBase + rune/jamoVCount)
rb.appendRune(jamoVBase + rune%jamoVCount)
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
rb.appendRune(jamoLBase + r/jamoVCount)
rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
......
......@@ -8,14 +8,14 @@ import "testing"
// TestCase is used for most tests.
type TestCase struct {
in []int
out []int
in []rune
out []rune
}
type insertFunc func(rb *reorderBuffer, rune int) bool
type insertFunc func(rb *reorderBuffer, r rune) bool
func insert(rb *reorderBuffer, rune int) bool {
src := inputString(string(rune))
func insert(rb *reorderBuffer, r rune) bool {
src := inputString(string(r))
return rb.insert(src, 0, rb.f.info(src, 0))
}
......@@ -39,7 +39,7 @@ func runTests(t *testing.T, name string, fm Form, f insertFunc, tests []TestCase
continue
}
for j, want := range test.out {
found := int(rb.runeAt(j))
found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
}
......@@ -57,7 +57,7 @@ func TestFlush(t *testing.T) {
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", len(out))
}
for _, r := range []int("world!") {
for _, r := range []rune("world!") {
insert(&rb, r)
}
......@@ -76,14 +76,14 @@ func TestFlush(t *testing.T) {
}
var insertTests = []TestCase{
{[]int{'a'}, []int{'a'}},
{[]int{0x300}, []int{0x300}},
{[]int{0x300, 0x316}, []int{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
{[]int{0x316, 0x300}, []int{0x316, 0x300}},
{[]int{0x41, 0x316, 0x300}, []int{0x41, 0x316, 0x300}},
{[]int{0x41, 0x300, 0x316}, []int{0x41, 0x316, 0x300}},
{[]int{0x300, 0x316, 0x41}, []int{0x316, 0x300, 0x41}},
{[]int{0x41, 0x300, 0x40, 0x316}, []int{0x41, 0x300, 0x40, 0x316}},
{[]rune{'a'}, []rune{'a'}},
{[]rune{0x300}, []rune{0x300}},
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
......@@ -91,18 +91,18 @@ func TestInsert(t *testing.T) {
}
var decompositionNFDTest = []TestCase{
{[]int{0xC0}, []int{0x41, 0x300}},
{[]int{0xAC00}, []int{0x1100, 0x1161}},
{[]int{0x01C4}, []int{0x01C4}},
{[]int{0x320E}, []int{0x320E}},
{[]int("음ẻ과"), []int{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x01C4}},
{[]rune{0x320E}, []rune{0x320E}},
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
{[]int{0xC0}, []int{0x41, 0x300}},
{[]int{0xAC00}, []int{0x1100, 0x1161}},
{[]int{0x01C4}, []int{0x44, 0x5A, 0x030C}},
{[]int{0x320E}, []int{0x28, 0x1100, 0x1161, 0x29}},
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
......@@ -111,15 +111,15 @@ func TestDecomposition(t *testing.T) {
}
var compositionTest = []TestCase{
{[]int{0x41, 0x300}, []int{0xC0}},
{[]int{0x41, 0x316}, []int{0x41, 0x316}},
{[]int{0x41, 0x300, 0x35D}, []int{0xC0, 0x35D}},
{[]int{0x41, 0x316, 0x300}, []int{0xC0, 0x316}},
{[]rune{0x41, 0x300}, []rune{0xC0}},
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
{[]int{0x41, 0x316, 0x40, 0x300}, []int{0x41, 0x316, 0x40, 0x300}},
{[]int{0x1100, 0x1161}, []int{0xAC00}},
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
{[]int{0x28, 0x1100, 0x1161, 0x29}, []int{0x28, 0xAC00, 0x29}},
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
......
......@@ -119,7 +119,7 @@ const (
// This contains only the properties we're interested in.
type Char struct {
name string
codePoint int // if zero, this index is not a valid code point.
codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
......@@ -160,7 +160,7 @@ const (
SMissing
)
var lastChar int = 0
var lastChar = rune('\u0000')
func (c Char) isValid() bool {
return c.codePoint != 0 && c.state != SMissing
......@@ -193,7 +193,7 @@ func (f FormInfo) String() string {
return buf.String()
}
type Decomposition []int
type Decomposition []rune
func (d Decomposition) String() string {
return fmt.Sprintf("%.4X", d)
......@@ -220,7 +220,7 @@ func openReader(file string) (input io.ReadCloser) {
return
}
func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) {
func parseDecomposition(s string, skipfirst bool) (a []rune, e os.Error) {
decomp := strings.Split(s, " ")
if len(decomp) > 0 && skipfirst {
decomp = decomp[1:]
......@@ -230,7 +230,7 @@ func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) {
if err != nil {
return a, err
}
a = append(a, int(point))
a = append(a, rune(point))
}
return a, nil
}
......@@ -260,7 +260,7 @@ func parseCharacter(line string) {
state = SLast
}
firstChar := lastChar + 1
lastChar = int(point)
lastChar = rune(point)
if state != SLast {
firstChar = lastChar
}
......@@ -370,8 +370,8 @@ func loadCompositionExclusions() {
// hasCompatDecomp returns true if any of the recursive
// decompositions contains a compatibility expansion.
// In this case, the character may not occur in NFK*.
func hasCompatDecomp(rune int) bool {
c := &chars[rune]
func hasCompatDecomp(r rune) bool {
c := &chars[r]
if c.compatDecomp {
return true
}
......@@ -396,19 +396,19 @@ const (
JamoTEnd = 0x11C3
)
func isHangul(rune int) bool {
return HangulBase <= rune && rune < HangulEnd
func isHangul(r rune) bool {
return HangulBase <= r && r < HangulEnd
}
func ccc(rune int) uint8 {
return chars[rune].ccc
func ccc(r rune) uint8 {
return chars[r].ccc
}
// Insert a rune in a buffer, ordered by Canonical Combining Class.
func insertOrdered(b Decomposition, rune int) Decomposition {
func insertOrdered(b Decomposition, r rune) Decomposition {
n := len(b)
b = append(b, 0)
cc := ccc(rune)
cc := ccc(r)
if cc > 0 {
// Use bubble sort.
for ; n > 0; n-- {
......@@ -418,18 +418,18 @@ func insertOrdered(b Decomposition, rune int) Decomposition {
b[n] = b[n-1]
}
}
b[n] = rune
b[n] = r
return b
}
// Recursively decompose.
func decomposeRecursive(form int, rune int, d Decomposition) Decomposition {
if isHangul(rune) {
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
if isHangul(r) {
return d
}
dcomp := chars[rune].forms[form].decomp
dcomp := chars[r].forms[form].decomp
if len(dcomp) == 0 {
return insertOrdered(d, rune)
return insertOrdered(d, r)
}
for _, c := range dcomp {
d = decomposeRecursive(form, c, d)
......@@ -475,8 +475,8 @@ func completeCharFields(form int) {
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
for _, rune := range f.decomp {
chars[rune].forms[form].inDecomp = true
for _, r := range f.decomp {
chars[r].forms[form].inDecomp = true
}
}
......@@ -505,7 +505,7 @@ func completeCharFields(form int) {
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
case isHangul(i):
case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
......@@ -588,7 +588,7 @@ func printCharInfoTables() int {
for i, char := range chars {
v := makeCharInfo(char)
if v != 0 {
t.insert(i, v)
t.insert(rune(i), v)
}
}
return t.printTables("charInfo")
......@@ -606,7 +606,7 @@ func printDecompositionTables() int {
for _, c := range chars {
for f := 0; f < 2; f++ {
d := c.forms[f].expandedDecomp
s := string([]int(d))
s := string([]rune(d))
if _, ok := positionMap[s]; !ok {
p := decompositions.Len()
decompositions.WriteByte(uint8(len(s)))
......@@ -624,7 +624,7 @@ func printDecompositionTables() int {
for i, c := range chars {
d := c.forms[FCanonical].expandedDecomp
if len(d) != 0 {
nfcT.insert(i, positionMap[string([]int(d))])
nfcT.insert(rune(i), positionMap[string([]rune(d))])
if ccc(c.codePoint) != ccc(d[0]) {
// We assume the lead ccc of a decomposition is !=0 in this case.
if ccc(d[0]) == 0 {
......@@ -634,7 +634,7 @@ func printDecompositionTables() int {
}
d = c.forms[FCompatibility].expandedDecomp
if len(d) != 0 {
nfkcT.insert(i, positionMap[string([]int(d))])
nfkcT.insert(rune(i), positionMap[string([]rune(d))])
if ccc(c.codePoint) != ccc(d[0]) {
// We assume the lead ccc of a decomposition is !=0 in this case.
if ccc(d[0]) == 0 {
......@@ -752,7 +752,7 @@ func verifyComputed() {
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
if (len(f.decomp) > 0) != isNo && !isHangul(i) {
if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
}
......
......@@ -16,7 +16,7 @@ func main() {
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
var testRunes = []int{
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
......
......@@ -28,13 +28,13 @@ func runPosTests(t *testing.T, name string, f Form, fn positionFunc, tests []Pos
if pos != test.pos {
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
}
runes := []int(test.buffer)
runes := []rune(test.buffer)
if rb.nrune != len(runes) {
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
continue
}
for j, want := range runes {
found := int(rb.runeAt(j))
found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
}
......@@ -385,8 +385,8 @@ func runAppendTests(t *testing.T, name string, f Form, fn appendFunc, tests []Ap
}
if outs != test.out {
// Find first rune that differs and show context.
ir := []int(outs)
ig := []int(test.out)
ir := []rune(outs)
ig := []rune(test.out)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
......
......@@ -103,7 +103,7 @@ type Test struct {
name string
partnr int
number int
rune int // used for character by character test
r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
......@@ -174,12 +174,12 @@ func loadTestData() {
if err != nil {
logger.Fatal(err)
}
if test.rune == 0 {
if test.r == 0 {
// save for CharacterByCharacterTests
test.rune = int(r)
test.r = int(r)
}
var buf [utf8.UTFMax]byte
sz := utf8.EncodeRune(buf[:], int(r))
sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
......@@ -198,7 +198,7 @@ func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
if errorCount > 20 {
return
}
st, sr, sg := []int(test), []int(result), []int(gold)
st, sr, sg := []rune(test), []rune(result), []rune(gold)
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
t.Name(), name, fstr[f], st, sr, sg, t.name)
}
......@@ -210,7 +210,7 @@ func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bo
if errorCount > 20 {
return
}
logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []int(test), result, want)
logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
}
}
......@@ -243,13 +243,13 @@ func CharacterByCharacterTests() {
tests := part[1].tests
last := 0
for i := 0; i <= len(tests); i++ { // last one is special case
var rune int
var r int
if i == len(tests) {
rune = 0x2FA1E // Don't have to go to 0x10FFFF
r = 0x2FA1E // Don't have to go to 0x10FFFF
} else {
rune = tests[i].rune
r = tests[i].r
}
for last++; last < rune; last++ {
for last++; last < r; last++ {
// Check all characters that were not explicitly listed in the test.
t := &Test{partnr: 1, number: -1}
char := string(last)
......
......@@ -73,15 +73,15 @@ var tests = []trietest{
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
func mkUtf8(rune int) ([]byte, int) {
func mkUTF8(r rune) ([]byte, int) {
var b [utf8.UTFMax]byte
sz := utf8.EncodeRune(b[:], rune)
sz := utf8.EncodeRune(b[:], r)
return b[:sz], sz
}
func TestLookup(t *testing.T) {
for i, tt := range testRunes {
b, szg := mkUtf8(tt)
b, szg := mkUTF8(tt)
v, szt := testdata.lookup(b)
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
......@@ -103,7 +103,7 @@ func TestLookup(t *testing.T) {
func TestLookupUnsafe(t *testing.T) {
for i, tt := range testRunes {
b, _ := mkUtf8(tt)
b, _ := mkUTF8(tt)
v := testdata.lookupUnsafe(b)
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
......@@ -113,7 +113,7 @@ func TestLookupUnsafe(t *testing.T) {
func TestLookupString(t *testing.T) {
for i, tt := range testRunes {
b, szg := mkUtf8(tt)
b, szg := mkUTF8(tt)
v, szt := testdata.lookupString(string(b))
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
......@@ -135,7 +135,7 @@ func TestLookupString(t *testing.T) {
func TestLookupStringUnsafe(t *testing.T) {
for i, tt := range testRunes {
b, _ := mkUtf8(tt)
b, _ := mkUTF8(tt)
v := testdata.lookupStringUnsafe(string(b))
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
......
......@@ -4,7 +4,7 @@
package norm
var testRunes = []int{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
// testdataValues: 192 entries, 384 bytes
// Block 2 is the null block.
......
......@@ -94,9 +94,9 @@ func (n trieNode) countSparseEntries() int {
return count
}
func (n *trieNode) insert(rune int, value uint16) {
func (n *trieNode) insert(r rune, value uint16) {
var p [utf8.UTFMax]byte
sz := utf8.EncodeRune(p[:], rune)
sz := utf8.EncodeRune(p[:], r)
for i := 0; i < sz; i++ {
if n.leaf {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment