Commit 18274e00, authored by Roger Peppe, committed by Rob Pike

Add IndexFunc and LastIndexFunc.

Change TrimRight and TrimLeft to use these functions.
Incidentally fix minor bug in TrimRight.
Add some test cases for this.
YMMV whether it's worth saving the closure allocation.

R=r, r2
CC=golang-dev, hoisie, rsc
https://golang.org/cl/1198044
parent b9055629
...@@ -328,49 +328,99 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string { ...@@ -328,49 +328,99 @@ func ToTitleSpecial(_case unicode.SpecialCase, s string) string {
// TrimLeftFunc returns a slice of the string s with all leading // TrimLeftFunc returns a slice of the string s with all leading
// Unicode code points c satisfying f(c) removed. // Unicode code points c satisfying f(c) removed.
func TrimLeftFunc(s string, f func(r int) bool) string { func TrimLeftFunc(s string, f func(r int) bool) string {
start, end := 0, len(s) i := indexFunc(s, f, false)
for start < end { if i == -1 {
return ""
}
return s[i:]
}
// TrimRightFunc returns the prefix of s that remains once every
// trailing Unicode code point c with f(c) true has been dropped.
func TrimRightFunc(s string, f func(r int) bool) string {
	// last is the byte offset of the final code point to keep
	// (the last one for which f is false), or -1 if there is none.
	last := lastIndexFunc(s, f, false)
	if last < 0 || s[last] < utf8.RuneSelf {
		// Either nothing is kept (last+1 == 0) or the kept code
		// point is a single byte, so the cut is one past it.
		return s[0 : last+1]
	}
	// Non-ASCII byte: ask the decoder how many bytes to step over.
	_, size := utf8.DecodeRuneInString(s[last:])
	return s[0 : last+size]
}
// TrimFunc returns a slice of the string s with all leading
// and trailing Unicode code points c satisfying f(c) removed.
func TrimFunc(s string, f func(r int) bool) string {
return TrimRightFunc(TrimLeftFunc(s, f), f)
}
// IndexFunc reports the index into s of the first Unicode code
// point c for which f(c) is true, or -1 when no code point qualifies.
func IndexFunc(s string, f func(r int) bool) int {
	// truth == true asks indexFunc to use f as given rather than
	// its inverse.
	const truth = true
	return indexFunc(s, f, truth)
}
// LastIndexFunc reports the index into s of the last Unicode code
// point c for which f(c) is true, or -1 when no code point qualifies.
func LastIndexFunc(s string, f func(r int) bool) int {
	// truth == true asks lastIndexFunc to use f as given rather
	// than its inverse.
	const truth = true
	return lastIndexFunc(s, f, truth)
}
// indexFunc is the same as IndexFunc except that if
// truth==false, the sense of the predicate function is
// inverted. We could use IndexFunc directly, but this
// way saves a closure allocation.
func indexFunc(s string, f func(r int) bool, truth bool) int {
start := 0
for start < len(s) {
wid := 1 wid := 1
rune := int(s[start]) rune := int(s[start])
if rune >= utf8.RuneSelf { if rune >= utf8.RuneSelf {
rune, wid = utf8.DecodeRuneInString(s[start:end]) rune, wid = utf8.DecodeRuneInString(s[start:])
} }
if !f(rune) { if f(rune) == truth {
return s[start:] return start
} }
start += wid start += wid
} }
return s[start:] return -1
} }
// TrimRightFunc returns a slice of the string s with all trailing // lastIndexFunc is the same as LastIndexFunc except that if
// Unicode code points c satisfying f(c) removed. // truth==false, the sense of the predicate function is
func TrimRightFunc(s string, f func(r int) bool) string { // inverted. We could use LastIndexFunc directly, but this
start, end := 0, len(s) // way saves a closure allocation.
for start < end { func lastIndexFunc(s string, f func(r int) bool, truth bool) int {
wid := 1 end := len(s)
rune := int(s[end-wid]) for end > 0 {
start := end - 1
rune := int(s[start])
if rune >= utf8.RuneSelf { if rune >= utf8.RuneSelf {
// Back up & look for beginning of rune. Mustn't pass start. // Back up & look for beginning of rune. Mustn't pass start.
for wid = 2; start <= end-wid && !utf8.RuneStart(s[end-wid]); wid++ { for start--; start >= 0; start-- {
if utf8.RuneStart(s[start]) {
break
}
} }
if start > end-wid { // invalid UTF-8 sequence; stop processing if start < 0 {
return s[start:end] return -1
}
var wid int
rune, wid = utf8.DecodeRuneInString(s[start:end])
// If we've decoded fewer bytes than we expected,
// we've got some invalid UTF-8, so make sure we return
// the last possible index in s.
if start+wid < end && f(utf8.RuneError) == truth {
return end - 1
} }
rune, wid = utf8.DecodeRuneInString(s[end-wid : end])
} }
if !f(rune) { if f(rune) == truth {
return s[0:end] return start
} }
end -= wid end = start
} }
return s[0:end] return -1
}
// TrimFunc returns a slice of the string s with all leading
// and trailing Unicode code points c satisfying f(c) removed.
func TrimFunc(s string, f func(r int) bool) string {
return TrimRightFunc(TrimLeftFunc(s, f), f)
} }
func makeCutsetFunc(cutset string) func(rune int) bool { func makeCutsetFunc(cutset string) func(rune int) bool {
......
...@@ -283,8 +283,14 @@ var trimSpaceTests = []StringTest{ ...@@ -283,8 +283,14 @@ var trimSpaceTests = []StringTest{
StringTest{" \t\r\n x\t\t\r\r\n\n ", "x"}, StringTest{" \t\r\n x\t\t\r\r\n\n ", "x"},
StringTest{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"}, StringTest{" \u2000\t\r\n x\t\t\r\r\ny\n \u3000", "x\t\t\r\r\ny"},
StringTest{"1 \t\r\n2", "1 \t\r\n2"}, StringTest{"1 \t\r\n2", "1 \t\r\n2"},
StringTest{" x\x80", "x\x80"}, // invalid UTF-8 on end StringTest{" x\x80", "x\x80"},
StringTest{" x\xc0", "x\xc0"}, // invalid UTF-8 on end StringTest{" x\xc0", "x\xc0"},
StringTest{"x \xc0\xc0 ", "x \xc0\xc0"},
StringTest{"x \xc0", "x \xc0"},
StringTest{"x \xc0 ", "x \xc0"},
StringTest{"x \xc0\xc0 ", "x \xc0\xc0"},
StringTest{"x ☺\xc0\xc0 ", "x ☺\xc0\xc0"},
StringTest{"x ☺ ", "x ☺"},
} }
func tenRunes(rune int) string { func tenRunes(rune int) string {
...@@ -407,8 +413,28 @@ var trimTests = []TrimTest{ ...@@ -407,8 +413,28 @@ var trimTests = []TrimTest{
TrimTest{TrimRight, "abba", "", "abba"}, TrimTest{TrimRight, "abba", "", "abba"},
TrimTest{TrimRight, "", "123", ""}, TrimTest{TrimRight, "", "123", ""},
TrimTest{TrimRight, "", "", ""}, TrimTest{TrimRight, "", "", ""},
TrimTest{TrimRight, "☺\xc0", "☺", "☺\xc0"},
} }
// naiveTrimRight is a reference implementation of TrimRight that
// walks s forwards from its start, remembering the last code point
// that is not in cutset, and cuts s just after that code point.
func naiveTrimRight(s string, cutset string) string {
	// keep is the byte offset of the last code point seen so far
	// that is absent from cutset; -1 until one is found.
	keep := -1
	for pos, c := range s {
		if IndexRune(cutset, c) != -1 {
			continue
		}
		keep = pos
	}
	if keep < 0 || s[keep] < utf8.RuneSelf {
		// Nothing kept (keep+1 == 0) or a one-byte code point.
		return s[0 : keep+1]
	}
	// Non-ASCII byte: step over however many bytes the decoder consumes.
	_, size := utf8.DecodeRuneInString(s[keep:])
	return s[0 : keep+size]
}
func TestTrim(t *testing.T) { func TestTrim(t *testing.T) {
for _, tc := range trimTests { for _, tc := range trimTests {
actual := tc.f(tc.in, tc.cutset) actual := tc.f(tc.in, tc.cutset)
...@@ -426,25 +452,100 @@ func TestTrim(t *testing.T) { ...@@ -426,25 +452,100 @@ func TestTrim(t *testing.T) {
if actual != tc.out { if actual != tc.out {
t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.cutset, actual, tc.out) t.Errorf("%s(%q, %q) = %q; want %q", name, tc.in, tc.cutset, actual, tc.out)
} }
// test equivalence of TrimRight to naive version
if tc.f == TrimRight {
naive := naiveTrimRight(tc.in, tc.cutset)
if naive != actual {
t.Errorf("TrimRight(%q, %q) = %q, want %q", tc.in, tc.cutset, actual, naive)
}
}
} }
} }
var isSpace = predicate{unicode.IsSpace, "IsSpace"}
var isDigit = predicate{unicode.IsDigit, "IsDigit"}
var isUpper = predicate{unicode.IsUpper, "IsUpper"}
var isValidRune = predicate{
func(r int) bool {
return r != utf8.RuneError
},
"IsValidRune",
}
type predicate struct {
f func(r int) bool
name string
}
type TrimFuncTest struct { type TrimFuncTest struct {
f func(r int) bool f predicate
name, in, out string in, out string
}
func not(p predicate) predicate {
return predicate{
func(r int) bool {
return !p.f(r)
},
"not " + p.name,
}
} }
var trimFuncTests = []TrimFuncTest{ var trimFuncTests = []TrimFuncTest{
TrimFuncTest{unicode.IsSpace, "IsSpace", space + " hello " + space, "hello"}, TrimFuncTest{isSpace, space + " hello " + space, "hello"},
TrimFuncTest{unicode.IsDigit, "IsDigit", "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"}, TrimFuncTest{isDigit, "\u0e50\u0e5212hello34\u0e50\u0e51", "hello"},
TrimFuncTest{unicode.IsUpper, "IsUpper", "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"}, TrimFuncTest{isUpper, "\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", "hello"},
TrimFuncTest{not(isSpace), "hello" + space + "hello", space},
TrimFuncTest{not(isDigit), "hello\u0e50\u0e521234\u0e50\u0e51helo", "\u0e50\u0e521234\u0e50\u0e51"},
TrimFuncTest{isValidRune, "ab\xc0a\xc0cd", "\xc0a\xc0"},
TrimFuncTest{not(isValidRune), "\xc0a\xc0", "a"},
} }
func TestTrimFunc(t *testing.T) { func TestTrimFunc(t *testing.T) {
for _, tc := range trimFuncTests { for _, tc := range trimFuncTests {
actual := TrimFunc(tc.in, tc.f) actual := TrimFunc(tc.in, tc.f.f)
if actual != tc.out { if actual != tc.out {
t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.name, actual, tc.out) t.Errorf("TrimFunc(%q, %q) = %q; want %q", tc.in, tc.f.name, actual, tc.out)
}
}
}
// IndexFuncTest is one table entry for TestIndexFunc: an input
// string, a predicate, and the results expected from IndexFunc and
// LastIndexFunc when called with them.
type IndexFuncTest struct {
	in    string    // string to search
	f     predicate // predicate passed to both functions
	first int       // expected IndexFunc result
	last  int       // expected LastIndexFunc result
}
// indexFuncTests is the table driven by TestIndexFunc. Each entry
// gives the input, the predicate, and the expected results of
// IndexFunc (first) and LastIndexFunc (last) as byte offsets, with
// -1 meaning "no match".
var indexFuncTests = []IndexFuncTest{
	IndexFuncTest{"", isValidRune, -1, -1},
	IndexFuncTest{"abc", isDigit, -1, -1},
	IndexFuncTest{"0123", isDigit, 0, 3},
	IndexFuncTest{"a1b", isDigit, 1, 1},
	IndexFuncTest{space, isSpace, 0, len(space) - 3}, // last rune in space is 3 bytes
	IndexFuncTest{"\u0e50\u0e5212hello34\u0e50\u0e51", isDigit, 0, 18},
	IndexFuncTest{"\u2C6F\u2C6F\u2C6F\u2C6FABCDhelloEF\u2C6F\u2C6FGH\u2C6F\u2C6F", isUpper, 0, 34},
	IndexFuncTest{"12\u0e50\u0e52hello34\u0e50\u0e51", not(isDigit), 8, 12},
	// broken unicode tests
	IndexFuncTest{"\x801", isDigit, 1, 1},
	IndexFuncTest{"\x80abc", isDigit, -1, -1},
	IndexFuncTest{"\xc0a\xc0", isValidRune, 1, 1},
	IndexFuncTest{"\xc0a\xc0", not(isValidRune), 0, 2},
	// The 3-byte rune ☺ occupies bytes 1-3, so the invalid bytes
	// that follow it sit at offsets 4 and 5. Without it the expected
	// last index would lie beyond the end of the input.
	IndexFuncTest{"\xc0☺\xc0", not(isValidRune), 0, 4},
	IndexFuncTest{"\xc0☺\xc0\xc0", not(isValidRune), 0, 5},
	IndexFuncTest{"ab\xc0a\xc0cd", not(isValidRune), 2, 4},
	IndexFuncTest{"a\xe0\x80cd", not(isValidRune), 1, 2},
}
func TestIndexFunc(t *testing.T) {
for _, tc := range indexFuncTests {
first := IndexFunc(tc.in, tc.f.f)
if first != tc.first {
t.Errorf("IndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, first, tc.first)
}
last := LastIndexFunc(tc.in, tc.f.f)
if last != tc.last {
t.Errorf("LastIndexFunc(%q, %s) = %d; want %d", tc.in, tc.f.name, last, tc.last)
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment