Commit 0d1cbaf2 authored by Russ Cox

strings.utflen -> utf8.RuneCount, RuneCountInString

R=r
DELTA=94  (52 added, 33 deleted, 9 changed)
OCL=20547
CL=20552
parent 94a66489
......@@ -88,6 +88,7 @@ bignum.6: fmt.dirinstall
bufio.6: io.dirinstall os.dirinstall
flag.6: fmt.dirinstall
testing.6: flag.install fmt.dirinstall
strings.6: utf8.install
fmt.dirinstall: io.dirinstall reflect.dirinstall strconv.dirinstall
hash.dirinstall: os.dirinstall
......
......@@ -4,30 +4,17 @@
package strings
// utflen counts the UTF-8 sequences in s.
// It does so by counting every byte that is not a continuation byte,
// i.e. every byte whose top two bits are not 10.
// Assumes s is well-formed; no decoding or validation is performed.
export func utflen(s string) int {
n := 0;
for i := 0; i < len(s); i++ {
// A byte of the form 10xxxxxx continues a sequence; any other byte is counted.
if s[i]&0xC0 != 0x80 {
n++
}
}
return n
}
import "utf8"
// Split string into array of UTF-8 sequences (still strings).
// The returned array is allocated with one slot per sequence counted in s.
// NOTE(review): this span appears to interleave the pre-change and
// post-change lines of a diff hunk (two declarations of a, two loop
// bodies) — confirm against the commit before reading it as one body.
export func explode(s string) *[]string {
// Presumably the old sizing via utflen, superseded by the line after it — verify.
a := new([]string, utflen(s));
a := new([]string, utf8.RuneCountInString(s, 0, len(s)));
// j is the byte offset in s of the sequence being extracted.
j := 0;
var size, rune int;
for i := 0; i < len(a); i++ {
// Byte-scanning form: step past the first byte, then past every
// continuation byte, so that s[j:ej] spans one sequence.
ej := j;
ej++;
for ej < len(s) && (s[ej]&0xC0) == 0x80 {
ej++
}
a[i] = s[j:ej];
j = ej
// Decoding form: decode at offset j, store string(rune), and
// advance j by the reported size.
rune, size = utf8.DecodeRuneInString(s, j);
a[i] = string(rune);
j += size;
}
return a
}
......@@ -35,7 +22,7 @@ export func explode(s string) *[]string {
// Count non-overlapping instances of sep in s.
export func count(s, sep string) int {
if sep == "" {
return utflen(s)+1
return utf8.RuneCountInString(s, 0, len(s))+1
}
c := sep[0];
n := 0;
......
......@@ -79,21 +79,3 @@ export func TestSplit(t *testing.T) {
}
}
// TODO: utflen shouldn't even be in strings.
// UtflenTest pairs an input string with the count that utflen is expected
// to return for it.
type UtflenTest struct {
// in is the string passed to strings.utflen.
in string;
// out is the expected result.
out int;
}
// utflentests is the table of cases iterated by TestUtflen.
// NOTE(review): abcd, faces and commas are not defined in this view —
// presumably strings declared elsewhere in the test file; verify there.
var utflentests = []UtflenTest {
UtflenTest{ abcd, 4 },
UtflenTest{ faces, 3 },
UtflenTest{ commas, 7 },
}
// TestUtflen calls strings.utflen on every entry of utflentests and reports
// an error through t when the result differs from the entry's expected count.
export func TestUtflen(t *testing.T) {
for i := 0; i < len(utflentests); i++ {
tt := utflentests[i];
if out := strings.utflen(tt.in); out != tt.out {
t.Errorf("utflen(%q) = %d, want %d", tt.in, out, tt.out);
}
}
}
......@@ -107,8 +107,7 @@ func DecodeRuneInternal(p *[]byte) (rune, size int, short bool) {
return RuneError, 1, false
}
func DecodeRuneInStringInternal(s string, i int) (rune, size int, short bool) {
n := len(s) - i;
func DecodeRuneInStringInternal(s string, i int, n int) (rune, size int, short bool) {
if n < 1 {
return RuneError, 0, true;
}
......@@ -188,7 +187,7 @@ export func FullRune(p *[]byte) bool {
}
// FullRuneInString returns true when decoding s at byte offset i is not
// reported as short by DecodeRuneInStringInternal.
// NOTE(review): the two calls below look like the before and after lines of
// a diff hunk (two-argument call, then three-argument call passing
// len(s) - i) — confirm which one belongs to the final file.
export func FullRuneInString(s string, i int) bool {
rune, size, short := DecodeRuneInStringInternal(s, i);
rune, size, short := DecodeRuneInStringInternal(s, i, len(s) - i);
// Only the short flag is used; rune and size are ignored.
return !short
}
......@@ -200,7 +199,7 @@ export func DecodeRune(p *[]byte) (rune, size int) {
// DecodeRuneInString decodes at byte offset i of s and returns the rune and
// size reported by DecodeRuneInStringInternal; the short flag is discarded.
// NOTE(review): the two calls below look like the before and after lines of
// a diff hunk (two-argument call, then three-argument call passing
// len(s) - i) — confirm which one belongs to the final file.
export func DecodeRuneInString(s string, i int) (rune, size int) {
var short bool;
rune, size, short = DecodeRuneInStringInternal(s, i);
rune, size, short = DecodeRuneInStringInternal(s, i, len(s) - i);
// Bare return: the named results rune and size carry the values assigned above.
return;
}
......@@ -248,3 +247,31 @@ export func EncodeRune(rune int, p *[]byte) int {
return 4;
}
// RuneCount returns the number of runes in p.
// A byte below RuneSelf is counted as one rune of width one; any other
// position is handed to DecodeRune, whose reported size is skipped.
export func RuneCount(p *[]byte) int {
// i is the byte offset into p; n counts loop iterations, one per rune.
i := 0;
var n int;
for n = 0; i < len(p); n++ {
if p[i] < RuneSelf {
// Single-byte case: no decoding needed.
i++;
} else {
// Only size is used; the decoded rune value is ignored.
rune, size := DecodeRune(p[i:len(p)]);
i += size;
}
}
return n;
}
// RuneCountInString returns the number of runes in the l bytes of s that
// start at byte offset i.
// A byte below RuneSelf is counted as one rune of width one; any other
// position is handed to DecodeRuneInStringInternal, limited to the bytes
// remaining before the end offset, and the reported size is skipped.
export func RuneCountInString(s string, i int, l int) int {
// ei is the exclusive end offset of the range being counted.
ei := i + l;
n := 0;
// n counts loop iterations, one per rune; i advances through s.
for n = 0; i < ei; n++ {
if s[i] < RuneSelf {
// Single-byte case: no decoding needed.
i++;
} else {
// Only size is used; rune and short are ignored.
rune, size, short := DecodeRuneInStringInternal(s, i, ei - i);
i += size;
}
}
return n;
}
......@@ -156,3 +156,25 @@ export func TestDecodeRune(t *testing.T) {
}
}
}
// RuneCountTest pairs an input string with the rune count expected for it.
type RuneCountTest struct {
// in is the input whose runes are counted.
in string;
// out is the expected count.
out int;
}
// runecounttests is the table of cases iterated by TestRuneCount.
var runecounttests = []RuneCountTest {
RuneCountTest{ "abcd", 4 },
RuneCountTest{ "☺☻☹", 3 },
RuneCountTest{ "1,2,3,4", 7 },
// 0xe2 has the form of a multi-byte lead byte but is followed by 0x00,
// which is not a continuation byte; the expected count is two.
RuneCountTest{ "\xe2\x00", 2 },
}
// TestRuneCount checks every entry of runecounttests against both
// utf8.RuneCountInString (over the whole string) and utf8.RuneCount,
// reporting an error through t on any mismatch.
export func TestRuneCount(t *testing.T) {
for i := 0; i < len(runecounttests); i++ {
tt := runecounttests[i];
if out := utf8.RuneCountInString(tt.in, 0, len(tt.in)); out != tt.out {
t.Errorf("RuneCountInString(%q) = %d, want %d", tt.in, out, tt.out);
}
// NOTE(review): Bytes is not defined in this view — presumably a test
// helper that converts the string to a byte array; verify in the test file.
if out := utf8.RuneCount(Bytes(tt.in)); out != tt.out {
t.Errorf("RuneCount(%q) = %d, want %d", tt.in, out, tt.out);
}
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment