Commit 1eba218e, authored by Peter Froehlich, committed by Russ Cox

Runes: turn string into []int

Split: fixed typo in documentation

R=rsc, r, r1
https://golang.org/cl/157170
parent c0efa07c
...@@ -333,3 +333,16 @@ func AddByte(s []byte, t byte) []byte { ...@@ -333,3 +333,16 @@ func AddByte(s []byte, t byte) []byte {
s[lens] = t; s[lens] = t;
return s; return s;
} }
// Runes returns a slice of runes (Unicode code points) equivalent to s.
// The result holds one element per rune counted by utf8.RuneCount(s); a byte
// that does not start a valid UTF-8 sequence decodes to utf8.RuneError
// (U+FFFD) and consumes exactly one byte of input.
func Runes(s []byte) []int {
	t := make([]int, utf8.RuneCount(s));
	i := 0;
	for len(s) > 0 {
		r, size := utf8.DecodeRune(s);
		// Convert explicitly so the store into []int does not depend on
		// the declared type of the decoder's result.
		t[i] = int(r);
		i++;
		// Drop the bytes just decoded; size is at least 1 for non-empty s.
		s = s[size:];
	}
	return t;
}
...@@ -400,3 +400,49 @@ func TestRepeat(t *testing.T) { ...@@ -400,3 +400,49 @@ func TestRepeat(t *testing.T) {
} }
} }
} }
// runesEqual reports whether a and b have the same length and hold the same
// code points at every index.
func runesEqual(a, b []int) bool {
	n := len(a);
	if n != len(b) {
		return false
	}
	for i := 0; i < n; i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
// RunesTest is one table entry for TestRunes.
type RunesTest struct {
// in is the input text; TestRunes converts it to bytes before calling Runes.
in string;
// out is the slice of code points Runes is expected to return for in.
out []int;
// lossy marks entries for which TestRunes skips the string(Runes(in)) == in
// round-trip check because information was lost in the conversion.
lossy bool;
}
// RunesTests is the table driving TestRunes: empty input, ASCII, multi-byte
// text written with \u escapes, and two lossy entries whose stray bytes
// (\x80 and \xc0) are expected to come back as 0xFFFD.
var RunesTests = []RunesTest{
RunesTest{"", []int{}, false},
RunesTest{" ", []int{32}, false},
RunesTest{"ABC", []int{65, 66, 67}, false},
RunesTest{"abc", []int{97, 98, 99}, false},
RunesTest{"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false},
RunesTest{"ab\x80c", []int{97, 98, 0xFFFD, 99}, true},
RunesTest{"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true},
}
// TestRunes runs Runes over every entry of RunesTests, comparing the result
// with the expected code points and, for entries not marked lossy, checking
// that converting the runes back to a string reproduces the input.
func TestRunes(t *testing.T) {
	for _, tc := range RunesTests {
		input := strings.Bytes(tc.in);
		got := Runes(input);
		if !runesEqual(got, tc.out) {
			t.Errorf("Runes(%q) = %v; want %v", input, got, tc.out);
			continue;
		}
		if tc.lossy {
			// information was lost, so reassembly cannot be checked
			continue;
		}
		if back := string(got); back != tc.in {
			t.Errorf("string(Runes(%q)) = %x; want %x", input, back, input)
		}
	}
}
...@@ -124,7 +124,7 @@ func genSplit(s, sep string, sepSave, n int) []string { ...@@ -124,7 +124,7 @@ func genSplit(s, sep string, sepSave, n int) []string {
// Split splits the string s around each instance of sep, returning an array of substrings of s. // Split splits the string s around each instance of sep, returning an array of substrings of s.
// If sep is empty, Split splits s after each UTF-8 sequence. // If sep is empty, Split splits s after each UTF-8 sequence.
// If n > 0, split Splits s into at most n substrings; the last substring will be the unsplit remainder. // If n > 0, Split splits s into at most n substrings; the last substring will be the unsplit remainder.
func Split(s, sep string, n int) []string { return genSplit(s, sep, 0, n) } func Split(s, sep string, n int) []string { return genSplit(s, sep, 0, n) }
// SplitAfter splits the string s after each instance of sep, returning an array of substrings of s. // SplitAfter splits the string s after each instance of sep, returning an array of substrings of s.
...@@ -272,3 +272,14 @@ func Bytes(s string) []byte { ...@@ -272,3 +272,14 @@ func Bytes(s string) []byte {
} }
return b; return b;
} }
// Runes returns a slice of runes (Unicode code points) equivalent to the string s.
// The result holds one element per rune counted by utf8.RuneCountInString(s);
// bytes that are not valid UTF-8 appear in the result as U+FFFD, one element
// per offending byte.
func Runes(s string) []int {
	t := make([]int, utf8.RuneCountInString(s));
	// range over a string yields byte offsets, so the rune index is
	// tracked separately.
	i := 0;
	for _, r := range s {
		// Convert explicitly so the store into []int does not depend on
		// the type range assigns to r.
		t[i] = int(r);
		i++;
	}
	return t;
}
...@@ -370,3 +370,48 @@ func TestRepeat(t *testing.T) { ...@@ -370,3 +370,48 @@ func TestRepeat(t *testing.T) {
} }
} }
} }
// runesEqual reports whether the two rune slices match element for element.
func runesEqual(a, b []int) bool {
	if len(b) != len(a) {
		return false
	}
	for i := range a {
		if b[i] != a[i] {
			return false
		}
	}
	return true
}
// RunesTest is one table entry for TestRunes.
type RunesTest struct {
// in is the string passed to Runes.
in string;
// out is the slice of code points Runes is expected to return for in.
out []int;
// lossy marks entries for which TestRunes skips the string(Runes(in)) == in
// round-trip check because information was lost in the conversion.
lossy bool;
}
// RunesTests is the table driving TestRunes: empty input, ASCII, multi-byte
// text written with \u escapes, and two lossy entries whose stray bytes
// (\x80 and \xc0) are expected to come back as 0xFFFD.
var RunesTests = []RunesTest{
RunesTest{"", []int{}, false},
RunesTest{" ", []int{32}, false},
RunesTest{"ABC", []int{65, 66, 67}, false},
RunesTest{"abc", []int{97, 98, 99}, false},
RunesTest{"\u65e5\u672c\u8a9e", []int{26085, 26412, 35486}, false},
RunesTest{"ab\x80c", []int{97, 98, 0xFFFD, 99}, true},
RunesTest{"ab\xc0c", []int{97, 98, 0xFFFD, 99}, true},
}
// TestRunes runs Runes over every entry of RunesTests, comparing the result
// with the expected code points and, for entries not marked lossy, checking
// that converting the runes back to a string reproduces the input.
func TestRunes(t *testing.T) {
	for _, tc := range RunesTests {
		got := Runes(tc.in);
		if !runesEqual(got, tc.out) {
			t.Errorf("Runes(%q) = %v; want %v", tc.in, got, tc.out);
			continue;
		}
		if tc.lossy {
			// information was lost, so reassembly cannot be checked
			continue;
		}
		if back := string(got); back != tc.in {
			t.Errorf("string(Runes(%q)) = %x; want %x", tc.in, back, tc.in)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment