Commit 9d3c4b37, authored by Meador Inge and committed by Dylan Trotter

Make `str` casing methods multi-byte aware (#186)

In the course of reviewing #116 it came to light that
some of the other `str` methods that involve changing
case do not handle multi-byte characters correctly.
This patch fixes that.
parent c337a511
...@@ -34,6 +34,7 @@ var ( ...@@ -34,6 +34,7 @@ var (
strASCIISpaces = []byte(" \t\n\v\f\r") strASCIISpaces = []byte(" \t\n\v\f\r")
strInterpolationRegexp = regexp.MustCompile(`^%([#0 +-]?)((\*|[0-9]+)?)((\.(\*|[0-9]+))?)[hlL]?([diouxXeEfFgGcrs%])`) strInterpolationRegexp = regexp.MustCompile(`^%([#0 +-]?)((\*|[0-9]+)?)((\.(\*|[0-9]+))?)[hlL]?([diouxXeEfFgGcrs%])`)
internedStrs = map[string]*Str{} internedStrs = map[string]*Str{}
caseOffset = byte('a' - 'A')
) )
type stripSide int type stripSide int
...@@ -161,25 +162,17 @@ func strCapitalize(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) ...@@ -161,25 +162,17 @@ func strCapitalize(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException)
if raised := checkMethodArgs(f, "capitalize", args, StrType); raised != nil { if raised := checkMethodArgs(f, "capitalize", args, StrType); raised != nil {
return nil, raised return nil, raised
} }
s := toStrUnsafe(args[0]) s := toStrUnsafe(args[0]).Value()
sv := s.Value() numBytes := len(s)
numBytes := len(sv)
if numBytes == 0 { if numBytes == 0 {
return s.ToObject(), nil return args[0], nil
}
buf := make([]byte, numBytes)
offset := byte('a' - 'A')
buf[0] = sv[0]
if sv[0] >= 'a' && sv[0] <= 'z' {
buf[0] -= offset
} }
b := make([]byte, numBytes)
b[0] = toUpper(s[0])
for i := 1; i < numBytes; i++ { for i := 1; i < numBytes; i++ {
buf[i] = sv[i] b[i] = toLower(s[i])
if sv[i] >= 'A' && sv[i] <= 'Z' {
buf[i] += offset
}
} }
return NewStr(string(buf)).ToObject(), nil return NewStr(string(b)).ToObject(), nil
} }
func strContains(f *Frame, o *Object, value *Object) (*Object, *BaseException) { func strContains(f *Frame, o *Object, value *Object) (*Object, *BaseException) {
...@@ -396,7 +389,15 @@ func strLower(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { ...@@ -396,7 +389,15 @@ func strLower(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
return nil, raised return nil, raised
} }
s := toStrUnsafe(args[0]).Value() s := toStrUnsafe(args[0]).Value()
return NewStr(strings.ToLower(s)).ToObject(), nil numBytes := len(s)
if numBytes == 0 {
return args[0], nil
}
b := make([]byte, numBytes)
for i := 0; i < numBytes; i++ {
b[i] = toLower(s[i])
}
return NewStr(string(b)).ToObject(), nil
} }
func strLStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { func strLStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
...@@ -986,7 +987,31 @@ func strTitle(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { ...@@ -986,7 +987,31 @@ func strTitle(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
return nil, raised return nil, raised
} }
s := toStrUnsafe(args[0]).Value() s := toStrUnsafe(args[0]).Value()
return NewStr(strings.Title(strings.ToLower(s))).ToObject(), nil numBytes := len(s)
if numBytes == 0 {
return args[0], nil
}
b := make([]byte, numBytes)
previousIsCased := false
for i := 0; i < numBytes; i++ {
c := s[i]
switch {
case s[i] >= 'a' && s[i] <= 'z':
if !previousIsCased {
c = toUpper(c)
}
previousIsCased = true
case s[i] >= 'A' && s[i] <= 'Z':
if previousIsCased {
c = toLower(c)
}
previousIsCased = true
default:
previousIsCased = false
}
b[i] = c
}
return NewStr(string(b)).ToObject(), nil
} }
func strUpper(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { func strUpper(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
...@@ -995,7 +1020,15 @@ func strUpper(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) { ...@@ -995,7 +1020,15 @@ func strUpper(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
return nil, raised return nil, raised
} }
s := toStrUnsafe(args[0]).Value() s := toStrUnsafe(args[0]).Value()
return NewStr(strings.ToUpper(s)).ToObject(), nil numBytes := len(s)
if numBytes == 0 {
return args[0], nil
}
b := make([]byte, numBytes)
for i := 0; i < numBytes; i++ {
b[i] = toUpper(s[i])
}
return NewStr(string(b)).ToObject(), nil
} }
func strZFill(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) { func strZFill(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
...@@ -1029,3 +1062,17 @@ func init() { ...@@ -1029,3 +1062,17 @@ func init() {
InternStr(string([]byte{byte(i)})) InternStr(string([]byte{byte(i)}))
} }
} }
// toLower maps an ASCII uppercase letter ('A'-'Z') to its lowercase
// counterpart by adding caseOffset. Any other byte value, including bytes
// outside the ASCII range, is returned unchanged.
func toLower(b byte) byte {
	isUpper := 'A' <= b && b <= 'Z'
	if !isUpper {
		return b
	}
	return b + caseOffset
}
// toUpper maps an ASCII lowercase letter ('a'-'z') to its uppercase
// counterpart by subtracting caseOffset. Any other byte value, including
// bytes outside the ASCII range, is returned unchanged.
func toUpper(b byte) byte {
	switch {
	case b < 'a', b > 'z':
		// Not an ASCII lowercase letter: pass the byte through untouched.
		return b
	default:
		return b - caseOffset
	}
}
...@@ -281,6 +281,7 @@ func TestStrMethods(t *testing.T) { ...@@ -281,6 +281,7 @@ func TestStrMethods(t *testing.T) {
{"capitalize", wrapArgs("ùBAR"), NewStr("ùbar").ToObject(), nil}, {"capitalize", wrapArgs("ùBAR"), NewStr("ùbar").ToObject(), nil},
{"capitalize", wrapArgs("вол"), NewStr("вол").ToObject(), nil}, {"capitalize", wrapArgs("вол"), NewStr("вол").ToObject(), nil},
{"capitalize", wrapArgs("foobar", 123), nil, mustCreateException(TypeErrorType, "'capitalize' of 'str' requires 1 arguments")}, {"capitalize", wrapArgs("foobar", 123), nil, mustCreateException(TypeErrorType, "'capitalize' of 'str' requires 1 arguments")},
{"capitalize", wrapArgs("ВОЛ"), NewStr("ВОЛ").ToObject(), nil},
{"endswith", wrapArgs("", ""), True.ToObject(), nil}, {"endswith", wrapArgs("", ""), True.ToObject(), nil},
{"endswith", wrapArgs("", "", 1), False.ToObject(), nil}, {"endswith", wrapArgs("", "", 1), False.ToObject(), nil},
{"endswith", wrapArgs("foobar", "bar"), True.ToObject(), nil}, {"endswith", wrapArgs("foobar", "bar"), True.ToObject(), nil},
...@@ -339,6 +340,8 @@ func TestStrMethods(t *testing.T) { ...@@ -339,6 +340,8 @@ func TestStrMethods(t *testing.T) {
{"lower", wrapArgs("aBC"), NewStr("abc").ToObject(), nil}, {"lower", wrapArgs("aBC"), NewStr("abc").ToObject(), nil},
{"lower", wrapArgs("abc def", 123), nil, mustCreateException(TypeErrorType, "'lower' of 'str' requires 1 arguments")}, {"lower", wrapArgs("abc def", 123), nil, mustCreateException(TypeErrorType, "'lower' of 'str' requires 1 arguments")},
{"lower", wrapArgs(123), nil, mustCreateException(TypeErrorType, "unbound method lower() must be called with str instance as first argument (got int instance instead)")}, {"lower", wrapArgs(123), nil, mustCreateException(TypeErrorType, "unbound method lower() must be called with str instance as first argument (got int instance instead)")},
{"lower", wrapArgs("вол"), NewStr("вол").ToObject(), nil},
{"lower", wrapArgs("ВОЛ"), NewStr("ВОЛ").ToObject(), nil},
{"lstrip", wrapArgs("foo "), NewStr("foo ").ToObject(), nil}, {"lstrip", wrapArgs("foo "), NewStr("foo ").ToObject(), nil},
{"lstrip", wrapArgs(" foo bar "), NewStr("foo bar ").ToObject(), nil}, {"lstrip", wrapArgs(" foo bar "), NewStr("foo bar ").ToObject(), nil},
{"lstrip", wrapArgs("foo foo", "o"), NewStr("foo foo").ToObject(), nil}, {"lstrip", wrapArgs("foo foo", "o"), NewStr("foo foo").ToObject(), nil},
...@@ -452,6 +455,8 @@ func TestStrMethods(t *testing.T) { ...@@ -452,6 +455,8 @@ func TestStrMethods(t *testing.T) {
{"title", wrapArgs("aBC dEF"), NewStr("Abc Def").ToObject(), nil}, {"title", wrapArgs("aBC dEF"), NewStr("Abc Def").ToObject(), nil},
{"title", wrapArgs("abc def", 123), nil, mustCreateException(TypeErrorType, "'title' of 'str' requires 1 arguments")}, {"title", wrapArgs("abc def", 123), nil, mustCreateException(TypeErrorType, "'title' of 'str' requires 1 arguments")},
{"title", wrapArgs(123), nil, mustCreateException(TypeErrorType, "unbound method title() must be called with str instance as first argument (got int instance instead)")}, {"title", wrapArgs(123), nil, mustCreateException(TypeErrorType, "unbound method title() must be called with str instance as first argument (got int instance instead)")},
{"title", wrapArgs("вол"), NewStr("вол").ToObject(), nil},
{"title", wrapArgs("ВОЛ"), NewStr("ВОЛ").ToObject(), nil},
{"upper", wrapArgs(""), NewStr("").ToObject(), nil}, {"upper", wrapArgs(""), NewStr("").ToObject(), nil},
{"upper", wrapArgs("a"), NewStr("A").ToObject(), nil}, {"upper", wrapArgs("a"), NewStr("A").ToObject(), nil},
{"upper", wrapArgs("A"), NewStr("A").ToObject(), nil}, {"upper", wrapArgs("A"), NewStr("A").ToObject(), nil},
...@@ -461,6 +466,8 @@ func TestStrMethods(t *testing.T) { ...@@ -461,6 +466,8 @@ func TestStrMethods(t *testing.T) {
{"upper", wrapArgs("aBC"), NewStr("ABC").ToObject(), nil}, {"upper", wrapArgs("aBC"), NewStr("ABC").ToObject(), nil},
{"upper", wrapArgs("abc def", 123), nil, mustCreateException(TypeErrorType, "'upper' of 'str' requires 1 arguments")}, {"upper", wrapArgs("abc def", 123), nil, mustCreateException(TypeErrorType, "'upper' of 'str' requires 1 arguments")},
{"upper", wrapArgs(123), nil, mustCreateException(TypeErrorType, "unbound method upper() must be called with str instance as first argument (got int instance instead)")}, {"upper", wrapArgs(123), nil, mustCreateException(TypeErrorType, "unbound method upper() must be called with str instance as first argument (got int instance instead)")},
{"upper", wrapArgs("вол"), NewStr("вол").ToObject(), nil},
{"upper", wrapArgs("ВОЛ"), NewStr("ВОЛ").ToObject(), nil},
{"zfill", wrapArgs("123", 2), NewStr("123").ToObject(), nil}, {"zfill", wrapArgs("123", 2), NewStr("123").ToObject(), nil},
{"zfill", wrapArgs("123", 3), NewStr("123").ToObject(), nil}, {"zfill", wrapArgs("123", 3), NewStr("123").ToObject(), nil},
{"zfill", wrapArgs("123", 4), NewStr("0123").ToObject(), nil}, {"zfill", wrapArgs("123", 4), NewStr("0123").ToObject(), nil},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment