Commit c6065be4 authored by Dylan Trotter

Implement str.endswith, str/unicode.strip, and %d string interpolation.

parent 5f2572f1
......@@ -31,6 +31,7 @@ var (
// StrType is the object representing the Python 'str' type.
StrType = newBasisType("str", reflect.TypeOf(Str{}), toStrUnsafe, BaseStringType)
whitespaceSplitRegexp = regexp.MustCompile(`\s+`)
strASCIISpaces = []byte(" \t\n\v\f\r")
strInterpolationRegexp = regexp.MustCompile(`^%([#0 +-]?)((\*|[0-9]+)?)((\.(\*|[0-9]+))?)[hlL]?([diouxXeEfFgGcrs%])`)
internedStrs = map[string]*Str{}
)
......@@ -188,6 +189,10 @@ func strDecode(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
return s.ToObject(), nil
}
// strEndsWith is the builtin behind str.endswith. Keyword arguments are
// ignored; all of the work is done by strStartsEndsWith, which is told which
// method it is serving by name.
func strEndsWith(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
	const method = "endswith"
	return strStartsEndsWith(f, method, args)
}
// strEq is the Eq slot for str. It hands v and w to strCompare together with
// the three result objects (False, True, False) and returns whichever one
// strCompare selects. It never raises.
func strEq(f *Frame, v, w *Object) (*Object, *BaseException) {
	result := strCompare(v, w, False, True, False)
	return result, nil
}
......@@ -426,52 +431,64 @@ func strSplit(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
return NewList(results...).ToObject(), nil
}
// NOTE(review): this span appears to be a rendered diff hunk in which lines of
// the previous strStartsWith body are interleaved with the lines of strStrip.
// The comments added below describe only the strStrip lines.
func strStartsWith(f *Frame, args Args, kwargs KWArgs) (*Object, *BaseException) {
expectedTypes := []*Type{StrType, ObjectType, IntType, IntType}
// strStrip implements str.strip: it returns args[0] with leading and trailing
// bytes removed. When the chars argument is absent or None, the bytes listed
// in strASCIISpaces are stripped. When it is a str, its bytes form the strip
// set. When it is a unicode, the receiver is decoded and the call is forwarded
// to unicodeStrip. Any other chars type raises TypeError.
func strStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
expectedTypes := []*Type{StrType, ObjectType}
argc := len(args)
if argc == 2 || argc == 3 {
if argc == 1 {
expectedTypes = expectedTypes[:argc]
}
if raised := checkMethodArgs(f, "startswith", args, expectedTypes...); raised != nil {
if raised := checkMethodArgs(f, "strip", args, expectedTypes...); raised != nil {
return nil, raised
}
prefixArg := args[1]
var prefixes []*Object
s := toStrUnsafe(args[0])
charsArg := None
if argc > 1 {
charsArg = args[1]
}
var chars []byte
switch {
case prefixArg.isInstance(TupleType):
tup := toTupleUnsafe(prefixArg)
for _, o := range tup.elems {
if !o.isInstance(StrType) {
return nil, f.RaiseType(TypeErrorType, "expected a str")
}
case charsArg.isInstance(UnicodeType):
u, raised := s.Decode(f, EncodeDefault, EncodeStrict)
if raised != nil {
return nil, raised
}
prefixes = tup.elems
case prefixArg.isInstance(StrType):
prefixes = []*Object{prefixArg}
return unicodeStrip(f, Args{u.ToObject(), charsArg}, nil)
case charsArg.isInstance(StrType):
chars = []byte(toStrUnsafe(charsArg).Value())
case charsArg == None:
chars = strASCIISpaces
default:
msg := "startswith first arg must be str or tuple, not "
return nil, f.RaiseType(TypeErrorType, msg+prefixArg.typ.Name())
}
s := toStrUnsafe(args[0]).Value()
l := len(s)
start, end := 0, l
if argc >= 3 {
start = adjustIndex(toIntUnsafe(args[2]).Value(), l)
}
if argc == 4 {
end = adjustIndex(toIntUnsafe(args[3]).Value(), l)
}
if start > end {
// start == end may still return true when '' is a prefix.
return False.ToObject(), nil
}
s = s[start:end]
for _, prefix := range prefixes {
if strings.HasPrefix(s, toStrUnsafe(prefix).Value()) {
return True.ToObject(), nil
return nil, f.RaiseType(TypeErrorType, "strip arg must be None, str or unicode")
}
byteSlice := []byte(s.Value())
numBytes := len(byteSlice)
lindex := 0
// Advance lindex past every leading byte that occurs in chars; the first
// byte not in chars falls through to the break.
LeftStrip:
for ; lindex < numBytes; lindex++ {
b := byteSlice[lindex]
for _, c := range chars {
if b == c {
continue LeftStrip
}
}
break
}
rindex := numBytes
// Walk rindex back past every trailing byte that occurs in chars, never
// moving below lindex.
RightStrip:
for ; rindex > lindex; rindex-- {
b := byteSlice[rindex-1]
for _, c := range chars {
if b == c {
continue RightStrip
}
}
break
}
return False.ToObject(), nil
return NewStr(string(byteSlice[lindex:rindex])).ToObject(), nil
}
// strStartsWith is the builtin behind str.startswith. Keyword arguments are
// ignored; all of the work is done by strStartsEndsWith, which is told which
// method it is serving by name.
func strStartsWith(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
	const method = "startswith"
	return strStartsEndsWith(f, method, args)
}
func strStr(_ *Frame, o *Object) (*Object, *BaseException) {
......@@ -484,9 +501,11 @@ func strStr(_ *Frame, o *Object) (*Object, *BaseException) {
func initStrType(dict map[string]*Object) {
dict["__getnewargs__"] = newBuiltinFunction("__getnewargs__", strGetNewArgs).ToObject()
dict["decode"] = newBuiltinFunction("decode", strDecode).ToObject()
dict["endswith"] = newBuiltinFunction("endswith", strEndsWith).ToObject()
dict["join"] = newBuiltinFunction("join", strJoin).ToObject()
dict["split"] = newBuiltinFunction("split", strSplit).ToObject()
dict["startswith"] = newBuiltinFunction("startswith", strStartsWith).ToObject()
dict["strip"] = newBuiltinFunction("strip", strStrip).ToObject()
StrType.slots.Add = &binaryOpSlot{strAdd}
StrType.slots.Contains = &binaryOpSlot{strContains}
StrType.slots.Eq = &binaryOpSlot{strEq}
......@@ -568,6 +587,21 @@ func strInterpolate(f *Frame, format string, values *Tuple) (*Object, *BaseExcep
} else {
return nil, f.RaiseType(TypeErrorType, fmt.Sprintf("float argument required, not %s", o.typ.Name()))
}
case "d":
o := values.elems[valueIndex]
if o.typ.slots.Int == nil {
return nil, f.RaiseType(TypeErrorType, "%d format: a number is required, not "+o.typ.Name())
}
i, raised := intFromObject(f, values.elems[valueIndex])
if raised != nil {
return nil, raised
}
s, raised := ToStr(f, i)
if raised != nil {
return nil, raised
}
buf.WriteString(s.Value())
valueIndex++
case "%":
buf.WriteString("%")
default:
......@@ -619,6 +653,67 @@ func adjustIndex(i, l int) int {
return i
}
// strStartsEndsWith is the shared implementation of str.startswith and
// str.endswith.
//
// method is the Python-visible method name. It is used for argument-check
// error messages, and the value "endswith" selects suffix matching; any other
// value selects prefix matching.
//
// args holds the receiver str, the value(s) to match (a basestring or a tuple
// of basestrings), and optional int start and end indices. The indices are
// passed through adjustIndex before the receiver is sliced.
//
// It returns True if any candidate matches the selected slice and False
// otherwise. TypeError is raised when the match argument, or an element of a
// tuple match argument, is not a basestring. Errors raised by checkMethodArgs
// or ToStr are propagated unchanged.
func strStartsEndsWith(f *Frame, method string, args Args) (*Object, *BaseException) {
	argc := len(args)
	signature := []*Type{StrType, ObjectType, IntType, IntType}
	switch argc {
	case 2, 3:
		// Trailing index arguments were omitted: validate only what was given.
		signature = signature[:argc]
	}
	if raised := checkMethodArgs(f, method, args, signature...); raised != nil {
		return nil, raised
	}

	// Normalize the match argument into a flat list of candidate objects.
	needle := args[1]
	var candidates []*Object
	if needle.isInstance(TupleType) {
		candidates = toTupleUnsafe(needle).elems
	} else if needle.isInstance(BaseStringType) {
		candidates = []*Object{needle}
	} else {
		msg := " first arg must be str, unicode, or tuple, not "
		return nil, f.RaiseType(TypeErrorType, method+msg+needle.typ.Name())
	}

	// Validate and convert each candidate in order so the first offending
	// element determines which exception is raised.
	affixes := make([]string, 0, len(candidates))
	for _, candidate := range candidates {
		if !candidate.isInstance(BaseStringType) {
			return nil, f.RaiseType(TypeErrorType, "expected a str")
		}
		converted, raised := ToStr(f, candidate)
		if raised != nil {
			return nil, raised
		}
		affixes = append(affixes, converted.Value())
	}

	var hasAffix func(string, string) bool
	switch method {
	case "endswith":
		hasAffix = strings.HasSuffix
	default:
		hasAffix = strings.HasPrefix
	}

	haystack := toStrUnsafe(args[0]).Value()
	size := len(haystack)
	lo, hi := 0, size
	if argc >= 3 {
		lo = adjustIndex(toIntUnsafe(args[2]).Value(), size)
	}
	if argc == 4 {
		hi = adjustIndex(toIntUnsafe(args[3]).Value(), size)
	}
	// An inverted range can never match. lo == hi is still searched because
	// the empty string matches an empty window.
	if lo <= hi {
		window := haystack[lo:hi]
		for _, affix := range affixes {
			if hasAffix(window, affix) {
				return True.ToObject(), nil
			}
		}
	}
	return False.ToObject(), nil
}
func init() {
InternStr("")
for i := 0; i < 256; i++ {
......
......@@ -63,6 +63,8 @@ func TestStrBinaryOps(t *testing.T) {
{args: wrapArgs(Add, None, ""), wantExc: mustCreateException(TypeErrorType, "unsupported operand type(s) for +: 'NoneType' and 'str'")},
{args: wrapArgs(Mod, "%s", 42), want: NewStr("42").ToObject()},
{args: wrapArgs(Mod, "%f", 3.14), want: NewStr("3.140000").ToObject()},
{args: wrapArgs(Mod, "abc %d", NewLong(big.NewInt(123))), want: NewStr("abc 123").ToObject()},
{args: wrapArgs(Mod, "%d", 3.14), want: NewStr("3").ToObject()},
{args: wrapArgs(Mod, "%%", NewTuple()), want: NewStr("%").ToObject()},
{args: wrapArgs(Mod, "%r", "abc"), want: NewStr("'abc'").ToObject()},
{args: wrapArgs(Mod, "%s %s", true), wantExc: mustCreateException(TypeErrorType, "not enough arguments for format string")},
......@@ -73,6 +75,7 @@ func TestStrBinaryOps(t *testing.T) {
{args: wrapArgs(Mod, "%x", 24), wantExc: mustCreateException(NotImplementedErrorType, "conversion type not yet supported: x")},
{args: wrapArgs(Mod, "%f", None), wantExc: mustCreateException(TypeErrorType, "float argument required, not NoneType")},
{args: wrapArgs(Mod, "%s", newTestTuple(123, None)), wantExc: mustCreateException(TypeErrorType, "not all arguments converted during string formatting")},
{args: wrapArgs(Mod, "%d", newTestTuple("123")), wantExc: mustCreateException(TypeErrorType, "%d format: a number is required, not str")},
{args: wrapArgs(Mul, "", 10), want: NewStr("").ToObject()},
{args: wrapArgs(Mul, "foo", -2), want: NewStr("").ToObject()},
{args: wrapArgs(Mul, "foobar", 0), want: NewStr("").ToObject()},
......@@ -239,6 +242,17 @@ func TestStrMethods(t *testing.T) {
want *Object
wantExc *BaseException
}{
{"endswith", wrapArgs("", ""), True.ToObject(), nil},
{"endswith", wrapArgs("", "", 1), True.ToObject(), nil},
{"endswith", wrapArgs("foobar", "bar"), True.ToObject(), nil},
{"endswith", wrapArgs("foobar", "bar", 0, -2), False.ToObject(), nil},
{"endswith", wrapArgs("foobar", "foo", 0, 3), True.ToObject(), nil},
{"endswith", wrapArgs("foobar", "bar", 3, 5), False.ToObject(), nil},
{"endswith", wrapArgs("foobar", "bar", 5, 3), False.ToObject(), nil},
{"endswith", wrapArgs("bar", "foobar"), False.ToObject(), nil},
{"endswith", wrapArgs("foo", newTestTuple("barfoo", "oo").ToObject()), True.ToObject(), nil},
{"endswith", wrapArgs("foo", 123), nil, mustCreateException(TypeErrorType, "endswith first arg must be str, unicode, or tuple, not int")},
{"endswith", wrapArgs("foo", newTestTuple(123).ToObject()), nil, mustCreateException(TypeErrorType, "expected a str")},
{"join", wrapArgs(",", newTestList("foo", "bar")), NewStr("foo,bar").ToObject(), nil},
{"join", wrapArgs(":", newTestList("foo", "bar", NewUnicode("baz"))), NewUnicode("foo:bar:baz").ToObject(), nil},
{"join", wrapArgs("nope", NewTuple()), NewStr("").ToObject(), nil},
......@@ -262,8 +276,18 @@ func TestStrMethods(t *testing.T) {
{"startswith", wrapArgs("foobar", "bar", 5, 3), False.ToObject(), nil},
{"startswith", wrapArgs("foo", "foobar"), False.ToObject(), nil},
{"startswith", wrapArgs("foo", newTestTuple("foobar", "fo").ToObject()), True.ToObject(), nil},
{"startswith", wrapArgs("foo", 123), nil, mustCreateException(TypeErrorType, "startswith first arg must be str or tuple, not int")},
{"startswith", wrapArgs("foo", 123), nil, mustCreateException(TypeErrorType, "startswith first arg must be str, unicode, or tuple, not int")},
{"startswith", wrapArgs("foo", "f", "123"), nil, mustCreateException(TypeErrorType, "'startswith' requires a 'int' object but received a 'str'")},
{"startswith", wrapArgs("foo", newTestTuple(123).ToObject()), nil, mustCreateException(TypeErrorType, "expected a str")},
{"strip", wrapArgs("foo "), NewStr("foo").ToObject(), nil},
{"strip", wrapArgs(" foo bar "), NewStr("foo bar").ToObject(), nil},
{"strip", wrapArgs("foo foo", "o"), NewStr("foo f").ToObject(), nil},
{"strip", wrapArgs("foo bar", "abr"), NewStr("foo ").ToObject(), nil},
{"strip", wrapArgs("foo", NewUnicode("o")), NewUnicode("f").ToObject(), nil},
{"strip", wrapArgs("123", 3), nil, mustCreateException(TypeErrorType, "strip arg must be None, str or unicode")},
{"strip", wrapArgs("foo", "bar", "baz"), nil, mustCreateException(TypeErrorType, "'strip' of 'str' requires 2 arguments")},
{"strip", wrapArgs("\xfboo", NewUnicode("o")), nil, mustCreateException(UnicodeDecodeErrorType, "'utf8' codec can't decode byte 0xfb in position 0")},
{"strip", wrapArgs("foo", NewUnicode("o")), NewUnicode("f").ToObject(), nil},
}
for _, cas := range cases {
testCase := invokeTestCase{args: cas.args, want: cas.want, wantExc: cas.wantExc}
......
......@@ -344,10 +344,59 @@ func unicodeStr(f *Frame, o *Object) (*Object, *BaseException) {
return ret.ToObject(), nil
}
// unicodeStrip implements unicode.strip: it returns a new unicode object
// holding args[0] with leading and trailing runes removed.
//
// When the optional chars argument is absent or None, runes for which
// unicode.IsSpace reports true are stripped. Otherwise chars is converted with
// unicodeCoerce and every rune it contains is stripped. Keyword arguments are
// ignored. Errors raised by checkMethodArgs or unicodeCoerce are propagated
// unchanged.
func unicodeStrip(f *Frame, args Args, _ KWArgs) (*Object, *BaseException) {
	argc := len(args)
	signature := []*Type{UnicodeType, ObjectType}
	if argc == 1 {
		// No chars argument was supplied; only the receiver needs checking.
		signature = signature[:1]
	}
	if raised := checkMethodArgs(f, "strip", args, signature...); raised != nil {
		return nil, raised
	}
	u := toUnicodeUnsafe(args[0])
	cutset := None
	if argc > 1 {
		cutset = args[1]
	}

	// strippable reports whether a rune should be removed from either end.
	var strippable func(rune) bool
	if cutset == None {
		strippable = unicode.IsSpace
	} else {
		set, raised := unicodeCoerce(f, cutset)
		if raised != nil {
			return nil, raised
		}
		strippable = func(r rune) bool {
			for _, member := range set.Value() {
				if member == r {
					return true
				}
			}
			return false
		}
	}

	// Narrow [lo, hi) from both sides until each boundary rune must be kept.
	text := u.Value()
	lo, hi := 0, len(text)
	for lo < hi && strippable(text[lo]) {
		lo++
	}
	for hi > lo && strippable(text[hi-1]) {
		hi--
	}

	// Copy so the result does not share storage with the receiver.
	stripped := make([]rune, hi-lo)
	copy(stripped, text[lo:hi])
	return NewUnicodeFromRunes(stripped).ToObject(), nil
}
func initUnicodeType(dict map[string]*Object) {
dict["__getnewargs__"] = newBuiltinFunction("__getnewargs__", unicodeGetNewArgs).ToObject()
dict["encode"] = newBuiltinFunction("encode", unicodeEncode).ToObject()
dict["join"] = newBuiltinFunction("join", unicodeJoin).ToObject()
dict["strip"] = newBuiltinFunction("strip", unicodeStrip).ToObject()
UnicodeType.slots.Add = &binaryOpSlot{unicodeAdd}
UnicodeType.slots.Contains = &binaryOpSlot{unicodeContains}
UnicodeType.slots.Eq = &binaryOpSlot{unicodeEq}
......
......@@ -180,6 +180,14 @@ func TestUnicodeMethods(t *testing.T) {
{"join", wrapArgs(NewUnicode("nope"), NewTuple()), NewUnicode("").ToObject(), nil},
{"join", wrapArgs(NewUnicode("nope"), newTestTuple(NewUnicode("foo"))), NewUnicode("foo").ToObject(), nil},
{"join", wrapArgs(NewUnicode(","), newTestList("foo", "bar", 3.14)), nil, mustCreateException(TypeErrorType, "coercing to Unicode: need string, float found")},
{"strip", wrapArgs(NewUnicode("foo ")), NewStr("foo").ToObject(), nil},
{"strip", wrapArgs(NewUnicode(" foo bar ")), NewStr("foo bar").ToObject(), nil},
{"strip", wrapArgs(NewUnicode("foo foo"), "o"), NewStr("foo f").ToObject(), nil},
{"strip", wrapArgs(NewUnicode("foo bar"), "abr"), NewStr("foo ").ToObject(), nil},
{"strip", wrapArgs(NewUnicode("foo"), NewUnicode("o")), NewUnicode("f").ToObject(), nil},
{"strip", wrapArgs(NewUnicode("123"), 3), nil, mustCreateException(TypeErrorType, "coercing to Unicode: need string, int found")},
{"strip", wrapArgs(NewUnicode("foo"), "bar", "baz"), nil, mustCreateException(TypeErrorType, "'strip' of 'unicode' requires 2 arguments")},
{"strip", wrapArgs(NewUnicode("foo"), NewUnicode("o")), NewUnicode("f").ToObject(), nil},
}
for _, cas := range cases {
testCase := invokeTestCase{args: cas.args, want: cas.want, wantExc: cas.wantExc}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment