Commit f9046fbe authored by Kirill Smelkov

.

parent ae9e330c
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
package main package main
import ( import (
"fmt"
"strconv" "strconv"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
...@@ -17,9 +16,12 @@ func pyQuote(s string) string { ...@@ -17,9 +16,12 @@ func pyQuote(s string) string {
return mem.String(out) return mem.String(out)
} }
// hex maps a 4-bit value (0-15) to its lowercase hexadecimal digit; it is
// indexed with the high and low nibble of a byte to emit \xNN escapes.
const hex = "0123456789abcdef"
func pyQuoteBytes(b []byte) []byte { func pyQuoteBytes(b []byte) []byte {
s := mem.String(b) s := mem.String(b)
buf := make([]byte, 0, len(s)) buf := make([]byte, 0, len(s) + 2/*quotes*/)
// smartquotes: choose ' or " as quoting character // smartquotes: choose ' or " as quoting character
// https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947 // https://github.com/python/cpython/blob/v2.7.13-116-g1aa1803b3d/Objects/stringobject.c#L947
...@@ -34,7 +36,7 @@ func pyQuoteBytes(b []byte) []byte { ...@@ -34,7 +36,7 @@ func pyQuoteBytes(b []byte) []byte {
for i, r := range s { for i, r := range s {
switch r { switch r {
case utf8.RuneError: case utf8.RuneError:
buf = append(buf, []byte(fmt.Sprintf("\\x%02x", s[i]))...) buf = append(buf, '\\', 'x', hex[s[i]>>4], hex[s[i]&0xf])
case '\\', rune(quote): case '\\', rune(quote):
buf = append(buf, '\\', byte(r)) buf = append(buf, '\\', byte(r))
case rune(noquote): case rune(noquote):
...@@ -53,7 +55,7 @@ func pyQuoteBytes(b []byte) []byte { ...@@ -53,7 +55,7 @@ func pyQuoteBytes(b []byte) []byte {
switch { switch {
case r < ' ': case r < ' ':
// we already converted to \<letter> what python represents as such above // we already converted to \<letter> what python represents as such above
buf = append(buf, []byte(fmt.Sprintf("\\x%02x", s[i]))...) buf = append(buf, '\\', 'x', hex[s[i]>>4], hex[s[i]&0xf])
default: default:
// we already handled ', " and (< ' ') above, so now it // we already handled ', " and (< ' ') above, so now it
......
...@@ -15,38 +15,45 @@ func byterange(start, stop byte) []byte { ...@@ -15,38 +15,45 @@ func byterange(start, stop byte) []byte {
return b return b
} }
func TestPyQuote(t *testing.T) { var pyQuoteTestv = []struct {in, quoted string} {
// XXX -> global // empty
testv := []struct {in, quoted string} { {``, `''`},
// empty
{``, `''`}, // special characters
{string(byterange(0, 32)), `'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'`},
// special characters
{string(byterange(0, 32)), `'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'`}, // " vs '
{`hello world`, `'hello world'`},
// " vs ' {`hello ' world`, `"hello ' world"`},
{`hello world`, `'hello world'`}, {`hello ' " world`, `'hello \' " world'`},
{`hello ' world`, `"hello ' world"`},
{`hello ' " world`, `'hello \' " world'`}, // \
{`hello \ world`, `'hello \\ world'`},
// \
{`hello \ world`, `'hello \\ world'`}, // utf-8
// XXX python escapes non-ascii, but since FileStorage cannot
// utf-8 // commit such strings we take the freedom and output them as
// XXX python escapes non-ascii, but since FileStorage cannot // readable
// commit such strings we take the freedom and output them as //{`привет мир`, `'\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82 \xd0\xbc\xd0\xb8\xd1\x80'`},
// readable {`привет мир`, `'привет мир'`},
//{`привет мир`, `'\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82 \xd0\xbc\xd0\xb8\xd1\x80'`},
{`привет мир`, `'привет мир'`}, // invalid utf-8
{"\xd0a", `'\xd0a'`},
// invalid utf-8 }
{"\xd0a", `'\xd0a'`},
}
for _, tt := range testv { func TestPyQuote(t *testing.T) {
for _, tt := range pyQuoteTestv {
quoted := pyQuote(tt.in) quoted := pyQuote(tt.in)
if quoted != tt.quoted { if quoted != tt.quoted {
t.Errorf("pyQuote(%q) -> %s ; want %s", tt.in, quoted, tt.quoted) t.Errorf("pyQuote(%q) -> %s ; want %s", tt.in, quoted, tt.quoted)
} }
} }
} }
// BenchmarkPyQuote times pyQuote by running it over every input of the
// shared pyQuoteTestv table once per benchmark iteration.
func BenchmarkPyQuote(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for idx := range pyQuoteTestv {
			pyQuote(pyQuoteTestv[idx].in)
		}
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment