diff --git a/src/strconv/isprint.go b/src/strconv/isprint.go index 0cf363c699ed3ea88b6284f8f363e6a8a06c3d15..20a02dec33f2498eb7d38da4588f49765e5bd1b1 100644 --- a/src/strconv/isprint.go +++ b/src/strconv/isprint.go @@ -635,3 +635,23 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry 0xf57a, 0xf5a4, } + +// isGraphic lists the graphic runes not matched by IsPrint. +var isGraphic = []uint16{ + 0x00a0, + 0x1680, + 0x2000, + 0x2001, + 0x2002, + 0x2003, + 0x2004, + 0x2005, + 0x2006, + 0x2007, + 0x2008, + 0x2009, + 0x200a, + 0x202f, + 0x205f, + 0x3000, +} diff --git a/src/strconv/makeisprint.go b/src/strconv/makeisprint.go index 588d0a00b53b242010cc50afd051065e1ceb5a91..514258060e59693d86cf6c86f314826a1331fb39 100644 --- a/src/strconv/makeisprint.go +++ b/src/strconv/makeisprint.go @@ -174,6 +174,23 @@ func main() { } fmt.Fprintf(&buf, "\t%#04x,\n", r-0x10000) } + fmt.Fprintf(&buf, "}\n\n") + + // The list of graphic but not "printable" runes is short. Just make one easy table. + fmt.Fprintf(&buf, "// isGraphic lists the graphic runes not matched by IsPrint.\n") + fmt.Fprintf(&buf, "var isGraphic = []uint16{\n") + for r := rune(0); r <= unicode.MaxRune; r++ { + if unicode.IsPrint(r) != unicode.IsGraphic(r) { + // Sanity check: every printable rune must also be graphic. + if !unicode.IsGraphic(r) { + log.Fatalf("%U is printable but not graphic\n", r) + } + if r > 0xFFFF { // We expect only 16-bit values. 
+ log.Fatalf("%U too big for isGraphic\n", r) + } + fmt.Fprintf(&buf, "\t%#04x,\n", r) + } + } fmt.Fprintf(&buf, "}\n") data, err := format.Source(buf.Bytes()) diff --git a/src/strconv/quote.go b/src/strconv/quote.go index 53d51b5a46a324a90bbdbf168efba9e3fab75129..40d0667551a63c1c2f28f78f022d75d925cc396b 100644 --- a/src/strconv/quote.go +++ b/src/strconv/quote.go @@ -12,7 +12,7 @@ import ( const lowerhex = "0123456789abcdef" -func quoteWith(s string, quote byte, ASCIIonly bool) string { +func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { var runeTmp [utf8.UTFMax]byte buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. buf = append(buf, quote) @@ -38,7 +38,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { buf = append(buf, byte(r)) continue } - } else if IsPrint(r) { + } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { n := utf8.EncodeRune(runeTmp[:], r) buf = append(buf, runeTmp[:n]...) continue @@ -90,7 +90,7 @@ func quoteWith(s string, quote byte, ASCIIonly bool) string { // control characters and non-printable characters as defined by // IsPrint. func Quote(s string) string { - return quoteWith(s, '"', false) + return quoteWith(s, '"', false, false) } // AppendQuote appends a double-quoted Go string literal representing s, @@ -103,7 +103,7 @@ func AppendQuote(dst []byte, s string) []byte { // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for // non-ASCII characters and non-printable characters as defined by IsPrint. func QuoteToASCII(s string) string { - return quoteWith(s, '"', true) + return quoteWith(s, '"', true, false) } // AppendQuoteToASCII appends a double-quoted Go string literal representing s, @@ -112,12 +112,25 @@ func AppendQuoteToASCII(dst []byte, s string) []byte { return append(dst, QuoteToASCII(s)...) } +// QuoteToGraphic returns a double-quoted Go string literal representing s. 
+// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for +// non-graphic characters as defined by IsGraphic; non-ASCII graphic characters are left unescaped. +func QuoteToGraphic(s string) string { + return quoteWith(s, '"', false, true) +} + +// AppendQuoteToGraphic appends a double-quoted Go string literal representing s, +// as generated by QuoteToGraphic, to dst and returns the extended buffer. +func AppendQuoteToGraphic(dst []byte, s string) []byte { + return append(dst, QuoteToGraphic(s)...) +} + // QuoteRune returns a single-quoted Go character literal representing the -// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) +// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) // for control characters and non-printable characters as defined by IsPrint. func QuoteRune(r rune) string { // TODO: avoid the allocation here. - return quoteWith(string(r), '\'', false) + return quoteWith(string(r), '\'', false, false) } // AppendQuoteRune appends a single-quoted Go character literal representing the rune, @@ -127,12 +140,12 @@ func AppendQuoteRune(dst []byte, r rune) []byte { } // QuoteRuneToASCII returns a single-quoted Go character literal representing -// the rune. The returned string uses Go escape sequences (\t, \n, \xFF, +// the rune. The returned string uses Go escape sequences (\t, \n, \xFF, // \u0100) for non-ASCII characters and non-printable characters as defined // by IsPrint. func QuoteRuneToASCII(r rune) string { // TODO: avoid the allocation here. - return quoteWith(string(r), '\'', true) + return quoteWith(string(r), '\'', true, false) } // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, @@ -141,6 +154,21 @@ func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { return append(dst, QuoteRuneToASCII(r)...) } +// QuoteRuneToGraphic returns a single-quoted Go character literal representing +// the rune. 
The returned string uses Go escape sequences (\t, \n, \xFF, +// \u0100) for non-graphic characters as defined by IsGraphic; a non-ASCII +// graphic rune is left unescaped. +func QuoteRuneToGraphic(r rune) string { + // TODO: avoid the allocation here. + return quoteWith(string(r), '\'', false, true) +} + +// AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, +// as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. +func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { + return append(dst, QuoteRuneToGraphic(r)...) +} + // CanBackquote reports whether the string s can be represented // unchanged as a single-line backquoted string without control // characters other than tab. @@ -453,3 +481,26 @@ func IsPrint(r rune) bool { j := bsearch16(isNotPrint, uint16(r)) return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) } + +// IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such +// characters include letters, marks, numbers, punctuation, symbols, and +// spaces, from categories L, M, N, P, S, and Zs. +func IsGraphic(r rune) bool { + if IsPrint(r) { + return true + } + return isInGraphicList(r) +} + +// isInGraphicList reports whether the rune is in the isGraphic list. This separation +// from IsGraphic allows quoteWith to avoid two calls to IsPrint. +// Should be called only if IsPrint fails. +func isInGraphicList(r rune) bool { + // We know r must fit in 16 bits - see makeisprint.go. 
+ if r > 0xFFFF { + return false + } + rr := uint16(r) + i := bsearch16(isGraphic, rr) + return i < len(isGraphic) && rr == isGraphic[i] +} diff --git a/src/strconv/quote_test.go b/src/strconv/quote_test.go index 3bf162f987e7878e57602d3a34923a0095e6c82c..3e8ec2c98f367828f0572735cdfae09a9e6abdcc 100644 --- a/src/strconv/quote_test.go +++ b/src/strconv/quote_test.go @@ -10,7 +10,7 @@ import ( "unicode" ) -// Verify that our isPrint agrees with unicode.IsPrint +// Verify that our IsPrint agrees with unicode.IsPrint. func TestIsPrint(t *testing.T) { n := 0 for r := rune(0); r <= unicode.MaxRune; r++ { @@ -24,19 +24,36 @@ func TestIsPrint(t *testing.T) { } } +// Verify that our IsGraphic agrees with unicode.IsGraphic. +func TestIsGraphic(t *testing.T) { + n := 0 + for r := rune(0); r <= unicode.MaxRune; r++ { + if IsGraphic(r) != unicode.IsGraphic(r) { + t.Errorf("IsGraphic(%U)=%t incorrect", r, IsGraphic(r)) + n++ + if n > 10 { + return + } + } + } +} + type quoteTest struct { - in string - out string - ascii string + in string + out string + ascii string + graphic string } var quotetests = []quoteTest{ - {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`}, - {"\\", `"\\"`, `"\\"`}, - {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`}, - {"\u263a", `"☺"`, `"\u263a"`}, - {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`}, - {"\x04", `"\x04"`, `"\x04"`}, + {"\a\b\f\r\n\t\v", `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`, `"\a\b\f\r\n\t\v"`}, + {"\\", `"\\"`, `"\\"`, `"\\"`}, + {"abc\xffdef", `"abc\xffdef"`, `"abc\xffdef"`, `"abc\xffdef"`}, + {"\u263a", `"☺"`, `"\u263a"`, `"☺"`}, + {"\U0010ffff", `"\U0010ffff"`, `"\U0010ffff"`, `"\U0010ffff"`}, + {"\x04", `"\x04"`, `"\x04"`, `"\x04"`}, + // Some non-printable but graphic runes. Final column is an interpreted (double-quoted) literal, so its \u escapes denote the runes themselves. 
+ {"!\u00a0!\u2000!\u3000!", `"!\u00a0!\u2000!\u3000!"`, `"!\u00a0!\u2000!\u3000!"`, "\"!\u00a0!\u2000!\u3000!\""}, } func TestQuote(t *testing.T) { @@ -61,22 +78,38 @@ func TestQuoteToASCII(t *testing.T) { } } +func TestQuoteToGraphic(t *testing.T) { + for _, tt := range quotetests { + if out := QuoteToGraphic(tt.in); out != tt.graphic { + t.Errorf("QuoteToGraphic(%s) = %s, want %s", tt.in, out, tt.graphic) + } + if out := AppendQuoteToGraphic([]byte("abc"), tt.in); string(out) != "abc"+tt.graphic { + t.Errorf("AppendQuoteToGraphic(%q, %s) = %s, want %s", "abc", tt.in, out, "abc"+tt.graphic) + } + } +} + type quoteRuneTest struct { - in rune - out string - ascii string + in rune + out string + ascii string + graphic string } var quoterunetests = []quoteRuneTest{ - {'a', `'a'`, `'a'`}, - {'\a', `'\a'`, `'\a'`}, - {'\\', `'\\'`, `'\\'`}, - {0xFF, `'ÿ'`, `'\u00ff'`}, - {0x263a, `'☺'`, `'\u263a'`}, - {0xfffd, `'�'`, `'\ufffd'`}, - {0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`}, - {0x0010ffff + 1, `'�'`, `'\ufffd'`}, - {0x04, `'\x04'`, `'\x04'`}, + {'a', `'a'`, `'a'`, `'a'`}, + {'\a', `'\a'`, `'\a'`, `'\a'`}, + {'\\', `'\\'`, `'\\'`, `'\\'`}, + {0xFF, `'ÿ'`, `'\u00ff'`, `'ÿ'`}, + {0x263a, `'☺'`, `'\u263a'`, `'☺'`}, + {0xfffd, `'�'`, `'\ufffd'`, `'�'`}, + {0x0010ffff, `'\U0010ffff'`, `'\U0010ffff'`, `'\U0010ffff'`}, + {0x0010ffff + 1, `'�'`, `'\ufffd'`, `'�'`}, + {0x04, `'\x04'`, `'\x04'`, `'\x04'`}, + // Some differences between graphic and printable. Note the last column is an interpreted (double-quoted) literal, so its \u escape denotes the rune itself. 
+ {'\u00a0', `'\u00a0'`, `'\u00a0'`, "'\u00a0'"}, + {'\u2000', `'\u2000'`, `'\u2000'`, "'\u2000'"}, + {'\u3000', `'\u3000'`, `'\u3000'`, "'\u3000'"}, } func TestQuoteRune(t *testing.T) { @@ -101,6 +134,17 @@ func TestQuoteRuneToASCII(t *testing.T) { } } +func TestQuoteRuneToGraphic(t *testing.T) { + for _, tt := range quoterunetests { + if out := QuoteRuneToGraphic(tt.in); out != tt.graphic { + t.Errorf("QuoteRuneToGraphic(%U) = %s, want %s", tt.in, out, tt.graphic) + } + if out := AppendQuoteRuneToGraphic([]byte("abc"), tt.in); string(out) != "abc"+tt.graphic { + t.Errorf("AppendQuoteRuneToGraphic(%q, %U) = %s, want %s", "abc", tt.in, out, "abc"+tt.graphic) + } + } +} + type canBackquoteTest struct { in string out bool