Commit 0add9a4d authored by Joe Tsai, committed by Joe Tsai

encoding/csv: avoid mangling invalid UTF-8 in Writer

In the situation where a quoted field is necessary, avoid processing
each UTF-8 rune one-by-one, which causes mangling of invalid sequences
into utf8.RuneError, causing a loss of information.
Instead, search only for the escaped characters, handle those specially
and copy everything else in between verbatim.

This symmetrically matches the behavior of Reader.

Fixes #24298

Change-Id: I9276f64891084ce8487678f663fad711b4095dbb
Reviewed-on: https://go-review.googlesource.com/99297
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
parent 88466e93
...@@ -57,13 +57,27 @@ func (w *Writer) Write(record []string) error { ...@@ -57,13 +57,27 @@ func (w *Writer) Write(record []string) error {
} }
continue continue
} }
if err := w.w.WriteByte('"'); err != nil { if err := w.w.WriteByte('"'); err != nil {
return err return err
} }
for len(field) > 0 {
// Search for special characters.
i := strings.IndexAny(field, "\"\r\n")
if i < 0 {
i = len(field)
}
for _, r1 := range field { // Copy verbatim everything before the special character.
if _, err := w.w.WriteString(field[:i]); err != nil {
return err
}
field = field[i:]
// Encode the special character.
if len(field) > 0 {
var err error var err error
switch r1 { switch field[0] {
case '"': case '"':
_, err = w.w.WriteString(`""`) _, err = w.w.WriteString(`""`)
case '\r': case '\r':
...@@ -76,14 +90,13 @@ func (w *Writer) Write(record []string) error { ...@@ -76,14 +90,13 @@ func (w *Writer) Write(record []string) error {
} else { } else {
err = w.w.WriteByte('\n') err = w.w.WriteByte('\n')
} }
default:
_, err = w.w.WriteRune(r1)
} }
field = field[1:]
if err != nil { if err != nil {
return err return err
} }
} }
}
if err := w.w.WriteByte('"'); err != nil { if err := w.w.WriteByte('"'); err != nil {
return err return err
} }
......
...@@ -39,6 +39,8 @@ var writeTests = []struct { ...@@ -39,6 +39,8 @@ var writeTests = []struct {
{Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"}, {Input: [][]string{{"a", "a", ""}}, Output: "a,a,\n"},
{Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"}, {Input: [][]string{{"a", "a", "a"}}, Output: "a,a,a\n"},
{Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"}, {Input: [][]string{{`\.`}}, Output: "\"\\.\"\n"},
{Input: [][]string{{"x09\x41\xb4\x1c", "aktau"}}, Output: "x09\x41\xb4\x1c,aktau\n"},
{Input: [][]string{{",x09\x41\xb4\x1c", "aktau"}}, Output: "\",x09\x41\xb4\x1c\",aktau\n"},
} }
func TestWrite(t *testing.T) { func TestWrite(t *testing.T) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment