Commit f2bc2755 authored by Pieter Droogendijk, committed by Rob Pike

encoding/csv: always allow trailing commas

Original CL by rsc (11916045):

The motivation for disallowing them was RFC 4180 saying
"The last field in the record must not be followed by a comma."
I believe this is an admonition to CSV generators, not readers.
When reading, anything followed by a comma is not the last field.

Fixes #5892.

R=golang-dev, rsc, r
CC=golang-dev
https://golang.org/cl/12294043
parent 357f7336
...@@ -72,7 +72,7 @@ func (e *ParseError) Error() string { ...@@ -72,7 +72,7 @@ func (e *ParseError) Error() string {
// These are the errors that can be returned in ParseError.Error // These are the errors that can be returned in ParseError.Error
var ( var (
ErrTrailingComma = errors.New("extra delimiter at end of line") ErrTrailingComma = errors.New("extra delimiter at end of line") // no longer used
ErrBareQuote = errors.New("bare \" in non-quoted-field") ErrBareQuote = errors.New("bare \" in non-quoted-field")
ErrQuote = errors.New("extraneous \" in field") ErrQuote = errors.New("extraneous \" in field")
ErrFieldCount = errors.New("wrong number of fields in line") ErrFieldCount = errors.New("wrong number of fields in line")
...@@ -98,16 +98,14 @@ var ( ...@@ -98,16 +98,14 @@ var (
// If LazyQuotes is true, a quote may appear in an unquoted field and a // If LazyQuotes is true, a quote may appear in an unquoted field and a
// non-doubled quote may appear in a quoted field. // non-doubled quote may appear in a quoted field.
// //
// If TrailingComma is true, the last field may be an unquoted empty field.
//
// If TrimLeadingSpace is true, leading white space in a field is ignored. // If TrimLeadingSpace is true, leading white space in a field is ignored.
type Reader struct { type Reader struct {
Comma rune // Field delimiter (set to ',' by NewReader) Comma rune // field delimiter (set to ',' by NewReader)
Comment rune // Comment character for start of line Comment rune // comment character for start of line
FieldsPerRecord int // Number of expected fields per record FieldsPerRecord int // number of expected fields per record
LazyQuotes bool // Allow lazy quotes LazyQuotes bool // allow lazy quotes
TrailingComma bool // Allow trailing comma TrailingComma bool // ignored; here for backwards compatibility
TrimLeadingSpace bool // Trim leading space TrimLeadingSpace bool // trim leading space
line int line int
column int column int
r *bufio.Reader r *bufio.Reader
...@@ -257,24 +255,16 @@ func (r *Reader) parseField() (haveField bool, delim rune, err error) { ...@@ -257,24 +255,16 @@ func (r *Reader) parseField() (haveField bool, delim rune, err error) {
r.field.Reset() r.field.Reset()
r1, err := r.readRune() r1, err := r.readRune()
if err != nil { for err == nil && r.TrimLeadingSpace && r1 != '\n' && unicode.IsSpace(r1) {
// If we have EOF and are not at the start of a line r1, err = r.readRune()
// then we return the empty field. We have already }
// checked for trailing commas if needed.
if err == io.EOF && r.column != 0 { if err == io.EOF && r.column != 0 {
return true, 0, err return true, 0, err
} }
return false, 0, err
}
if r.TrimLeadingSpace {
for r1 != '\n' && unicode.IsSpace(r1) {
r1, err = r.readRune()
if err != nil { if err != nil {
return false, 0, err return false, 0, err
} }
}
}
switch r1 { switch r1 {
case r.Comma: case r.Comma:
...@@ -349,25 +339,5 @@ func (r *Reader) parseField() (haveField bool, delim rune, err error) { ...@@ -349,25 +339,5 @@ func (r *Reader) parseField() (haveField bool, delim rune, err error) {
return false, 0, err return false, 0, err
} }
if !r.TrailingComma {
// We don't allow trailing commas. See if we
// are at the end of the line (being mindful
// of trimming spaces).
c := r.column
r1, err = r.readRune()
if r.TrimLeadingSpace {
for r1 != '\n' && unicode.IsSpace(r1) {
r1, err = r.readRune()
if err != nil {
break
}
}
}
if err == io.EOF || r1 == '\n' {
r.column = c // report the comma
return false, 0, r.error(ErrTrailingComma)
}
r.unreadRune()
}
return true, r1, nil return true, r1, nil
} }
...@@ -171,32 +171,32 @@ field"`, ...@@ -171,32 +171,32 @@ field"`,
Output: [][]string{{"a", "b", "c"}, {"d", "e"}}, Output: [][]string{{"a", "b", "c"}, {"d", "e"}},
}, },
{ {
Name: "BadTrailingCommaEOF", Name: "TrailingCommaEOF",
Input: "a,b,c,", Input: "a,b,c,",
Error: "extra delimiter at end of line", Line: 1, Column: 5, Output: [][]string{{"a", "b", "c", ""}},
}, },
{ {
Name: "BadTrailingCommaEOL", Name: "TrailingCommaEOL",
Input: "a,b,c,\n", Input: "a,b,c,\n",
Error: "extra delimiter at end of line", Line: 1, Column: 5, Output: [][]string{{"a", "b", "c", ""}},
}, },
{ {
Name: "BadTrailingCommaSpaceEOF", Name: "TrailingCommaSpaceEOF",
TrimLeadingSpace: true, TrimLeadingSpace: true,
Input: "a,b,c, ", Input: "a,b,c, ",
Error: "extra delimiter at end of line", Line: 1, Column: 5, Output: [][]string{{"a", "b", "c", ""}},
}, },
{ {
Name: "BadTrailingCommaSpaceEOL", Name: "TrailingCommaSpaceEOL",
TrimLeadingSpace: true, TrimLeadingSpace: true,
Input: "a,b,c, \n", Input: "a,b,c, \n",
Error: "extra delimiter at end of line", Line: 1, Column: 5, Output: [][]string{{"a", "b", "c", ""}},
}, },
{ {
Name: "BadTrailingCommaLine3", Name: "TrailingCommaLine3",
TrimLeadingSpace: true, TrimLeadingSpace: true,
Input: "a,b,c\nd,e,f\ng,hi,", Input: "a,b,c\nd,e,f\ng,hi,",
Error: "extra delimiter at end of line", Line: 3, Column: 4, Output: [][]string{{"a", "b", "c"}, {"d", "e", "f"}, {"g", "hi", ""}},
}, },
{ {
Name: "NotTrailingComma3", Name: "NotTrailingComma3",
...@@ -231,7 +231,7 @@ x,,, ...@@ -231,7 +231,7 @@ x,,,
}, },
}, },
{ {
Name: "Issue 2366", Name: "TrailingCommaIneffective1",
TrailingComma: true, TrailingComma: true,
TrimLeadingSpace: true, TrimLeadingSpace: true,
Input: "a,b,\nc,d,e", Input: "a,b,\nc,d,e",
...@@ -241,11 +241,14 @@ x,,, ...@@ -241,11 +241,14 @@ x,,,
}, },
}, },
{ {
Name: "Issue 2366a", Name: "TrailingCommaIneffective2",
TrailingComma: false, TrailingComma: false,
TrimLeadingSpace: true, TrimLeadingSpace: true,
Input: "a,b,\nc,d,e", Input: "a,b,\nc,d,e",
Error: "extra delimiter at end of line", Output: [][]string{
{"a", "b", ""},
{"c", "d", "e"},
},
}, },
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment