Commit b4fd4d37 authored by Skip Montanaro

Patch from Thomas Barr so that csv.Sniffer will set doublequote property.

Closes issue 6606.
parent 17565e5b
...@@ -170,7 +170,7 @@ class Sniffer: ...@@ -170,7 +170,7 @@ class Sniffer:
Returns a dialect (or None) corresponding to the sample Returns a dialect (or None) corresponding to the sample
""" """
quotechar, delimiter, skipinitialspace = \ quotechar, doublequote, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters) self._guess_quote_and_delimiter(sample, delimiters)
if not delimiter: if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample, delimiter, skipinitialspace = self._guess_delimiter(sample,
...@@ -184,8 +184,8 @@ class Sniffer: ...@@ -184,8 +184,8 @@ class Sniffer:
lineterminator = '\r\n' lineterminator = '\r\n'
quoting = QUOTE_MINIMAL quoting = QUOTE_MINIMAL
# escapechar = '' # escapechar = ''
doublequote = False
dialect.doublequote = doublequote
dialect.delimiter = delimiter dialect.delimiter = delimiter
# _csv.reader won't accept a quotechar of '' # _csv.reader won't accept a quotechar of ''
dialect.quotechar = quotechar or '"' dialect.quotechar = quotechar or '"'
...@@ -217,8 +217,8 @@ class Sniffer: ...@@ -217,8 +217,8 @@ class Sniffer:
break break
if not matches: if not matches:
return ('', None, 0) # (quotechar, delimiter, skipinitialspace) # (quotechar, doublequote, delimiter, skipinitialspace)
return ('', False, None, 0)
quotes = {} quotes = {}
delims = {} delims = {}
spaces = 0 spaces = 0
...@@ -255,7 +255,19 @@ class Sniffer: ...@@ -255,7 +255,19 @@ class Sniffer:
delim = '' delim = ''
skipinitialspace = 0 skipinitialspace = 0
return (quotechar, delim, skipinitialspace) # if we see an extra quote between delimiters, we've got a
# double quoted format
dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
{'delim':delim, 'quote':quotechar}, re.MULTILINE)
if dq_regexp.search(data):
doublequote = True
else:
doublequote = False
return (quotechar, doublequote, delim, skipinitialspace)
def _guess_delimiter(self, data, delimiters): def _guess_delimiter(self, data, delimiters):
......
...@@ -891,7 +891,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -891,7 +891,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes' 'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence' 'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back' 'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
""" """
header = '''\ header = '''\
"venue","city","state","date","performers" "venue","city","state","date","performers"
...@@ -950,6 +950,13 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -950,6 +950,13 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.delimiter, "|")
self.assertEqual(dialect.quotechar, "'") self.assertEqual(dialect.quotechar, "'")
def test_doublequote(self):
sniffer = csv.Sniffer()
dialect = sniffer.sniff(self.header)
self.assertFalse(dialect.doublequote)
dialect = sniffer.sniff(self.sample2)
self.assertTrue(dialect.doublequote)
if not hasattr(sys, "gettotalrefcount"): if not hasattr(sys, "gettotalrefcount"):
if test_support.verbose: print "*** skipping leakage tests ***" if test_support.verbose: print "*** skipping leakage tests ***"
else: else:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment