Commit 24dc7536 authored by R David Murray

#18155: Regex-escape delimiter, in case it is a regex special char.

Patch by Vajrasky Kok, with slight modification to the tests by me.
parent 1d14246b
...@@ -261,8 +261,9 @@ class Sniffer: ...@@ -261,8 +261,9 @@ class Sniffer:
# if we see an extra quote between delimiters, we've got a # if we see an extra quote between delimiters, we've got a
# double quoted format # double quoted format
dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \ dq_regexp = re.compile(
{'delim':delim, 'quote':quotechar}, re.MULTILINE) r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
{'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
......
...@@ -914,7 +914,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -914,7 +914,7 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow' 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back' 'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
""" """
header = '''\ header1 = '''\
"venue","city","state","date","performers" "venue","city","state","date","performers"
''' '''
sample3 = '''\ sample3 = '''\
...@@ -933,10 +933,35 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -933,10 +933,35 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
sample6 = "a|b|c\r\nd|e|f\r\n" sample6 = "a|b|c\r\nd|e|f\r\n"
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n" sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
# Issue 18155: Use a delimiter that is a special char to regex:
header2 = '''\
"venue"+"city"+"state"+"date"+"performers"
'''
sample8 = """\
Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
"""
sample9 = """\
'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
"""
def test_has_header(self): def test_has_header(self):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
self.assertEqual(sniffer.has_header(self.sample1), False) self.assertEqual(sniffer.has_header(self.sample1), False)
self.assertEqual(sniffer.has_header(self.header+self.sample1), True) self.assertEqual(sniffer.has_header(self.header1 + self.sample1),
True)
def test_has_header_regex_special_delimiter(self):
sniffer = csv.Sniffer()
self.assertEqual(sniffer.has_header(self.sample8), False)
self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
True)
def test_sniff(self): def test_sniff(self):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
...@@ -970,13 +995,24 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -970,13 +995,24 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
dialect = sniffer.sniff(self.sample7) dialect = sniffer.sniff(self.sample7)
self.assertEqual(dialect.delimiter, "|") self.assertEqual(dialect.delimiter, "|")
self.assertEqual(dialect.quotechar, "'") self.assertEqual(dialect.quotechar, "'")
dialect = sniffer.sniff(self.sample8)
self.assertEqual(dialect.delimiter, '+')
dialect = sniffer.sniff(self.sample9)
self.assertEqual(dialect.delimiter, '+')
self.assertEqual(dialect.quotechar, "'")
def test_doublequote(self): def test_doublequote(self):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
dialect = sniffer.sniff(self.header) dialect = sniffer.sniff(self.header1)
self.assertFalse(dialect.doublequote)
dialect = sniffer.sniff(self.header2)
self.assertFalse(dialect.doublequote) self.assertFalse(dialect.doublequote)
dialect = sniffer.sniff(self.sample2) dialect = sniffer.sniff(self.sample2)
self.assertTrue(dialect.doublequote) self.assertTrue(dialect.doublequote)
dialect = sniffer.sniff(self.sample8)
self.assertFalse(dialect.doublequote)
dialect = sniffer.sniff(self.sample9)
self.assertTrue(dialect.doublequote)
if not hasattr(sys, "gettotalrefcount"): if not hasattr(sys, "gettotalrefcount"):
if test_support.verbose: print "*** skipping leakage tests ***" if test_support.verbose: print "*** skipping leakage tests ***"
......
...@@ -545,6 +545,7 @@ Jeff Knupp ...@@ -545,6 +545,7 @@ Jeff Knupp
Greg Kochanski Greg Kochanski
Damon Kohler Damon Kohler
Marko Kohtala Marko Kohtala
Vajrasky Kok
Guido Kollerie Guido Kollerie
Peter A. Koren Peter A. Koren
Joseph Koshy Joseph Koshy
......
...@@ -24,11 +24,15 @@ Core and Builtins ...@@ -24,11 +24,15 @@ Core and Builtins
Library Library
------- -------
- Issue #18155: The csv module now correctly handles csv files that use
a delimiter character that has a special meaning in regexes, instead of
raising an exception.
- Issue #18135: ssl.SSLSocket.write() now raises an OverflowError if the input - Issue #18135: ssl.SSLSocket.write() now raises an OverflowError if the input
string is longer than 2 gigabytes. The ssl module does not support partial string is longer than 2 gigabytes. The ssl module does not support partial
write. write.
- Issue #18167: cgi.FieldStorage no more fails to handle multipart/form-data - Issue #18167: cgi.FieldStorage no longer fails to handle multipart/form-data
when \r\n appears at end of 65535 bytes without other newlines. when \r\n appears at end of 65535 bytes without other newlines.
- Issue #17403: urllib.parse.robotparser normalizes the urls before adding to - Issue #17403: urllib.parse.robotparser normalizes the urls before adding to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment