Commit e7197936 authored by Serhiy Storchaka, committed by GitHub

[2.7] bpo-30157: Fix csv.Sniffer.sniff() regex pattern. (GH-5601) (GH-5604)

Co-authored-by: Jake Davis <jcdavis@awedge.net>.
(cherry picked from commit 2411292b)
parent 672fd7d8
...@@ -217,7 +217,7 @@ class Sniffer: ...@@ -217,7 +217,7 @@ class Sniffer:
matches = [] matches = []
for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?", for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)', # ".*?", '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)', # ".*?",
'(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?" '(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)', # ,".*?"
'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space) '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'): # ".*?" (no delim, no space)
regexp = re.compile(restr, re.DOTALL | re.MULTILINE) regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
matches = regexp.findall(data) matches = regexp.findall(data)
......
...@@ -1036,6 +1036,15 @@ Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back ...@@ -1036,6 +1036,15 @@ Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
self.assertEqual(sniffer.has_header(self.header2 + self.sample8), self.assertEqual(sniffer.has_header(self.header2 + self.sample8),
True) True)
def test_guess_quote_and_delimiter(self):
# Regression test: sniff() must report ';' as the delimiter and "'" as the
# quote character for each sample below, wherever the quoted field sits.
sniffer = csv.Sniffer()
# The four samples place the quoted field '123;4' with a ';' on both sides,
# only after it, only before it, and on neither side. The ';' inside the
# quotes is present in every sample.
for header in (";'123;4';", "'123;4';", ";'123;4'", "'123;4'"):
# The second argument ",;" is passed as sniff()'s delimiters argument.
dialect = sniffer.sniff(header, ",;")
self.assertEqual(dialect.delimiter, ';')
self.assertEqual(dialect.quotechar, "'")
# assertIs checks identity with False, not mere falsiness.
self.assertIs(dialect.doublequote, False)
self.assertIs(dialect.skipinitialspace, False)
def test_sniff(self): def test_sniff(self):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
dialect = sniffer.sniff(self.sample1) dialect = sniffer.sniff(self.sample1)
......
...@@ -317,6 +317,7 @@ Kushal Das ...@@ -317,6 +317,7 @@ Kushal Das
Jonathan Dasteel Jonathan Dasteel
Pierre-Yves David Pierre-Yves David
A. Jesse Jiryu Davis A. Jesse Jiryu Davis
Jake Davis
Merlijn van Deen Merlijn van Deen
John DeGood John DeGood
Ned Deily Ned Deily
......
Fixed guessing quote and delimiter in csv.Sniffer.sniff() when only the last
field is quoted. Patch by Jake Davis.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment