Commit 4f35c715 authored by Brett Cannon

Locale data that contains regex metacharacters is now properly escaped.

Closes bug #1039270.
parent 579b3e24
...@@ -15,6 +15,7 @@ import locale ...@@ -15,6 +15,7 @@ import locale
import calendar import calendar
from re import compile as re_compile from re import compile as re_compile
from re import IGNORECASE from re import IGNORECASE
from re import escape as re_escape
from datetime import date as datetime_date from datetime import date as datetime_date
try: try:
from thread import allocate_lock as _thread_allocate_lock from thread import allocate_lock as _thread_allocate_lock
...@@ -232,7 +233,7 @@ class TimeRE(dict): ...@@ -232,7 +233,7 @@ class TimeRE(dict):
return '' return ''
to_convert = to_convert[:] to_convert = to_convert[:]
to_convert.sort(key=len, reverse=True) to_convert.sort(key=len, reverse=True)
regex = '|'.join(to_convert) regex = '|'.join(re_escape(stuff) for stuff in to_convert)
regex = '(?P<%s>%s' % (directive, regex) regex = '(?P<%s>%s' % (directive, regex)
return '%s)' % regex return '%s)' % regex
...@@ -245,7 +246,8 @@ class TimeRE(dict): ...@@ -245,7 +246,8 @@ class TimeRE(dict):
""" """
processed_format = '' processed_format = ''
# The sub() call escapes all characters that might be misconstrued # The sub() call escapes all characters that might be misconstrued
# as regex syntax. # as regex syntax. Cannot use re.escape since we have to deal with
# format directives (%m, etc.).
regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
format = regex_chars.sub(r"\\\1", format) format = regex_chars.sub(r"\\\1", format)
whitespace_replacement = re_compile('\s+') whitespace_replacement = re_compile('\s+')
......
...@@ -176,6 +176,19 @@ class TimeRETests(unittest.TestCase): ...@@ -176,6 +176,19 @@ class TimeRETests(unittest.TestCase):
found = compiled_re.match("\w+ 10") found = compiled_re.match("\w+ 10")
self.failUnless(found, "Escaping failed of format '\w+ 10'") self.failUnless(found, "Escaping failed of format '\w+ 10'")
def test_locale_data_w_regex_metacharacters(self):
    # Check that if locale data contains regex metacharacters they are
    # escaped properly.
    # Discovered by bug #1039270.
    locale_time = _strptime.LocaleTime()
    # Each frozenset must hold whole timezone names.  A bare string passed
    # to frozenset() is iterated character by character, so the daylight
    # name is wrapped in a one-element tuple to keep it as a single entry.
    locale_time.timezone = (frozenset(("utc", "gmt",
                                        "Tokyo (standard time)")),
                            frozenset(("Tokyo (daylight time)",)))
    time_re = _strptime.TimeRE(locale_time)
    # The parentheses in the timezone name are regex metacharacters; the
    # compiled %Z pattern must still match the name literally.
    self.failUnless(time_re.compile("%Z").match("Tokyo (standard time)"),
                    "locale data that contains regex metacharacters is not"
                    " properly escaped")
class StrptimeTests(unittest.TestCase): class StrptimeTests(unittest.TestCase):
"""Tests for _strptime.strptime.""" """Tests for _strptime.strptime."""
......
...@@ -34,6 +34,11 @@ Extension modules ...@@ -34,6 +34,11 @@ Extension modules
Library Library
------- -------
- time.strptime() now properly escapes regex metacharacters in timezone names
and all other locale-specific strings. It was breaking under Japanese Windows
when the timezone was specified as "Tokyo (standard time)".
Closes bug #1039270.
- Updates for the email package: - Updates for the email package:
+ All deprecated APIs that in email 2.x issued warnings have been removed: + All deprecated APIs that in email 2.x issued warnings have been removed:
_encoder argument to the MIMEText constructor, Message.add_payload(), _encoder argument to the MIMEText constructor, Message.add_payload(),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment