Commit 6e1b832a authored by Xiang Zhang, committed by GitHub

bpo-30003: Fix handling escape characters in HZ codec (#1720) (#1556)

parent 2b67c7aa
......@@ -82,6 +82,10 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
(b'ab~cd', 'replace', u'ab\uFFFDd'),
(b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
# issue 30003
(u'ab~cd', 'strict', b'ab~~cd'), # escape ~
(b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
(b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
)
def test_main():
......
......@@ -49,6 +49,9 @@ Extension Modules
Library
-------
- bpo-30003: Fix handling of escape characters in the HZ codec. Based on a
patch by Ma Lin.
- bpo-30375: Warnings emitted when compiling a regular expression now always
point to the line in the user code. Previously they could point into the
internals of the re module if emitted from inside groups or conditionals.
......
......@@ -335,15 +335,17 @@ ENCODER(hz)
DBCHAR code;
if (c < 0x80) {
if (state->i == 0) {
WRITE1((unsigned char)c)
NEXT(1, 1)
}
else {
WRITE3('~', '}', (unsigned char)c)
NEXT(1, 3)
if (state->i) {
WRITE2('~', '}')
NEXT_OUT(2)
state->i = 0;
}
WRITE1((unsigned char)c)
NEXT(1, 1)
if (c == '~') {
WRITE1('~')
NEXT_OUT(1)
}
continue;
}
......@@ -390,20 +392,19 @@ DECODER(hz)
unsigned char c2 = IN2;
REQUIRE_INBUF(2)
if (c2 == '~') {
if (c2 == '~' && state->i == 0) {
WRITE1('~')
NEXT(2, 1)
continue;
NEXT_OUT(1)
}
else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */
else if (c2 == '\n' && state->i == 0)
; /* line-continuation */
else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else
return 2;
NEXT(2, 0);
NEXT_IN(2)
continue;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment