Commit 89a5e032 authored by Xiang Zhang, committed by GitHub

bpo-30003: Fix handling escape characters in HZ codec (#1556)

parent 15033d14
...@@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase): ...@@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'), (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'), (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"), (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
# issue 30003
('ab~cd', 'strict', b'ab~~cd'), # escape ~
(b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode
(b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
) )
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -334,6 +334,9 @@ Extension Modules ...@@ -334,6 +334,9 @@ Extension Modules
Library Library
------- -------
- bpo-30003: Fix handling escape characters in HZ codec. Based on patch
by Ma Lin.
- bpo-30149: inspect.signature() now supports callables with - bpo-30149: inspect.signature() now supports callables with
variable-argument parameters wrapped with partialmethod. variable-argument parameters wrapped with partialmethod.
Patch by Dong-hee Na. Patch by Dong-hee Na.
......
...@@ -350,15 +350,17 @@ ENCODER(hz) ...@@ -350,15 +350,17 @@ ENCODER(hz)
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
if (state->i == 0) { if (state->i) {
WRITEBYTE1((unsigned char)c); WRITEBYTE2('~', '}');
NEXT(1, 1); NEXT_OUT(2);
}
else {
WRITEBYTE3('~', '}', (unsigned char)c);
NEXT(1, 3);
state->i = 0; state->i = 0;
} }
WRITEBYTE1((unsigned char)c);
NEXT(1, 1);
if (c == '~') {
WRITEBYTE1('~');
NEXT_OUT(1);
}
continue; continue;
} }
...@@ -409,17 +411,14 @@ DECODER(hz) ...@@ -409,17 +411,14 @@ DECODER(hz)
unsigned char c2 = INBYTE2; unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2); REQUIRE_INBUF(2);
if (c2 == '~') { if (c2 == '~' && state->i == 0)
OUTCHAR('~'); OUTCHAR('~');
NEXT_IN(2);
continue;
}
else if (c2 == '{' && state->i == 0) else if (c2 == '{' && state->i == 0)
state->i = 1; /* set GB */ state->i = 1; /* set GB */
else if (c2 == '\n' && state->i == 0)
; /* line-continuation */
else if (c2 == '}' && state->i == 1) else if (c2 == '}' && state->i == 1)
state->i = 0; /* set ASCII */ state->i = 0; /* set ASCII */
else if (c2 == '\n')
; /* line-continuation */
else else
return 1; return 1;
NEXT_IN(2); NEXT_IN(2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment