Commit 745d0091 authored by scoder

Merge pull request #193 from nnemkin/pyunicode_string_fix

Compatibility fix for Py_UNICODE* support.
parents d4546c4f 08342a2d
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
import re import re
import sys import sys
import array
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
_unicode, _str, _bytes = str, str, bytes _unicode, _str, _bytes = str, str, bytes
...@@ -267,18 +266,26 @@ def split_string_literal(s, limit=2000): ...@@ -267,18 +266,26 @@ def split_string_literal(s, limit=2000):
def encode_pyunicode_string(s): def encode_pyunicode_string(s):
"""Create Py_UNICODE[] representation of a given unicode string. """Create Py_UNICODE[] representation of a given unicode string.
""" """
utf32_array = array.array('i', s.encode('UTF-32')) s = map(ord, s) + [0]
assert utf32_array.itemsize == 4
utf32_array.pop(0) # Remove BOM if sys.maxunicode >= 0x10000: # Wide build or Py3.3
utf32_array.append(0) # Add NULL terminator utf16, utf32 = [], s
for code_point in s:
for c in utf32_array: if code_point >= 0x10000: # outside of BMP
if c > 65535: high, low = divmod(code_point - 0x10000, 1024)
utf16_array = array.array('H', s.encode('UTF-16')) utf16.append(high + 0xD800)
utf16_array.pop(0) # Remove BOM utf16.append(low + 0xDC00)
utf16_array.append(0) # Add NULL terminator else:
break utf16.append(code_point)
else:
utf16, utf32 = s, []
for code_unit in s:
if 0xDC00 <= code_unit <= 0xDFFF and utf32 and 0xD800 <= utf32[-1] <= 0xDBFF:
high, low = utf32[-1], code_unit
utf32[-1] = ((high & 0x3FF) << 10) + (low & 0x3FF) + 0x10000
else: else:
utf16_array = [] utf32.append(code_unit)
return ",".join(map(unicode, utf16_array)), ",".join(map(unicode, utf32_array)) if utf16 == utf32:
utf16 = []
return ",".join(map(unicode, utf16)), ",".join(map(unicode, utf32))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment