Commit 595502fc authored by Stefan Behnel

adapt 'unicode' usage to Py2/Py3

parent e2922b0c
......@@ -56,7 +56,7 @@ if sys.version_info[0] < 3:
if _fs_encoding is None:
_fs_encoding = sys.getdefaultencoding()
def encode_filename_in_py2(filename):
if isinstance(filename, unicode):
if not isinstance(filename, bytes):
return filename.encode(_fs_encoding)
return filename
else:
......
......@@ -27,7 +27,7 @@ IS_PY3 = sys.version_info >= (3, 0)
# A utility function to convert user-supplied ASCII strings to unicode.
if sys.version_info[0] < 3:
def to_unicode(s):
if not isinstance(s, unicode):
if isinstance(s, bytes):
return s.decode('ascii')
else:
return s
......
......@@ -6,8 +6,11 @@ The output is in a strict format, no whitespace or comments from the input
is preserved (and it could not be as it is not present in the code tree).
"""
from Cython.Compiler.Visitor import TreeVisitor
from Cython.Compiler.ExprNodes import *
from __future__ import absolute_import, print_function
from .Compiler.Visitor import TreeVisitor
from .Compiler.ExprNodes import *
class LinesResult(object):
def __init__(self):
......@@ -497,7 +500,7 @@ class CodeWriter(DeclarationWriter):
class PxdWriter(DeclarationWriter):
def __call__(self, node):
print u'\n'.join(self.write(node).lines)
print(u'\n'.join(self.write(node).lines))
return node
def visit_CFuncDefNode(self, node):
......@@ -516,5 +519,3 @@ class PxdWriter(DeclarationWriter):
def visit_StatNode(self, node):
pass
......@@ -218,7 +218,7 @@ class AnnotationCCodeWriter(CCodeWriter):
def annotate(match):
group_name = match.lastgroup
calls[group_name] += 1
return ur"<span class='%s'>%s</span>" % (
return u"<span class='%s'>%s</span>" % (
group_name, match.group(group_name))
lines = self._htmlify_code(cython_code).splitlines()
......@@ -275,22 +275,22 @@ class AnnotationCCodeWriter(CCodeWriter):
return outlist
_parse_code = re.compile(
ur'(?P<refnanny>__Pyx_X?(?:GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)|'
ur'(?P<trace>__Pyx_Trace[A-Za-z]+)|'
ur'(?:'
ur'(?P<pyx_macro_api>__Pyx_[A-Z][A-Z_]+)|'
ur'(?P<pyx_c_api>__Pyx_[A-Z][a-z_][A-Za-z_]*)|'
ur'(?P<py_macro_api>Py[A-Z][a-z]+_[A-Z][A-Z_]+)|'
ur'(?P<py_c_api>Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]*)'
ur')(?=\()|' # look-ahead to exclude subsequent '(' from replacement
ur'(?P<error_goto>(?:(?<=;) *if .* +)?\{__pyx_filename = .*goto __pyx_L\w+;\})'
).sub
_parse_code = re.compile((
br'(?P<refnanny>__Pyx_X?(?:GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)|'
br'(?P<trace>__Pyx_Trace[A-Za-z]+)|'
br'(?:'
br'(?P<pyx_macro_api>__Pyx_[A-Z][A-Z_]+)|'
br'(?P<pyx_c_api>__Pyx_[A-Z][a-z_][A-Za-z_]*)|'
br'(?P<py_macro_api>Py[A-Z][a-z]+_[A-Z][A-Z_]+)|'
br'(?P<py_c_api>Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]*)'
br')(?=\()|' # look-ahead to exclude subsequent '(' from replacement
br'(?P<error_goto>(?:(?<=;) *if .* +)?\{__pyx_filename = .*goto __pyx_L\w+;\})'
).decode('ascii')).sub
_replace_pos_comment = re.compile(
# this matches what Cython generates as code line marker comment
ur'^\s*/\*(?:(?:[^*]|\*[^/])*\n)+\s*\*/\s*\n',
br'^\s*/\*(?:(?:[^*]|\*[^/])*\n)+\s*\*/\s*\n'.decode('ascii'),
re.M
).sub
......
......@@ -163,7 +163,7 @@ class UtilityCodeBase(object):
if ext in ('.pyx', '.py', '.pxd', '.pxi'):
comment = '#'
strip_comments = partial(re.compile(r'^\s*#.*').sub, '')
rstrip = unicode.rstrip
rstrip = str.rstrip
else:
comment = '/'
strip_comments = partial(re.compile(r'^\s*//.*|/\*[^*]*\*/').sub, '')
......@@ -819,7 +819,7 @@ class PyObjectConst(object):
cython.declare(possible_unicode_identifier=object, possible_bytes_identifier=object,
replace_identifier=object, find_alphanums=object)
possible_unicode_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match
possible_unicode_identifier = re.compile(br"(?![0-9])\w+$".decode('ascii'), re.U).match
possible_bytes_identifier = re.compile(r"(?![0-9])\w+$".encode('ASCII')).match
replace_identifier = re.compile(r'[^a-zA-Z0-9_]+').sub
find_alphanums = re.compile('([a-zA-Z0-9]+)').findall
......@@ -876,10 +876,10 @@ class StringConst(object):
if identifier:
intern = True
elif identifier is None:
if isinstance(text, unicode):
intern = bool(possible_unicode_identifier(text))
else:
if isinstance(text, bytes):
intern = bool(possible_bytes_identifier(text))
else:
intern = bool(possible_unicode_identifier(text))
else:
intern = False
if intern:
......@@ -2298,9 +2298,8 @@ class PyxCodeWriter(object):
def getvalue(self):
result = self.buffer.getvalue()
if not isinstance(result, unicode):
if isinstance(result, bytes):
result = result.decode(self.encoding)
return result
def putln(self, line, context=None):
......
......@@ -4,6 +4,11 @@
from __future__ import absolute_import
try:
from __builtin__ import basestring as any_string_type
except ImportError:
any_string_type = (bytes, str)
import sys
from ..Utils import open_new_file
......@@ -21,7 +26,7 @@ class PyrexWarning(Exception):
def context(position):
source = position[0]
assert not (isinstance(source, unicode) or isinstance(source, str)), (
assert not (isinstance(source, any_string_type)), (
"Please replace filename strings with Scanning.FileSourceDescriptor instances %r" % source)
try:
F = source.get_lines()
......@@ -167,7 +172,7 @@ def report_error(err):
def error(position, message):
#print "Errors.error:", repr(position), repr(message) ###
#print("Errors.error:", repr(position), repr(message)) ###
if position is None:
raise InternalError(message)
err = CompileError(position, message)
......
......@@ -45,12 +45,12 @@ from .DebugFlags import debug_disposal_code, debug_temp_alloc, \
try:
from __builtin__ import basestring
except ImportError:
basestring = str # Python 3
try:
from builtins import bytes
except ImportError:
bytes = str # Python 2
# Python 3
basestring = str
any_string_type = (bytes, str)
else:
# Python 2
any_string_type = (bytes, unicode)
if sys.version_info[0] >= 3:
......@@ -1216,7 +1216,7 @@ class FloatNode(ConstNode):
def get_constant_c_result_code(self):
strval = self.value
assert isinstance(strval, (str, unicode))
assert isinstance(strval, basestring)
cmpval = repr(float(strval))
if cmpval == 'nan':
return "(Py_HUGE_VAL * 0)"
......@@ -10907,8 +10907,8 @@ class CmpNode(object):
def calculate_cascaded_constant_result(self, operand1_result):
func = compile_time_binary_operators[self.operator]
operand2_result = self.operand2.constant_result
if (isinstance(operand1_result, (bytes, unicode)) and
isinstance(operand2_result, (bytes, unicode)) and
if (isinstance(operand1_result, any_string_type) and
isinstance(operand2_result, any_string_type) and
type(operand1_result) != type(operand2_result)):
# string comparison of different types isn't portable
return
......
......@@ -6,7 +6,7 @@ import cython
cython.declare(PyrexTypes=object, Naming=object, ExprNodes=object, Nodes=object,
Options=object, UtilNodes=object, LetNode=object,
LetRefNode=object, TreeFragment=object, EncodedString=object,
error=object, warning=object, copy=object)
error=object, warning=object, copy=object, _unicode=object)
from . import PyrexTypes
from . import Naming
......@@ -19,7 +19,7 @@ from .Visitor import VisitorTransform, TreeVisitor
from .Visitor import CythonTransform, EnvTransform, ScopeTrackingTransform
from .UtilNodes import LetNode, LetRefNode, ResultRefNode
from .TreeFragment import TreeFragment
from .StringEncoding import EncodedString
from .StringEncoding import EncodedString, _unicode
from .Errors import error, warning, CompileError, InternalError
from .Code import UtilityCode
......@@ -663,7 +663,7 @@ class InterpretCompilerDirectives(CythonTransform, SkipDeclarations):
self.parallel_directives = {}
directives = copy.deepcopy(Options.directive_defaults)
for key, value in compilation_directive_defaults.items():
directives[unicode(key)] = copy.deepcopy(value)
directives[_unicode(key)] = copy.deepcopy(value)
self.directives = directives
def check_directive_scope(self, pos, directive, scope):
......
......@@ -7,7 +7,7 @@ from __future__ import absolute_import
import cython
cython.declare(make_lexicon=object, lexicon=object,
any_string_prefix=unicode, IDENT=unicode,
any_string_prefix=cython.unicode, IDENT=cython.unicode,
print_function=object, error=object, warning=object,
os=object, platform=object)
......
......@@ -8,10 +8,10 @@ import re
import sys
if sys.version_info[0] >= 3:
_unicode, _str, _bytes = str, str, bytes
_unicode, _str, _bytes, _unichr = str, str, bytes, chr
IS_PYTHON3 = True
else:
_unicode, _str, _bytes = unicode, str, str
_unicode, _str, _bytes, _unichr = unicode, str, str, unichr
IS_PYTHON3 = False
empty_bytes = _bytes()
......@@ -39,13 +39,13 @@ class UnicodeLiteralBuilder(object):
# wide Unicode character on narrow platform => replace
# by surrogate pair
char_number -= 0x10000
self.chars.append( unichr((char_number // 1024) + 0xD800) )
self.chars.append( unichr((char_number % 1024) + 0xDC00) )
self.chars.append( _unichr((char_number // 1024) + 0xD800) )
self.chars.append( _unichr((char_number % 1024) + 0xDC00) )
else:
self.chars.append( unichr(char_number) )
self.chars.append( _unichr(char_number) )
else:
def append_charval(self, char_number):
self.chars.append( unichr(char_number) )
self.chars.append( _unichr(char_number) )
def append_uescape(self, char_number, escape_string):
self.append_charval(char_number)
......@@ -71,7 +71,7 @@ class BytesLiteralBuilder(object):
self.chars.append(characters)
def append_charval(self, char_number):
self.chars.append( unichr(char_number).encode('ISO-8859-1') )
self.chars.append( _unichr(char_number).encode('ISO-8859-1') )
def append_uescape(self, char_number, escape_string):
self.append(escape_string)
......@@ -311,4 +311,4 @@ def encode_pyunicode_string(s):
if utf16 == utf32:
utf16 = []
return ",".join(map(unicode, utf16)), ",".join(map(unicode, utf32))
return ",".join(map(_unicode, utf16)), ",".join(map(_unicode, utf32))
......@@ -17,6 +17,7 @@ from . import PyrexTypes
from .Visitor import VisitorTransform
from .Nodes import Node, StatListNode
from .ExprNodes import NameNode
from .StringEncoding import _unicode
from . import Parsing
from . import Main
from . import UtilNodes
......@@ -59,7 +60,7 @@ def parse_from_strings(name, code, pxds={}, level=None, initial_pos=None,
# to use a unicode string so that code fragments don't have to bother
# with encoding. This means that test code passed in should not have an
# encoding header.
assert isinstance(code, unicode), "unicode code snippets only please"
assert isinstance(code, _unicode), "unicode code snippets only please"
encoding = "UTF-8"
module_name = name
......@@ -198,7 +199,7 @@ def copy_code_tree(node):
return TreeCopier()(node)
_match_indent = re.compile(ur"^ *").match
_match_indent = re.compile(u"^ *").match
def strip_common_indent(lines):
......@@ -214,7 +215,7 @@ class TreeFragment(object):
def __init__(self, code, name=None, pxds={}, temps=[], pipeline=[], level=None, initial_pos=None):
if not name:
name = "(tree fragment)"
if isinstance(code, unicode):
if isinstance(code, _unicode):
def fmt(x): return u"\n".join(strip_common_indent(x.split(u"\n")))
fmt_code = fmt(code)
......
......@@ -17,6 +17,11 @@ try:
except ImportError:
from sys import maxint
try:
unichr
except NameError:
unichr = chr
LOWEST_PRIORITY = -maxint
......
# cython: language_level=3
from cpython.ref cimport PyObject, Py_INCREF, Py_DECREF, Py_XDECREF, Py_XINCREF
from cpython.exc cimport PyErr_Fetch, PyErr_Restore
from cpython.pystate cimport PyThreadState_Get
......@@ -72,7 +74,7 @@ cdef void report_unraisable(object e=None):
if e is None:
import sys
e = sys.exc_info()[1]
print u"refnanny raised an exception: %s" % e
print(u"refnanny raised an exception: %s" % e)
except:
pass # We absolutely cannot exit with an exception
......@@ -159,9 +161,10 @@ cdef void FinishContext(PyObject** ctx):
context = <Context>ctx[0]
errors = context.end()
if errors:
print u"%s: %s()" % (context.filename.decode('latin1'),
context.name.decode('latin1'))
print errors
print(u"%s: %s()" % (
context.filename.decode('latin1'),
context.name.decode('latin1')))
print(errors)
context = None
except:
report_unraisable()
......
......@@ -43,7 +43,7 @@ import tokenize
from io import StringIO
from ._looper import looper
from .compat3 import bytes, basestring_, next, is_unicode, coerce_text
from .compat3 import bytes, unicode, basestring_, next, is_unicode, coerce_text
__all__ = ['TemplateError', 'Template', 'sub', 'HTMLTemplate',
'sub_html', 'html', 'bunch']
......
import sys
__all__ = ['b', 'basestring_', 'bytes', 'next', 'is_unicode']
__all__ = ['b', 'basestring_', 'bytes', 'unicode', 'next', 'is_unicode']
if sys.version < "3":
b = bytes = str
basestring_ = basestring
unicode = unicode
else:
def b(s):
......@@ -13,6 +14,7 @@ else:
return bytes(s)
basestring_ = (bytes, str)
bytes = bytes
unicode = str
text = str
if sys.version < "3":
......
......@@ -20,7 +20,7 @@ TOOLS_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..',
@contextmanager
def _tempfile(code):
code = dedent(code)
if isinstance(code, unicode):
if not isinstance(code, bytes):
code = code.encode('utf8')
with NamedTemporaryFile(suffix='.py') as f:
......
......@@ -131,8 +131,8 @@ cdef class array:
if itemsize <= 0:
raise ValueError("itemsize <= 0 for cython.array")
if isinstance(format, unicode):
format = (<unicode>format).encode('ASCII')
if not isinstance(format, bytes):
format = format.encode('ASCII')
self._format = format # keep a reference to the byte string
self.format = self._format
......
......@@ -187,15 +187,14 @@ def path_exists(path):
# file name encodings
def decode_filename(filename):
if isinstance(filename, unicode):
return filename
try:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = sys.getdefaultencoding()
filename = filename.decode(filename_encoding)
except UnicodeDecodeError:
pass
if isinstance(filename, bytes):
try:
filename_encoding = sys.getfilesystemencoding()
if filename_encoding is None:
filename_encoding = sys.getdefaultencoding()
filename = filename.decode(filename_encoding)
except UnicodeDecodeError:
pass
return filename
# support for source file encoding detection
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment