Commit 270bf960 authored by da-woods, committed by Stefan Behnel

Unicode identifiers (PEP 3131) (GH-3081)

Closes #2601
parent 4c5cc91b
......@@ -1152,7 +1152,7 @@ class GlobalState(object):
w.putln("")
w.putln("static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {")
w.put_declare_refcount_context()
w.put_setup_refcount_context("__Pyx_InitCachedConstants")
w.put_setup_refcount_context(StringEncoding.EncodedString("__Pyx_InitCachedConstants"))
w = self.parts['init_globals']
w.enter_cfunc_scope()
......@@ -2205,9 +2205,10 @@ class CCodeWriter(object):
cast = entry.signature.method_function_type()
if cast != 'PyCFunction':
func_ptr = '(void*)(%s)%s' % (cast, func_ptr)
entry_name = entry.name.as_c_string_literal()
self.putln(
'{"%s", (PyCFunction)%s, %s, %s}%s' % (
entry.name,
'{%s, (PyCFunction)%s, %s, %s}%s' % (
entry_name,
func_ptr,
"|".join(method_flags),
entry.doc_cname if entry.doc else '0',
......@@ -2365,10 +2366,11 @@ class CCodeWriter(object):
self.putln('__Pyx_RefNannyDeclarations')
def put_setup_refcount_context(self, name, acquire_gil=False):
name = name.as_c_string_literal() # handle unicode names
if acquire_gil:
self.globalstate.use_utility_code(
UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
self.putln('__Pyx_RefNannySetupContext("%s", %d);' % (name, acquire_gil and 1 or 0))
self.putln('__Pyx_RefNannySetupContext(%s, %d);' % (name, acquire_gil and 1 or 0))
def put_finish_refcount_context(self):
    """Emit the ``__Pyx_RefNannyFinishContext();`` statement via ``self.putln``."""
    finish_statement = "__Pyx_RefNannyFinishContext();"
    self.putln(finish_statement)
......@@ -2379,14 +2381,16 @@ class CCodeWriter(object):
qualified_name should be the qualified name of the function.
"""
qualified_name = qualified_name.as_c_string_literal() # handle unicode names
format_tuple = (
qualified_name,
Naming.clineno_cname if include_cline else 0,
Naming.lineno_cname,
Naming.filename_cname,
)
self.funcstate.uses_error_indicator = True
self.putln('__Pyx_AddTraceback("%s", %s, %s, %s);' % format_tuple)
self.putln('__Pyx_AddTraceback(%s, %s, %s, %s);' % format_tuple)
def put_unraisable(self, qualified_name, nogil=False):
"""
......
......@@ -167,7 +167,6 @@ def report_error(err, use_stack=True):
if Options.fast_fail:
raise AbortError("fatal errors")
def error(position, message):
#print("Errors.error:", repr(position), repr(message)) ###
if position is None:
......@@ -180,16 +179,22 @@ def error(position, message):
LEVEL = 1 # warn about all errors level 1 or higher
def _write_file_encode(file, line):
try:
file.write(line)
except UnicodeEncodeError:
file.write(line.encode('ascii', 'replace'))
def message(position, message, level=1):
if level < LEVEL:
return
warn = CompileWarning(position, message)
line = "note: %s\n" % warn
line = u"note: %s\n" % warn
if listing_file:
listing_file.write(line)
_write_file_encode(listing_file, line)
if echo_file:
echo_file.write(line)
_write_file_encode(echo_file, line)
return warn
......@@ -199,11 +204,11 @@ def warning(position, message, level=0):
if Options.warning_errors and position:
return error(position, message)
warn = CompileWarning(position, message)
line = "warning: %s\n" % warn
line = u"warning: %s\n" % warn
if listing_file:
listing_file.write(line)
_write_file_encode(listing_file, line)
if echo_file:
echo_file.write(line)
_write_file_encode(echo_file, line)
return warn
......@@ -212,11 +217,11 @@ def warn_once(position, message, level=0):
if level < LEVEL or message in _warn_once_seen:
return
warn = CompileWarning(position, message)
line = "warning: %s\n" % warn
line = u"warning: %s\n" % warn
if listing_file:
listing_file.write(line)
_write_file_encode(listing_file, line)
if echo_file:
echo_file.write(line)
_write_file_encode(echo_file, line)
_warn_once_seen[message] = True
return warn
......
# -*- coding: utf-8 -*-
# cython: language_level=3, py2_import=True
#
# Cython Scanner - Lexical Definitions
......@@ -16,7 +17,7 @@ IDENT = 'IDENT'
def make_lexicon():
from ..Plex import \
Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \
TEXT, IGNORE, Method, State, Lexicon
TEXT, IGNORE, Method, State, Lexicon, Range
letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
nonzero_digit = Any("123456789")
......@@ -26,6 +27,14 @@ def make_lexicon():
hexdigit = Any("0123456789ABCDEFabcdef")
indentation = Bol + Rep(Any(" \t"))
# The lists of valid unicode identifier characters are pretty slow to generate at runtime,
# and generating them requires Python 3, so they are just included directly here
# (via the generated code block at the bottom of the file)
unicode_start_character = (Any(unicode_start_ch_any) | Range(unicode_start_ch_range))
unicode_continuation_character = (
unicode_start_character |
Any(unicode_continuation_ch_any) | Range(unicode_continuation_ch_range))
def underscore_digits(d):
    # One or more ``d``, followed by any number of groups made of a single
    # "_" and one or more ``d``.
    leading_run = Rep1(d)
    separated_run = Str("_") + Rep1(d)
    return leading_run + Rep(separated_run)
......@@ -37,7 +46,8 @@ def make_lexicon():
exponent = Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
name = letter + Rep(letter | digit)
#name = letter + Rep(letter | digit)
name = unicode_start_character + Rep(unicode_continuation_character)
intconst = (prefixed_digits(nonzero_digit, digit) | # decimal literals with underscores must not start with '0'
(Str("0") + (prefixed_digits(Any("Xx"), hexdigit) |
prefixed_digits(Any("Oo"), octdigit) |
......@@ -143,3 +153,46 @@ def make_lexicon():
#debug_file = scanner_dump_file
)
# BEGIN GENERATED CODE
# generated with:
# cpython 3.7.3 (default, Apr 09 2019, 05:18:21) [GCC]
unicode_start_ch_any = (
u"_ªµºˬˮͿΆΌՙەۿܐޱߺࠚࠤࠨऽॐলঽৎৼਫ਼ઽૐૹଽୱஃஜௐఽಀಽೞഽൎලาຄຊຍລວາຽໆༀဿၡႎჇჍቘዀៗៜᢪ"
u"ᪧὙὛὝιⁱⁿℂℇℕℤΩℨⅎⴧⴭⵯꣻꧏꩺꪱꫀꫂיִמּﹱﹳﹷﹹﹻﹽ𐠈𐠼𐨀𐼧𑅄𑅶𑇚𑇜𑊈𑌽𑍐𑓇𑙄𑣿𑨀𑨺𑩐𑪝𑱀𑵆𑶘𖽐𝒢𝒻𝕆𞸤𞸧𞸹𞸻"
u"𞹂𞹇𞹉𞹋𞹔𞹗𞹙𞹛𞹝𞹟𞹤𞹾")
unicode_start_ch_range = (
u"AZazÀÖØöøˁˆˑˠˤͰʹͶͷͻͽΈΊΎΡΣϵϷҁҊԯԱՖՠֈאתׯײؠيٮٯٱۓۥۦۮۯۺۼܒܯݍޥߊߪߴߵࠀࠕ"
u"ࡀࡘࡠࡪࢠࢴࢶࢽऄहक़ॡॱঀঅঌএঐওনপরশহড়ঢ়য়ৡৰৱਅਊਏਐਓਨਪਰਲਲ਼ਵਸ਼ਸਹਖ਼ੜੲੴઅઍએઑઓનપરલળવહ"
u"ૠૡଅଌଏଐଓନପରଲଳଵହଡ଼ଢ଼ୟୡஅஊஎஐஒகஙசஞடணதநபமஹఅఌఎఐఒనపహౘౚౠౡಅಌಎಐಒನಪಳವಹೠೡೱೲ"
u"അഌഎഐഒഺൔൖൟൡൺൿඅඖකනඳරවෆกะเๆກຂງຈດທນຟມຣສຫອະເໄໜໟཀཇཉཬྈྌကဪၐၕၚၝၥၦၮၰၵႁ"
u"ႠჅაჺჼቈቊቍቐቖቚቝበኈኊኍነኰኲኵኸኾዂዅወዖዘጐጒጕጘፚᎀᎏᎠᏵᏸᏽᐁᙬᙯᙿᚁᚚᚠᛪᛮᛸᜀᜌᜎᜑᜠᜱᝀᝑᝠᝬᝮᝰ"
u"កឳᠠᡸᢀᢨᢰᣵᤀᤞᥐᥭᥰᥴᦀᦫᦰᧉᨀᨖᨠᩔᬅᬳᭅᭋᮃᮠᮮᮯᮺᯥᰀᰣᱍᱏᱚᱽᲀᲈᲐᲺᲽᲿᳩᳬᳮᳱᳵᳶᴀᶿḀἕἘἝἠὅὈὍ"
u"ὐὗὟώᾀᾴᾶᾼῂῄῆῌῐΐῖΊῠῬῲῴῶῼₐₜℊℓ℘ℝKℹℼℿⅅⅉⅠↈⰀⰮⰰⱞⱠⳤⳫⳮⳲⳳⴀⴥⴰⵧⶀⶖⶠⶦⶨⶮⶰⶶⶸⶾ"
u"ⷀⷆⷈⷎⷐⷖⷘⷞ々〇〡〩〱〵〸〼ぁゖゝゟァヺーヿㄅㄯㄱㆎㆠㆺㇰㇿ㐀䶵一鿯ꀀꒌꓐꓽꔀꘌꘐꘟꘪꘫꙀꙮꙿꚝꚠꛯꜗꜟꜢꞈꞋꞹꟷꠁ"
u"ꠃꠅꠇꠊꠌꠢꡀꡳꢂꢳꣲꣷꣽꣾꤊꤥꤰꥆꥠꥼꦄꦲꧠꧤꧦꧯꧺꧾꨀꨨꩀꩂꩄꩋꩠꩶꩾꪯꪵꪶꪹꪽꫛꫝꫠꫪꫲꫴꬁꬆꬉꬎꬑꬖꬠꬦꬨꬮꬰꭚ"
u"ꭜꭥꭰꯢ가힣ힰퟆퟋퟻ豈舘並龎ffstﬓﬗײַﬨשׁזּטּלּנּסּףּפּצּﮱﯓﱝﱤﴽﵐﶏﶒﷇﷰﷹﹿﻼAZazヲンᅠ하ᅦᅧᅬᅭᅲᅳᅵ𐀀𐀋"
u"𐀍𐀦𐀨𐀺𐀼𐀽𐀿𐁍𐁐𐁝𐂀𐃺𐅀𐅴𐊀𐊜𐊠𐋐𐌀𐌟𐌭𐍊𐍐𐍵𐎀𐎝𐎠𐏃𐏈𐏏𐏑𐏕𐐀𐒝𐒰𐓓𐓘𐓻𐔀𐔧𐔰𐕣𐘀𐜶𐝀𐝕𐝠𐝧𐠀𐠅𐠊𐠵𐠷𐠸𐠿𐡕𐡠𐡶𐢀𐢞"
u"𐣠𐣲𐣴𐣵𐤀𐤕𐤠𐤹𐦀𐦷𐦾𐦿𐨐𐨓𐨕𐨗𐨙𐨵𐩠𐩼𐪀𐪜𐫀𐫇𐫉𐫤𐬀𐬵𐭀𐭕𐭠𐭲𐮀𐮑𐰀𐱈𐲀𐲲𐳀𐳲𐴀𐴣𐼀𐼜𐼰𐽅𑀃𑀷𑂃𑂯𑃐𑃨𑄃𑄦𑅐𑅲𑆃𑆲𑇁𑇄"
u"𑈀𑈑𑈓𑈫𑊀𑊆𑊊𑊍𑊏𑊝𑊟𑊨𑊰𑋞𑌅𑌌𑌏𑌐𑌓𑌨𑌪𑌰𑌲𑌳𑌵𑌹𑍝𑍡𑐀𑐴𑑇𑑊𑒀𑒯𑓄𑓅𑖀𑖮𑗘𑗛𑘀𑘯𑚀𑚪𑜀𑜚𑠀𑠫𑢠𑣟𑨋𑨲𑩜𑪃𑪆𑪉𑫀𑫸𑰀𑰈"
u"𑰊𑰮𑱲𑲏𑴀𑴆𑴈𑴉𑴋𑴰𑵠𑵥𑵧𑵨𑵪𑶉𑻠𑻲𒀀𒎙𒐀𒑮𒒀𒕃𓀀𓐮𔐀𔙆𖠀𖨸𖩀𖩞𖫐𖫭𖬀𖬯𖭀𖭃𖭣𖭷𖭽𖮏𖹀𖹿𖼀𖽄𖾓𖾟𖿠𖿡𗀀𘟱𘠀𘫲𛀀𛄞𛅰𛋻𛰀𛱪"
u"𛱰𛱼𛲀𛲈𛲐𛲙𝐀𝑔𝑖𝒜𝒞𝒟𝒥𝒦𝒩𝒬𝒮𝒹𝒽𝓃𝓅𝔅𝔇𝔊𝔍𝔔𝔖𝔜𝔞𝔹𝔻𝔾𝕀𝕄𝕊𝕐𝕒𝚥𝚨𝛀𝛂𝛚𝛜𝛺𝛼𝜔𝜖𝜴𝜶𝝎𝝐𝝮𝝰𝞈𝞊𝞨𝞪𝟂𝟄𝟋"
u"𞠀𞣄𞤀𞥃𞸀𞸃𞸅𞸟𞸡𞸢𞸩𞸲𞸴𞸷𞹍𞹏𞹑𞹒𞹡𞹢𞹧𞹪𞹬𞹲𞹴𞹷𞹹𞹼𞺀𞺉𞺋𞺛𞺡𞺣𞺥𞺩𞺫𞺻𠀀𪛖𪜀𫜴𫝀𫠝𫠠𬺡𬺰𮯠")
unicode_continuation_ch_any = (
u"··়ׇֿٰܑ߽ৗ਼৾ੑੵ઼଼ஂௗ಼ൗ්ූัັ༹༵༷࿆᳭ᢩ៝⁔⵿⃡꙯ꠂ꠆ꠋꧥꩃﬞꪰ꫁_𑅳𐨿𐇽𐋠𑈾𑍗𑩇𑑞𑴺𑵇𝩵𝪄")
unicode_continuation_ch_range = (
u"09ֽׁׂًؚ֑ׅ̀ͯ҃҇ׄؐ٩۪ۭۖۜ۟ۤۧۨ۰۹ܰ݊ަް߀߉࡙࡛࣓ࣣ߫߳ࠖ࠙ࠛࠣࠥࠧࠩ࠭࣡ःऺ़ाॏ॑ॗॢॣ०९ঁঃ"
u"াৄেৈো্ৢৣ০৯ਁਃਾੂੇੈੋ੍੦ੱઁઃાૅેૉો્ૢૣ૦૯ૺ૿ଁଃାୄେୈୋ୍ୖୗୢୣ୦୯ாூெைொ்௦௯ఀఄాౄ"
u"ెైొ్ౕౖౢౣ౦౯ಁಃಾೄೆೈೊ್ೕೖೢೣ೦೯ഀഃ഻഼ാൄെൈൊ്ൢൣ൦൯ංඃාුෘෟ෦෯ෲෳำฺ็๎๐๙ຳູົຼ່ໍ"
u"໐໙༘༙༠༩༾༿྄ཱ྆྇ྍྗྙྼါှ၀၉ၖၙၞၠၢၤၧၭၱၴႂႍႏႝ፝፟፩፱ᜒ᜔ᜲ᜴ᝒᝓᝲᝳ឴៓០៩᠋᠍᠐᠙ᤠᤫᤰ᤻᥆᥏"
u"᧐᧚ᨗᨛᩕᩞ᩠᩿᩼᪉᪐᪙᪽᪰ᬀᬄ᬴᭄᭐᭙᭫᭳ᮀᮂᮡᮭ᮰᮹᯦᯳ᰤ᰷᱀᱉᱐᱙᳔᳨᳐᳒ᳲ᳴᳷᷹᷿᳹᷀᷻‿⁀⃥〪〯⃐⃜⃰⳯⳱ⷠⷿ"
u"゙゚꘠꘩ꙴ꙽ꚞꚟ꛰꛱ꠣꠧꢀꢁꢴꣅ꣐꣙꣠꣱ꣿ꤉ꤦ꤭ꥇ꥓ꦀꦃ꦳꧀꧐꧙꧰꧹ꨩꨶꩌꩍ꩐꩙ꩻꩽꪴꪲꪷꪸꪾ꪿ꫫꫯꫵ꫶ꯣꯪ꯬꯭꯰꯹︀️"
u"︠︯︳︴﹍﹏09゙゚𐍶𐍺𐒠𐒩𐨁𐨃𐨅𐨆𐨌𐨺𐫦𐨏𐨸𐫥𐴤𐴧𐴰𐴹𐽆𐽐𑀀𑀂𑀸𑁆𑁦𑁯𑁿𑂂𑂰𑂺𑃰𑃹𑄀𑄂𑄧𑄴𑄶𑄿𑅅𑅆𑆀𑆂𑆳𑇀𑇉𑇌𑇐𑇙𑈬𑈷"
u"𑋟𑋪𑋰𑋹𑌀𑌃𑌻𑌼𑌾𑍄𑍇𑍈𑍋𑍍𑍢𑍣𑍦𑍬𑍰𑍴𑐵𑑆𑑐𑑙𑒰𑓃𑓐𑓙𑖯𑖵𑖸𑗀𑗜𑗝𑘰𑙀𑙐𑙙𑚫𑚷𑛀𑛉𑜝𑜫𑜰𑜹𑠬𑠺𑣠𑣩𑨁𑨊𑨳𑨹𑨻𑨾𑩑𑩛𑪊𑪙"
u"𑰯𑰶𑰸𑰿𑱐𑱙𑲒𑲧𑲩𑲶𑴱𑴶𑴼𑴽𑴿𑵅𑵐𑵙𑶊𑶎𑶐𑶑𑶓𑶗𑶠𑶩𑻳𑻶𖩠𖩩𖫰𖫴𖬰𖬶𖭐𖭙𖽑𖽾𖾏𖾒𛲝𛲞𝅩𝅥𝅲𝅻𝆂𝆋𝅭𝆅𝆪𝆭𝉂𝉄𝟎𝟿𝨀𝨶𝨻𝩬"
u"𝪛𝪟𝪡𝪯𞥊𞣐𞣖𞀀𞀆𞀈𞀘𞀛𞀡𞀣𞀤𞀦𞀪𞥄𞥐𞥙")
# END GENERATED CODE
......@@ -292,14 +292,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
cname = env.mangle(Naming.func_prefix_api, entry.name)
sig = entry.type.signature_string()
h_code.putln(
'if (__Pyx_ImportFunction(module, "%s", (void (**)(void))&%s, "%s") < 0) goto bad;'
% (entry.name, cname, sig))
'if (__Pyx_ImportFunction(module, %s, (void (**)(void))&%s, "%s") < 0) goto bad;'
% (entry.name.as_c_string_literal(), cname, sig))
for entry in api_vars:
cname = env.mangle(Naming.varptr_prefix_api, entry.name)
sig = entry.type.empty_declaration_code()
h_code.putln(
'if (__Pyx_ImportVoidPtr(module, "%s", (void **)&%s, "%s") < 0) goto bad;'
% (entry.name, cname, sig))
'if (__Pyx_ImportVoidPtr(module, %s, (void **)&%s, "%s") < 0) goto bad;'
% (entry.name.as_c_string_literal(), cname, sig))
with ModuleImportGenerator(h_code, imported_modules={env.qualified_name: 'module'}) as import_generator:
for entry in api_extension_types:
self.generate_type_import_call(entry.type, h_code, import_generator, error_code="goto bad;")
......@@ -2155,9 +2155,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln(header % type.typeobj_cname)
code.putln(
"PyVarObject_HEAD_INIT(0, 0)")
classname = scope.class_name.as_c_string_literal()
code.putln(
'"%s.%s", /*tp_name*/' % (
self.full_module_name, scope.class_name))
'"%s."%s, /*tp_name*/' % (
self.full_module_name,
classname))
if type.typedef_flag:
objstruct = type.objstruct_cname
else:
......@@ -2224,8 +2226,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
else:
doc_code = "0"
code.putln(
'{(char *)"%s", %s, %s, (char *)%s, 0},' % (
entry.name,
'{(char *)%s, %s, %s, (char *)%s, 0},' % (
entry.name.as_c_string_literal(),
entry.getter_cname or "0",
entry.setter_cname or "0",
doc_code))
......@@ -2301,7 +2303,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if code.label_used(code.error_label):
code.put_label(code.error_label)
# This helps locate the offending name.
code.put_add_traceback(self.full_module_name)
code.put_add_traceback(EncodedString(self.full_module_name))
code.error_label = old_error_label
code.putln("bad:")
code.putln("return -1;")
......@@ -2318,6 +2320,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0])
header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % env.module_name
header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env)
header3 = EncodedString(header3)
code.putln("#if PY_MAJOR_VERSION < 3")
# Optimise for small code size as the module init function is only executed once.
code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2)
......@@ -2513,7 +2516,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.put_xdecref(cname, type)
code.putln('if (%s) {' % env.module_cname)
code.putln('if (%s) {' % env.module_dict_cname)
code.put_add_traceback("init %s" % env.qualified_name)
code.put_add_traceback(EncodedString("init %s" % env.qualified_name))
code.globalstate.use_utility_code(Nodes.traceback_utility_code)
# Module reference and module dict are in global variables which might still be needed
# for cleanup, atexit code, etc., so leaking is better than crashing.
......@@ -2573,7 +2576,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("static int %s(void) {" % self.cfunc_name)
code.put_declare_refcount_context()
self.tempdecl_code = code.insertion_point()
code.put_setup_refcount_context(self.cfunc_name)
code.put_setup_refcount_context(EncodedString(self.cfunc_name))
# Leave a grepable marker that makes it easy to find the generator source.
code.putln("/*--- %s ---*/" % self.description)
return code
......@@ -2925,8 +2928,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
# investigation shows that the resulting binary is smaller with repeated functions calls.
for entry in entries:
signature = entry.type.signature_string()
code.putln('if (__Pyx_ExportFunction("%s", (void (*)(void))%s, "%s") < 0) %s' % (
entry.name,
code.putln('if (__Pyx_ExportFunction(%s, (void (*)(void))%s, "%s") < 0) %s' % (
entry.name.as_c_string_literal(),
entry.cname,
signature,
code.error_goto(self.pos)))
......@@ -2998,9 +3001,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.error_goto(self.pos)))
for entry in entries:
code.putln(
'if (__Pyx_ImportFunction(%s, "%s", (void (**)(void))&%s, "%s") < 0) %s' % (
'if (__Pyx_ImportFunction(%s, %s, (void (**)(void))&%s, "%s") < 0) %s' % (
temp,
entry.name,
entry.name.as_c_string_literal(),
entry.cname,
entry.type.signature_string(),
code.error_goto(self.pos)))
......@@ -3079,15 +3082,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
module,
module_name))
type_name = type.name.as_c_string_literal()
if condition and replacement:
code.putln("") # start in new line
code.putln("#if %s" % condition)
code.putln('"%s",' % replacement)
code.putln("#else")
code.putln('"%s",' % type.name)
code.putln('%s,' % type_name)
code.putln("#endif")
else:
code.put(' "%s", ' % type.name)
code.put(' %s, ' % type_name)
if sizeof_objstruct != objstruct:
if not condition:
......
......@@ -13,6 +13,8 @@ codewriter_temp_prefix = pyrex_prefix + "t_"
temp_prefix = u"__cyt_"
pyunicode_identifier_prefix = pyrex_prefix + 'U'
builtin_prefix = pyrex_prefix + "builtin_"
arg_prefix = pyrex_prefix + "arg_"
funcdoc_prefix = pyrex_prefix + "doc_"
......@@ -45,6 +47,13 @@ pybufferstruct_prefix = pyrex_prefix + "pybuffer_"
vtable_prefix = pyrex_prefix + "vtable_"
vtabptr_prefix = pyrex_prefix + "vtabptr_"
vtabstruct_prefix = pyrex_prefix + "vtabstruct_"
unicode_vtabentry_prefix = pyrex_prefix + "Uvtabentry_"
# vtab entries aren't normally mangled,
# but punycode names sometimes start with numbers leading
# to a C syntax error
unicode_structmember_prefix = pyrex_prefix + "Umember_"
# as above -
# not normally mangled but punycode names cause specific problems
opt_arg_prefix = pyrex_prefix + "opt_args_"
convert_func_prefix = pyrex_prefix + "convert_"
closure_scope_prefix = pyrex_prefix + "scope_"
......
......@@ -22,7 +22,8 @@ from . import PyrexTypes
from . import TypeSlots
from .PyrexTypes import py_object_type, error_type
from .Symtab import (ModuleScope, LocalScope, ClosureScope,
StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope)
StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope,
punycodify_name)
from .Code import UtilityCode
from .StringEncoding import EncodedString
from . import Future
......@@ -862,6 +863,9 @@ class CArgDeclNode(Node):
# kw_only boolean Is a keyword-only argument
# is_dynamic boolean Non-literal arg stored inside CyFunction
# pos_only boolean Is a positional-only argument
#
# name_cstring property that converts the name to a cstring taking care of unicode
# and quoting it
child_attrs = ["base_type", "declarator", "default", "annotation"]
outer_attrs = ["default", "annotation"]
......@@ -879,6 +883,10 @@ class CArgDeclNode(Node):
annotation = None
is_dynamic = 0
@property
def name_cstring(self):
    """``self.name`` rendered through its ``as_c_string_literal()`` method."""
    arg_name = self.name
    return arg_name.as_c_string_literal()
def analyse(self, env, nonempty=0, is_self_arg=False):
if is_self_arg:
self.base_type.is_self_arg = self.is_self_arg = True
......@@ -2206,11 +2214,11 @@ class FuncDefNode(StatNode, BlockNode):
typeptr_cname = arg.type.typeptr_cname
arg_code = "((PyObject *)%s)" % arg.entry.cname
code.putln(
'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, "%s", %s))) %s' % (
'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, %s, %s))) %s' % (
arg_code,
typeptr_cname,
arg.accept_none,
arg.name,
arg.name_cstring,
arg.type.is_builtin_type and arg.type.require_exact,
code.error_goto(arg.pos)))
else:
......@@ -2224,8 +2232,8 @@ class FuncDefNode(StatNode, BlockNode):
cname = arg.entry.cname
code.putln('if (unlikely(((PyObject *)%s) == Py_None)) {' % cname)
code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", "%s"); %s''' % (
max(200, len(arg.name)), arg.name,
code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", %s); %s''' % (
max(200, len(arg.name_cstring)), arg.name_cstring,
code.error_goto(arg.pos)))
code.putln('}')
......@@ -3079,9 +3087,9 @@ class DefNode(FuncDefNode):
else:
arg.needs_conversion = 1
if arg.needs_conversion:
arg.hdr_cname = Naming.arg_prefix + arg.name
arg.hdr_cname = punycodify_name(Naming.arg_prefix + arg.name)
else:
arg.hdr_cname = Naming.var_prefix + arg.name
arg.hdr_cname = punycodify_name(Naming.var_prefix + arg.name)
if nfixed > len(self.args):
self.bad_signature()
......@@ -3118,16 +3126,16 @@ class DefNode(FuncDefNode):
entry = env.declare_pyfunction(name, self.pos, allow_redefine=not self.is_wrapper)
self.entry = entry
prefix = env.next_id(env.scope_prefix)
self.entry.pyfunc_cname = Naming.pyfunc_prefix + prefix + name
self.entry.pyfunc_cname = punycodify_name(Naming.pyfunc_prefix + prefix + name)
if Options.docstrings:
entry.doc = embed_position(self.pos, self.doc)
entry.doc_cname = Naming.funcdoc_prefix + prefix + name
entry.doc_cname = punycodify_name(Naming.funcdoc_prefix + prefix + name)
if entry.is_special:
if entry.name in TypeSlots.invisible or not entry.doc or (
entry.name in '__getattr__' and env.directives['fast_getattr']):
entry.wrapperbase_cname = None
else:
entry.wrapperbase_cname = Naming.wrapperbase_prefix + prefix + name
entry.wrapperbase_cname = punycodify_name(Naming.wrapperbase_prefix + prefix + name)
else:
entry.doc = None
......@@ -3304,8 +3312,8 @@ class DefNodeWrapper(FuncDefNode):
target_entry = self.target.entry
name = self.name
prefix = env.next_id(env.scope_prefix)
target_entry.func_cname = Naming.pywrap_prefix + prefix + name
target_entry.pymethdef_cname = Naming.pymethdef_prefix + prefix + name
target_entry.func_cname = punycodify_name(Naming.pywrap_prefix + prefix + name)
target_entry.pymethdef_cname = punycodify_name(Naming.pymethdef_prefix + prefix + name)
self.signature = target_entry.signature
......@@ -3393,7 +3401,7 @@ class DefNodeWrapper(FuncDefNode):
self.return_type.declaration_code(Naming.retval_cname),
retval_init))
code.put_declare_refcount_context()
code.put_setup_refcount_context('%s (wrapper)' % self.name)
code.put_setup_refcount_context(EncodedString('%s (wrapper)' % self.name))
self.generate_argument_parsing_code(lenv, code)
self.generate_argument_type_tests(code)
......@@ -3660,6 +3668,8 @@ class DefNodeWrapper(FuncDefNode):
self.star_arg.entry.xdecref_cleanup = 0
def generate_tuple_and_keyword_parsing_code(self, args, success_label, code):
self_name_csafe = self.name.as_c_string_literal()
argtuple_error_label = code.new_label("argtuple_error")
positional_args = []
......@@ -3741,13 +3751,13 @@ class DefNodeWrapper(FuncDefNode):
# the kw-args dict passed is non-empty (which it will be, since kw_unpacking_condition is true)
code.globalstate.use_utility_code(
UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c"))
code.putln('if (likely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, "%s") < 0)) %s' % (
code.putln('if (likely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, %s) < 0)) %s' % (
Naming.kwds_cname,
Naming.pykwdlist_cname,
self.starstar_arg and self.starstar_arg.entry.cname or '0',
'values',
0,
self.name,
self_name_csafe,
code.error_goto(self.pos)))
# --- optimised code when we do not receive any keyword arguments
......@@ -3831,8 +3841,8 @@ class DefNodeWrapper(FuncDefNode):
code.put_label(argtuple_error_label)
code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %s); ' % (
self.name, has_fixed_positional_count,
code.put('__Pyx_RaiseArgtupleInvalid(%s, %d, %d, %d, %s); ' % (
self_name_csafe, has_fixed_positional_count,
min_positional_args, max_positional_args,
Naming.nargs_cname))
code.putln(code.error_goto(self.pos))
......@@ -3962,6 +3972,8 @@ class DefNodeWrapper(FuncDefNode):
# If we received kwargs, fill up the positional/required
# arguments with values from the kw dict
self_name_csafe = self.name.as_c_string_literal()
code.putln('kw_args = PyDict_Size(%s);' % Naming.kwds_cname)
if self.num_required_args or max_positional_args > 0:
last_required_arg = -1
......@@ -4006,8 +4018,8 @@ class DefNodeWrapper(FuncDefNode):
code.putln('else {')
code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %d); ' % (
self.name, has_fixed_positional_count,
code.put('__Pyx_RaiseArgtupleInvalid(%s, %d, %d, %d, %d); ' % (
self_name_csafe, has_fixed_positional_count,
min_positional_args, max_positional_args, i))
code.putln(code.error_goto(self.pos))
code.putln('}')
......@@ -4015,8 +4027,8 @@ class DefNodeWrapper(FuncDefNode):
code.putln('else {')
code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c"))
code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % (
self.name, pystring_cname))
code.put('__Pyx_RaiseKeywordRequired(%s, %s); ' % (
self_name_csafe, pystring_cname))
code.putln(code.error_goto(self.pos))
code.putln('}')
if max_positional_args > num_pos_only_args:
......@@ -4071,13 +4083,13 @@ class DefNodeWrapper(FuncDefNode):
values_array = 'values'
code.globalstate.use_utility_code(
UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c"))
code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, "%s") < 0)) %s' % (
code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, %s) < 0)) %s' % (
Naming.kwds_cname,
Naming.pykwdlist_cname,
self.starstar_arg and self.starstar_arg.entry.cname or '0',
values_array,
pos_arg_count,
self.name,
self_name_csafe,
code.error_goto(self.pos)))
code.putln('}')
......@@ -4790,6 +4802,10 @@ class CClassDefNode(ClassDefNode):
decorators = None
shadow = False
@property
def punycode_class_name(self):
    """``self.class_name`` passed through ``punycodify_name``."""
    class_name = self.class_name
    return punycodify_name(class_name)
def buffer_defaults(self, env):
if not hasattr(self, '_buffer_defaults'):
from . import Buffer
......
......@@ -1325,9 +1325,9 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations):
_properties = None
_map_property_attribute = {
'getter': '__get__',
'setter': '__set__',
'deleter': '__del__',
'getter': EncodedString('__get__'),
'setter': EncodedString('__set__'),
'deleter': EncodedString('__del__'),
}.get
def visit_CClassDefNode(self, node):
......@@ -1701,7 +1701,7 @@ if VALUE is not None:
e.type.create_from_py_utility_code(env)
all_members_names = sorted([e.name for e in all_members])
checksum = '0x%s' % hashlib.sha1(' '.join(all_members_names).encode('utf-8')).hexdigest()[:7]
unpickle_func_name = '__pyx_unpickle_%s' % node.class_name
unpickle_func_name = '__pyx_unpickle_%s' % node.punycode_class_name
# TODO(robertwb): Move the state into the third argument
# so it can be pickled *after* self is memoized.
......@@ -2744,6 +2744,7 @@ class CreateClosureClasses(CythonTransform):
as_name = '%s_%s' % (
target_module_scope.next_id(Naming.closure_class_prefix),
node.entry.cname.replace('.','__'))
as_name = EncodedString(as_name)
entry = target_module_scope.declare_c_class(
name=as_name, pos=node.pos, defining=True,
......
......@@ -138,6 +138,14 @@ class EncodedString(_unicode):
def as_utf8_string(self):
return bytes_literal(self.utf8encode(), 'utf8')
def as_c_string_literal(self):
    """Encode this string, then return the encoded form as a C string literal.

    When ``self.encoding`` is None the encoded form comes from
    ``self.as_utf8_string()``; otherwise it is ``bytes_literal`` applied to
    ``self.byteencode()`` with ``self.encoding``.  The final text is produced
    by the encoded object's own ``as_c_string_literal()``.
    """
    encoding = self.encoding
    encoded = (
        self.as_utf8_string() if encoding is None
        else bytes_literal(self.byteencode(), encoding)
    )
    return encoded.as_c_string_literal()
def string_contains_surrogates(ustring):
"""
......
......@@ -42,6 +42,28 @@ def c_safe_identifier(cname):
cname = Naming.pyrex_prefix + cname
return cname
def punycodify_name(cname, mangle_with=None):
    """Return ``cname`` unchanged if it is pure ASCII, else a punycode-based form.

    Non-ASCII names are punycode-encoded with every ``-`` replaced by ``_``
    (approach modified from PEP 489).  Then:

    * if ``mangle_with`` is truthy, the result is ``"<mangle_with>_<encoded>"``;
    * otherwise, if the encoded name starts with ``Naming.pyrex_prefix``, that
      first occurrence is swapped for ``Naming.pyunicode_identifier_prefix``.

    NOTE(review): the original comment says ``mangle_with`` "should be a byte
    string", yet it is %-formatted straight into the name -- confirm the
    intended type with the callers.
    """
    try:
        cname.encode('ascii')
    except UnicodeEncodeError:
        pass
    else:
        # Already a plain ASCII name: nothing to transform.
        return cname

    encoded = cname.encode('punycode').replace(b'-', b'_').decode('ascii')
    if mangle_with:
        # Sometimes it is necessary to mangle unicode names on their own where
        # they are inserted directly into C, because the punycode
        # transformation can turn them into invalid identifiers.
        return "%s_%s" % (mangle_with, encoded)
    if encoded.startswith(Naming.pyrex_prefix):
        # A punycode name could also be a valid ASCII variable name, so
        # change the prefix to tell the two apart.
        return encoded.replace(
            Naming.pyrex_prefix, Naming.pyunicode_identifier_prefix, 1)
    return encoded
class BufferAux(object):
writable_needed = False
......@@ -391,7 +413,7 @@ class Scope(object):
def mangle(self, prefix, name = None):
if name:
return "%s%s%s" % (prefix, self.scope_prefix, name)
return punycodify_name("%s%s%s" % (prefix, self.scope_prefix, name))
else:
return self.parent_scope.mangle(prefix, self.name)
......@@ -446,6 +468,7 @@ class Scope(object):
if not self.in_cinclude and cname and re.match("^_[_A-Z]+$", cname):
# See https://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names
warning(pos, "'%s' is a reserved name in C." % cname, -1)
entries = self.entries
if name and name in entries and not shadow:
old_entry = entries[name]
......@@ -737,7 +760,7 @@ class Scope(object):
qualified_name = self.qualify_name(lambda_name)
entry = self.declare(None, func_cname, py_object_type, pos, 'private')
entry.name = lambda_name
entry.name = EncodedString(lambda_name)
entry.qualified_name = qualified_name
entry.pymethdef_cname = pymethdef_cname
entry.func_cname = func_cname
......@@ -1740,7 +1763,7 @@ class LocalScope(Scope):
Scope.__init__(self, name, outer_scope, parent_scope)
def mangle(self, prefix, name):
return prefix + name
return punycodify_name(prefix + name)
def declare_arg(self, name, type, pos):
# Add an entry for an argument of a function.
......@@ -2146,6 +2169,7 @@ class CClassScope(ClassScope):
cname = name
if visibility == 'private':
cname = c_safe_identifier(cname)
cname = punycodify_name(cname, Naming.unicode_structmember_prefix)
if type.is_cpp_class and visibility != 'extern':
type.check_nullary_constructor(pos)
self.use_utility_code(Code.UtilityCode("#include <new>"))
......@@ -2189,6 +2213,7 @@ class CClassScope(ClassScope):
# I keep it in for now. is_member should be enough
# later on
self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname
return entry
def declare_pyfunction(self, name, pos, allow_redefine=False):
......@@ -2247,7 +2272,7 @@ class CClassScope(ClassScope):
(args[0].type, name, self.parent_type))
entry = self.lookup_here(name)
if cname is None:
cname = c_safe_identifier(name)
cname = punycodify_name(c_safe_identifier(name), Naming.unicode_vtabentry_prefix)
if entry:
if not entry.is_cfunction:
warning(pos, "'%s' redeclared " % name, 0)
......@@ -2428,7 +2453,7 @@ class CppClassScope(Scope):
class_name = self.name.split('::')[-1]
if name in (class_name, '__init__') and cname is None:
cname = "%s__init__%s" % (Naming.func_prefix, class_name)
name = '<init>'
name = EncodedString('<init>')
type.return_type = PyrexTypes.CVoidType()
# This is called by the actual constructor, but need to support
# arguments that cannot by called by value.
......@@ -2442,7 +2467,7 @@ class CppClassScope(Scope):
type.args = [maybe_ref(arg) for arg in type.args]
elif name == '__dealloc__' and cname is None:
cname = "%s__dealloc__%s" % (Naming.func_prefix, class_name)
name = '<del>'
name = EncodedString('<del>')
type.return_type = PyrexTypes.CVoidType()
if name in ('<init>', '<del>') and type.nogil:
for base in self.type.base_classes:
......
......@@ -569,6 +569,17 @@ class MethodDispatcherTransform(EnvTransform):
### dispatch to specific handlers
def _find_handler(self, match_name, has_kwargs):
try:
match_name.encode('ascii')
except UnicodeEncodeError:
# specifically when running the Cython compiler under Python 2
# getattr can't take a unicode string.
# Classes with unicode names won't have specific handlers and thus it
# should be OK to return None.
# Doing the test here ensures that the same code gets run on
# Python 2 and 3
return None
call_type = has_kwargs and 'general' or 'simple'
handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None)
if handler is None:
......
#!/usr/bin/env python3