Commit 270bf960 authored by da-woods's avatar da-woods Committed by Stefan Behnel

Unicode identifiers (PEP 3131) (GH-3081)

Closes #2601
parent 4c5cc91b
...@@ -1152,7 +1152,7 @@ class GlobalState(object): ...@@ -1152,7 +1152,7 @@ class GlobalState(object):
w.putln("") w.putln("")
w.putln("static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {") w.putln("static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) {")
w.put_declare_refcount_context() w.put_declare_refcount_context()
w.put_setup_refcount_context("__Pyx_InitCachedConstants") w.put_setup_refcount_context(StringEncoding.EncodedString("__Pyx_InitCachedConstants"))
w = self.parts['init_globals'] w = self.parts['init_globals']
w.enter_cfunc_scope() w.enter_cfunc_scope()
...@@ -2205,9 +2205,10 @@ class CCodeWriter(object): ...@@ -2205,9 +2205,10 @@ class CCodeWriter(object):
cast = entry.signature.method_function_type() cast = entry.signature.method_function_type()
if cast != 'PyCFunction': if cast != 'PyCFunction':
func_ptr = '(void*)(%s)%s' % (cast, func_ptr) func_ptr = '(void*)(%s)%s' % (cast, func_ptr)
entry_name = entry.name.as_c_string_literal()
self.putln( self.putln(
'{"%s", (PyCFunction)%s, %s, %s}%s' % ( '{%s, (PyCFunction)%s, %s, %s}%s' % (
entry.name, entry_name,
func_ptr, func_ptr,
"|".join(method_flags), "|".join(method_flags),
entry.doc_cname if entry.doc else '0', entry.doc_cname if entry.doc else '0',
...@@ -2365,10 +2366,11 @@ class CCodeWriter(object): ...@@ -2365,10 +2366,11 @@ class CCodeWriter(object):
self.putln('__Pyx_RefNannyDeclarations') self.putln('__Pyx_RefNannyDeclarations')
def put_setup_refcount_context(self, name, acquire_gil=False): def put_setup_refcount_context(self, name, acquire_gil=False):
name = name.as_c_string_literal() # handle unicode names
if acquire_gil: if acquire_gil:
self.globalstate.use_utility_code( self.globalstate.use_utility_code(
UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c")) UtilityCode.load_cached("ForceInitThreads", "ModuleSetupCode.c"))
self.putln('__Pyx_RefNannySetupContext("%s", %d);' % (name, acquire_gil and 1 or 0)) self.putln('__Pyx_RefNannySetupContext(%s, %d);' % (name, acquire_gil and 1 or 0))
def put_finish_refcount_context(self): def put_finish_refcount_context(self):
self.putln("__Pyx_RefNannyFinishContext();") self.putln("__Pyx_RefNannyFinishContext();")
...@@ -2379,14 +2381,16 @@ class CCodeWriter(object): ...@@ -2379,14 +2381,16 @@ class CCodeWriter(object):
qualified_name should be the qualified name of the function. qualified_name should be the qualified name of the function.
""" """
qualified_name = qualified_name.as_c_string_literal() # handle unicode names
format_tuple = ( format_tuple = (
qualified_name, qualified_name,
Naming.clineno_cname if include_cline else 0, Naming.clineno_cname if include_cline else 0,
Naming.lineno_cname, Naming.lineno_cname,
Naming.filename_cname, Naming.filename_cname,
) )
self.funcstate.uses_error_indicator = True self.funcstate.uses_error_indicator = True
self.putln('__Pyx_AddTraceback("%s", %s, %s, %s);' % format_tuple) self.putln('__Pyx_AddTraceback(%s, %s, %s, %s);' % format_tuple)
def put_unraisable(self, qualified_name, nogil=False): def put_unraisable(self, qualified_name, nogil=False):
""" """
......
...@@ -167,7 +167,6 @@ def report_error(err, use_stack=True): ...@@ -167,7 +167,6 @@ def report_error(err, use_stack=True):
if Options.fast_fail: if Options.fast_fail:
raise AbortError("fatal errors") raise AbortError("fatal errors")
def error(position, message): def error(position, message):
#print("Errors.error:", repr(position), repr(message)) ### #print("Errors.error:", repr(position), repr(message)) ###
if position is None: if position is None:
...@@ -180,16 +179,22 @@ def error(position, message): ...@@ -180,16 +179,22 @@ def error(position, message):
LEVEL = 1 # warn about all errors level 1 or higher LEVEL = 1 # warn about all errors level 1 or higher
def _write_file_encode(file, line):
try:
file.write(line)
except UnicodeEncodeError:
file.write(line.encode('ascii', 'replace'))
def message(position, message, level=1): def message(position, message, level=1):
if level < LEVEL: if level < LEVEL:
return return
warn = CompileWarning(position, message) warn = CompileWarning(position, message)
line = "note: %s\n" % warn line = u"note: %s\n" % warn
if listing_file: if listing_file:
listing_file.write(line) _write_file_encode(listing_file, line)
if echo_file: if echo_file:
echo_file.write(line) _write_file_encode(echo_file, line)
return warn return warn
...@@ -199,11 +204,11 @@ def warning(position, message, level=0): ...@@ -199,11 +204,11 @@ def warning(position, message, level=0):
if Options.warning_errors and position: if Options.warning_errors and position:
return error(position, message) return error(position, message)
warn = CompileWarning(position, message) warn = CompileWarning(position, message)
line = "warning: %s\n" % warn line = u"warning: %s\n" % warn
if listing_file: if listing_file:
listing_file.write(line) _write_file_encode(listing_file, line)
if echo_file: if echo_file:
echo_file.write(line) _write_file_encode(echo_file, line)
return warn return warn
...@@ -212,11 +217,11 @@ def warn_once(position, message, level=0): ...@@ -212,11 +217,11 @@ def warn_once(position, message, level=0):
if level < LEVEL or message in _warn_once_seen: if level < LEVEL or message in _warn_once_seen:
return return
warn = CompileWarning(position, message) warn = CompileWarning(position, message)
line = "warning: %s\n" % warn line = u"warning: %s\n" % warn
if listing_file: if listing_file:
listing_file.write(line) _write_file_encode(listing_file, line)
if echo_file: if echo_file:
echo_file.write(line) _write_file_encode(echo_file, line)
_warn_once_seen[message] = True _warn_once_seen[message] = True
return warn return warn
......
# -*- coding: utf-8 -*-
# cython: language_level=3, py2_import=True # cython: language_level=3, py2_import=True
# #
# Cython Scanner - Lexical Definitions # Cython Scanner - Lexical Definitions
...@@ -16,7 +17,7 @@ IDENT = 'IDENT' ...@@ -16,7 +17,7 @@ IDENT = 'IDENT'
def make_lexicon(): def make_lexicon():
from ..Plex import \ from ..Plex import \
Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \ Str, Any, AnyBut, AnyChar, Rep, Rep1, Opt, Bol, Eol, Eof, \
TEXT, IGNORE, Method, State, Lexicon TEXT, IGNORE, Method, State, Lexicon, Range
letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_") letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_")
nonzero_digit = Any("123456789") nonzero_digit = Any("123456789")
...@@ -26,6 +27,14 @@ def make_lexicon(): ...@@ -26,6 +27,14 @@ def make_lexicon():
hexdigit = Any("0123456789ABCDEFabcdef") hexdigit = Any("0123456789ABCDEFabcdef")
indentation = Bol + Rep(Any(" \t")) indentation = Bol + Rep(Any(" \t"))
# The lists of valid unicode identifier characters are pretty slow to generate at runtime
# and require Python 3, so they are included directly here
# (via the generated code block at the bottom of the file).
unicode_start_character = (Any(unicode_start_ch_any) | Range(unicode_start_ch_range))
unicode_continuation_character = (
unicode_start_character |
Any(unicode_continuation_ch_any) | Range(unicode_continuation_ch_range))
def underscore_digits(d): def underscore_digits(d):
return Rep1(d) + Rep(Str("_") + Rep1(d)) return Rep1(d) + Rep(Str("_") + Rep1(d))
...@@ -37,7 +46,8 @@ def make_lexicon(): ...@@ -37,7 +46,8 @@ def make_lexicon():
exponent = Any("Ee") + Opt(Any("+-")) + decimal exponent = Any("Ee") + Opt(Any("+-")) + decimal
decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
name = letter + Rep(letter | digit) #name = letter + Rep(letter | digit)
name = unicode_start_character + Rep(unicode_continuation_character)
intconst = (prefixed_digits(nonzero_digit, digit) | # decimal literals with underscores must not start with '0' intconst = (prefixed_digits(nonzero_digit, digit) | # decimal literals with underscores must not start with '0'
(Str("0") + (prefixed_digits(Any("Xx"), hexdigit) | (Str("0") + (prefixed_digits(Any("Xx"), hexdigit) |
prefixed_digits(Any("Oo"), octdigit) | prefixed_digits(Any("Oo"), octdigit) |
...@@ -143,3 +153,46 @@ def make_lexicon(): ...@@ -143,3 +153,46 @@ def make_lexicon():
#debug_file = scanner_dump_file #debug_file = scanner_dump_file
) )
# BEGIN GENERATED CODE
# generated with:
# cpython 3.7.3 (default, Apr 09 2019, 05:18:21) [GCC]
unicode_start_ch_any = (
u"_ªµºˬˮͿΆΌՙەۿܐޱߺࠚࠤࠨऽॐলঽৎৼਫ਼ઽૐૹଽୱஃஜௐఽಀಽೞഽൎලาຄຊຍລວາຽໆༀဿၡႎჇჍቘዀៗៜᢪ"
u"ᪧὙὛὝιⁱⁿℂℇℕℤΩℨⅎⴧⴭⵯꣻꧏꩺꪱꫀꫂיִמּﹱﹳﹷﹹﹻﹽ𐠈𐠼𐨀𐼧𑅄𑅶𑇚𑇜𑊈𑌽𑍐𑓇𑙄𑣿𑨀𑨺𑩐𑪝𑱀𑵆𑶘𖽐𝒢𝒻𝕆𞸤𞸧𞸹𞸻"
u"𞹂𞹇𞹉𞹋𞹔𞹗𞹙𞹛𞹝𞹟𞹤𞹾")
unicode_start_ch_range = (
u"AZazÀÖØöøˁˆˑˠˤͰʹͶͷͻͽΈΊΎΡΣϵϷҁҊԯԱՖՠֈאתׯײؠيٮٯٱۓۥۦۮۯۺۼܒܯݍޥߊߪߴߵࠀࠕ"
u"ࡀࡘࡠࡪࢠࢴࢶࢽऄहक़ॡॱঀঅঌএঐওনপরশহড়ঢ়য়ৡৰৱਅਊਏਐਓਨਪਰਲਲ਼ਵਸ਼ਸਹਖ਼ੜੲੴઅઍએઑઓનપરલળવહ"
u"ૠૡଅଌଏଐଓନପରଲଳଵହଡ଼ଢ଼ୟୡஅஊஎஐஒகஙசஞடணதநபமஹఅఌఎఐఒనపహౘౚౠౡಅಌಎಐಒನಪಳವಹೠೡೱೲ"
u"അഌഎഐഒഺൔൖൟൡൺൿඅඖකනඳරවෆกะเๆກຂງຈດທນຟມຣສຫອະເໄໜໟཀཇཉཬྈྌကဪၐၕၚၝၥၦၮၰၵႁ"
u"ႠჅაჺჼቈቊቍቐቖቚቝበኈኊኍነኰኲኵኸኾዂዅወዖዘጐጒጕጘፚᎀᎏᎠᏵᏸᏽᐁᙬᙯᙿᚁᚚᚠᛪᛮᛸᜀᜌᜎᜑᜠᜱᝀᝑᝠᝬᝮᝰ"
u"កឳᠠᡸᢀᢨᢰᣵᤀᤞᥐᥭᥰᥴᦀᦫᦰᧉᨀᨖᨠᩔᬅᬳᭅᭋᮃᮠᮮᮯᮺᯥᰀᰣᱍᱏᱚᱽᲀᲈᲐᲺᲽᲿᳩᳬᳮᳱᳵᳶᴀᶿḀἕἘἝἠὅὈὍ"
u"ὐὗὟώᾀᾴᾶᾼῂῄῆῌῐΐῖΊῠῬῲῴῶῼₐₜℊℓ℘ℝKℹℼℿⅅⅉⅠↈⰀⰮⰰⱞⱠⳤⳫⳮⳲⳳⴀⴥⴰⵧⶀⶖⶠⶦⶨⶮⶰⶶⶸⶾ"
u"ⷀⷆⷈⷎⷐⷖⷘⷞ々〇〡〩〱〵〸〼ぁゖゝゟァヺーヿㄅㄯㄱㆎㆠㆺㇰㇿ㐀䶵一鿯ꀀꒌꓐꓽꔀꘌꘐꘟꘪꘫꙀꙮꙿꚝꚠꛯꜗꜟꜢꞈꞋꞹꟷꠁ"
u"ꠃꠅꠇꠊꠌꠢꡀꡳꢂꢳꣲꣷꣽꣾꤊꤥꤰꥆꥠꥼꦄꦲꧠꧤꧦꧯꧺꧾꨀꨨꩀꩂꩄꩋꩠꩶꩾꪯꪵꪶꪹꪽꫛꫝꫠꫪꫲꫴꬁꬆꬉꬎꬑꬖꬠꬦꬨꬮꬰꭚ"
u"ꭜꭥꭰꯢ가힣ힰퟆퟋퟻ豈舘並龎ffstﬓﬗײַﬨשׁזּטּלּנּסּףּפּצּﮱﯓﱝﱤﴽﵐﶏﶒﷇﷰﷹﹿﻼAZazヲンᅠ하ᅦᅧᅬᅭᅲᅳᅵ𐀀𐀋"
u"𐀍𐀦𐀨𐀺𐀼𐀽𐀿𐁍𐁐𐁝𐂀𐃺𐅀𐅴𐊀𐊜𐊠𐋐𐌀𐌟𐌭𐍊𐍐𐍵𐎀𐎝𐎠𐏃𐏈𐏏𐏑𐏕𐐀𐒝𐒰𐓓𐓘𐓻𐔀𐔧𐔰𐕣𐘀𐜶𐝀𐝕𐝠𐝧𐠀𐠅𐠊𐠵𐠷𐠸𐠿𐡕𐡠𐡶𐢀𐢞"
u"𐣠𐣲𐣴𐣵𐤀𐤕𐤠𐤹𐦀𐦷𐦾𐦿𐨐𐨓𐨕𐨗𐨙𐨵𐩠𐩼𐪀𐪜𐫀𐫇𐫉𐫤𐬀𐬵𐭀𐭕𐭠𐭲𐮀𐮑𐰀𐱈𐲀𐲲𐳀𐳲𐴀𐴣𐼀𐼜𐼰𐽅𑀃𑀷𑂃𑂯𑃐𑃨𑄃𑄦𑅐𑅲𑆃𑆲𑇁𑇄"
u"𑈀𑈑𑈓𑈫𑊀𑊆𑊊𑊍𑊏𑊝𑊟𑊨𑊰𑋞𑌅𑌌𑌏𑌐𑌓𑌨𑌪𑌰𑌲𑌳𑌵𑌹𑍝𑍡𑐀𑐴𑑇𑑊𑒀𑒯𑓄𑓅𑖀𑖮𑗘𑗛𑘀𑘯𑚀𑚪𑜀𑜚𑠀𑠫𑢠𑣟𑨋𑨲𑩜𑪃𑪆𑪉𑫀𑫸𑰀𑰈"
u"𑰊𑰮𑱲𑲏𑴀𑴆𑴈𑴉𑴋𑴰𑵠𑵥𑵧𑵨𑵪𑶉𑻠𑻲𒀀𒎙𒐀𒑮𒒀𒕃𓀀𓐮𔐀𔙆𖠀𖨸𖩀𖩞𖫐𖫭𖬀𖬯𖭀𖭃𖭣𖭷𖭽𖮏𖹀𖹿𖼀𖽄𖾓𖾟𖿠𖿡𗀀𘟱𘠀𘫲𛀀𛄞𛅰𛋻𛰀𛱪"
u"𛱰𛱼𛲀𛲈𛲐𛲙𝐀𝑔𝑖𝒜𝒞𝒟𝒥𝒦𝒩𝒬𝒮𝒹𝒽𝓃𝓅𝔅𝔇𝔊𝔍𝔔𝔖𝔜𝔞𝔹𝔻𝔾𝕀𝕄𝕊𝕐𝕒𝚥𝚨𝛀𝛂𝛚𝛜𝛺𝛼𝜔𝜖𝜴𝜶𝝎𝝐𝝮𝝰𝞈𝞊𝞨𝞪𝟂𝟄𝟋"
u"𞠀𞣄𞤀𞥃𞸀𞸃𞸅𞸟𞸡𞸢𞸩𞸲𞸴𞸷𞹍𞹏𞹑𞹒𞹡𞹢𞹧𞹪𞹬𞹲𞹴𞹷𞹹𞹼𞺀𞺉𞺋𞺛𞺡𞺣𞺥𞺩𞺫𞺻𠀀𪛖𪜀𫜴𫝀𫠝𫠠𬺡𬺰𮯠")
unicode_continuation_ch_any = (
u"··়ׇֿٰܑ߽ৗ਼৾ੑੵ઼଼ஂௗ಼ൗ්ූัັ༹༵༷࿆᳭ᢩ៝⁔⵿⃡꙯ꠂ꠆ꠋꧥꩃﬞꪰ꫁_𑅳𐨿𐇽𐋠𑈾𑍗𑩇𑑞𑴺𑵇𝩵𝪄")
unicode_continuation_ch_range = (
u"09ֽׁׂًؚ֑ׅ̀ͯ҃҇ׄؐ٩۪ۭۖۜ۟ۤۧۨ۰۹ܰ݊ަް߀߉࡙࡛࣓ࣣ߫߳ࠖ࠙ࠛࠣࠥࠧࠩ࠭࣡ःऺ़ाॏ॑ॗॢॣ०९ঁঃ"
u"াৄেৈো্ৢৣ০৯ਁਃਾੂੇੈੋ੍੦ੱઁઃાૅેૉો્ૢૣ૦૯ૺ૿ଁଃାୄେୈୋ୍ୖୗୢୣ୦୯ாூெைொ்௦௯ఀఄాౄ"
u"ెైొ్ౕౖౢౣ౦౯ಁಃಾೄೆೈೊ್ೕೖೢೣ೦೯ഀഃ഻഼ാൄെൈൊ്ൢൣ൦൯ංඃාුෘෟ෦෯ෲෳำฺ็๎๐๙ຳູົຼ່ໍ"
u"໐໙༘༙༠༩༾༿྄ཱ྆྇ྍྗྙྼါှ၀၉ၖၙၞၠၢၤၧၭၱၴႂႍႏႝ፝፟፩፱ᜒ᜔ᜲ᜴ᝒᝓᝲᝳ឴៓០៩᠋᠍᠐᠙ᤠᤫᤰ᤻᥆᥏"
u"᧐᧚ᨗᨛᩕᩞ᩠᩿᩼᪉᪐᪙᪽᪰ᬀᬄ᬴᭄᭐᭙᭫᭳ᮀᮂᮡᮭ᮰᮹᯦᯳ᰤ᰷᱀᱉᱐᱙᳔᳨᳐᳒ᳲ᳴᳷᷹᷿᳹᷀᷻‿⁀⃥〪〯⃐⃜⃰⳯⳱ⷠⷿ"
u"゙゚꘠꘩ꙴ꙽ꚞꚟ꛰꛱ꠣꠧꢀꢁꢴꣅ꣐꣙꣠꣱ꣿ꤉ꤦ꤭ꥇ꥓ꦀꦃ꦳꧀꧐꧙꧰꧹ꨩꨶꩌꩍ꩐꩙ꩻꩽꪴꪲꪷꪸꪾ꪿ꫫꫯꫵ꫶ꯣꯪ꯬꯭꯰꯹︀️"
u"︠︯︳︴﹍﹏09゙゚𐍶𐍺𐒠𐒩𐨁𐨃𐨅𐨆𐨌𐨺𐫦𐨏𐨸𐫥𐴤𐴧𐴰𐴹𐽆𐽐𑀀𑀂𑀸𑁆𑁦𑁯𑁿𑂂𑂰𑂺𑃰𑃹𑄀𑄂𑄧𑄴𑄶𑄿𑅅𑅆𑆀𑆂𑆳𑇀𑇉𑇌𑇐𑇙𑈬𑈷"
u"𑋟𑋪𑋰𑋹𑌀𑌃𑌻𑌼𑌾𑍄𑍇𑍈𑍋𑍍𑍢𑍣𑍦𑍬𑍰𑍴𑐵𑑆𑑐𑑙𑒰𑓃𑓐𑓙𑖯𑖵𑖸𑗀𑗜𑗝𑘰𑙀𑙐𑙙𑚫𑚷𑛀𑛉𑜝𑜫𑜰𑜹𑠬𑠺𑣠𑣩𑨁𑨊𑨳𑨹𑨻𑨾𑩑𑩛𑪊𑪙"
u"𑰯𑰶𑰸𑰿𑱐𑱙𑲒𑲧𑲩𑲶𑴱𑴶𑴼𑴽𑴿𑵅𑵐𑵙𑶊𑶎𑶐𑶑𑶓𑶗𑶠𑶩𑻳𑻶𖩠𖩩𖫰𖫴𖬰𖬶𖭐𖭙𖽑𖽾𖾏𖾒𛲝𛲞𝅩𝅥𝅲𝅻𝆂𝆋𝅭𝆅𝆪𝆭𝉂𝉄𝟎𝟿𝨀𝨶𝨻𝩬"
u"𝪛𝪟𝪡𝪯𞥊𞣐𞣖𞀀𞀆𞀈𞀘𞀛𞀡𞀣𞀤𞀦𞀪𞥄𞥐𞥙")
# END GENERATED CODE
...@@ -292,14 +292,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -292,14 +292,14 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
cname = env.mangle(Naming.func_prefix_api, entry.name) cname = env.mangle(Naming.func_prefix_api, entry.name)
sig = entry.type.signature_string() sig = entry.type.signature_string()
h_code.putln( h_code.putln(
'if (__Pyx_ImportFunction(module, "%s", (void (**)(void))&%s, "%s") < 0) goto bad;' 'if (__Pyx_ImportFunction(module, %s, (void (**)(void))&%s, "%s") < 0) goto bad;'
% (entry.name, cname, sig)) % (entry.name.as_c_string_literal(), cname, sig))
for entry in api_vars: for entry in api_vars:
cname = env.mangle(Naming.varptr_prefix_api, entry.name) cname = env.mangle(Naming.varptr_prefix_api, entry.name)
sig = entry.type.empty_declaration_code() sig = entry.type.empty_declaration_code()
h_code.putln( h_code.putln(
'if (__Pyx_ImportVoidPtr(module, "%s", (void **)&%s, "%s") < 0) goto bad;' 'if (__Pyx_ImportVoidPtr(module, %s, (void **)&%s, "%s") < 0) goto bad;'
% (entry.name, cname, sig)) % (entry.name.as_c_string_literal(), cname, sig))
with ModuleImportGenerator(h_code, imported_modules={env.qualified_name: 'module'}) as import_generator: with ModuleImportGenerator(h_code, imported_modules={env.qualified_name: 'module'}) as import_generator:
for entry in api_extension_types: for entry in api_extension_types:
self.generate_type_import_call(entry.type, h_code, import_generator, error_code="goto bad;") self.generate_type_import_call(entry.type, h_code, import_generator, error_code="goto bad;")
...@@ -2155,9 +2155,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2155,9 +2155,11 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln(header % type.typeobj_cname) code.putln(header % type.typeobj_cname)
code.putln( code.putln(
"PyVarObject_HEAD_INIT(0, 0)") "PyVarObject_HEAD_INIT(0, 0)")
classname = scope.class_name.as_c_string_literal()
code.putln( code.putln(
'"%s.%s", /*tp_name*/' % ( '"%s."%s, /*tp_name*/' % (
self.full_module_name, scope.class_name)) self.full_module_name,
classname))
if type.typedef_flag: if type.typedef_flag:
objstruct = type.objstruct_cname objstruct = type.objstruct_cname
else: else:
...@@ -2224,8 +2226,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2224,8 +2226,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
else: else:
doc_code = "0" doc_code = "0"
code.putln( code.putln(
'{(char *)"%s", %s, %s, (char *)%s, 0},' % ( '{(char *)%s, %s, %s, (char *)%s, 0},' % (
entry.name, entry.name.as_c_string_literal(),
entry.getter_cname or "0", entry.getter_cname or "0",
entry.setter_cname or "0", entry.setter_cname or "0",
doc_code)) doc_code))
...@@ -2301,7 +2303,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2301,7 +2303,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if code.label_used(code.error_label): if code.label_used(code.error_label):
code.put_label(code.error_label) code.put_label(code.error_label)
# This helps locate the offending name. # This helps locate the offending name.
code.put_add_traceback(self.full_module_name) code.put_add_traceback(EncodedString(self.full_module_name))
code.error_label = old_error_label code.error_label = old_error_label
code.putln("bad:") code.putln("bad:")
code.putln("return -1;") code.putln("return -1;")
...@@ -2318,6 +2320,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2318,6 +2320,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0]) code.putln(UtilityCode.load_as_string("PyModInitFuncType", "ModuleSetupCode.c")[0])
header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % env.module_name header2 = "__Pyx_PyMODINIT_FUNC init%s(void)" % env.module_name
header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env) header3 = "__Pyx_PyMODINIT_FUNC %s(void)" % self.mod_init_func_cname('PyInit', env)
header3 = EncodedString(header3)
code.putln("#if PY_MAJOR_VERSION < 3") code.putln("#if PY_MAJOR_VERSION < 3")
# Optimise for small code size as the module init function is only executed once. # Optimise for small code size as the module init function is only executed once.
code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2) code.putln("%s CYTHON_SMALL_CODE; /*proto*/" % header2)
...@@ -2513,7 +2516,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2513,7 +2516,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.put_xdecref(cname, type) code.put_xdecref(cname, type)
code.putln('if (%s) {' % env.module_cname) code.putln('if (%s) {' % env.module_cname)
code.putln('if (%s) {' % env.module_dict_cname) code.putln('if (%s) {' % env.module_dict_cname)
code.put_add_traceback("init %s" % env.qualified_name) code.put_add_traceback(EncodedString("init %s" % env.qualified_name))
code.globalstate.use_utility_code(Nodes.traceback_utility_code) code.globalstate.use_utility_code(Nodes.traceback_utility_code)
# Module reference and module dict are in global variables which might still be needed # Module reference and module dict are in global variables which might still be needed
# for cleanup, atexit code, etc., so leaking is better than crashing. # for cleanup, atexit code, etc., so leaking is better than crashing.
...@@ -2573,7 +2576,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2573,7 +2576,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("static int %s(void) {" % self.cfunc_name) code.putln("static int %s(void) {" % self.cfunc_name)
code.put_declare_refcount_context() code.put_declare_refcount_context()
self.tempdecl_code = code.insertion_point() self.tempdecl_code = code.insertion_point()
code.put_setup_refcount_context(self.cfunc_name) code.put_setup_refcount_context(EncodedString(self.cfunc_name))
# Leave a grepable marker that makes it easy to find the generator source. # Leave a grepable marker that makes it easy to find the generator source.
code.putln("/*--- %s ---*/" % self.description) code.putln("/*--- %s ---*/" % self.description)
return code return code
...@@ -2925,8 +2928,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2925,8 +2928,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
# investigation shows that the resulting binary is smaller with repeated functions calls. # investigation shows that the resulting binary is smaller with repeated functions calls.
for entry in entries: for entry in entries:
signature = entry.type.signature_string() signature = entry.type.signature_string()
code.putln('if (__Pyx_ExportFunction("%s", (void (*)(void))%s, "%s") < 0) %s' % ( code.putln('if (__Pyx_ExportFunction(%s, (void (*)(void))%s, "%s") < 0) %s' % (
entry.name, entry.name.as_c_string_literal(),
entry.cname, entry.cname,
signature, signature,
code.error_goto(self.pos))) code.error_goto(self.pos)))
...@@ -2998,9 +3001,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -2998,9 +3001,9 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.error_goto(self.pos))) code.error_goto(self.pos)))
for entry in entries: for entry in entries:
code.putln( code.putln(
'if (__Pyx_ImportFunction(%s, "%s", (void (**)(void))&%s, "%s") < 0) %s' % ( 'if (__Pyx_ImportFunction(%s, %s, (void (**)(void))&%s, "%s") < 0) %s' % (
temp, temp,
entry.name, entry.name.as_c_string_literal(),
entry.cname, entry.cname,
entry.type.signature_string(), entry.type.signature_string(),
code.error_goto(self.pos))) code.error_goto(self.pos)))
...@@ -3079,15 +3082,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -3079,15 +3082,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
module, module,
module_name)) module_name))
type_name = type.name.as_c_string_literal()
if condition and replacement: if condition and replacement:
code.putln("") # start in new line code.putln("") # start in new line
code.putln("#if %s" % condition) code.putln("#if %s" % condition)
code.putln('"%s",' % replacement) code.putln('"%s",' % replacement)
code.putln("#else") code.putln("#else")
code.putln('"%s",' % type.name) code.putln('%s,' % type_name)
code.putln("#endif") code.putln("#endif")
else: else:
code.put(' "%s", ' % type.name) code.put(' %s, ' % type_name)
if sizeof_objstruct != objstruct: if sizeof_objstruct != objstruct:
if not condition: if not condition:
......
...@@ -13,6 +13,8 @@ codewriter_temp_prefix = pyrex_prefix + "t_" ...@@ -13,6 +13,8 @@ codewriter_temp_prefix = pyrex_prefix + "t_"
temp_prefix = u"__cyt_" temp_prefix = u"__cyt_"
pyunicode_identifier_prefix = pyrex_prefix + 'U'
builtin_prefix = pyrex_prefix + "builtin_" builtin_prefix = pyrex_prefix + "builtin_"
arg_prefix = pyrex_prefix + "arg_" arg_prefix = pyrex_prefix + "arg_"
funcdoc_prefix = pyrex_prefix + "doc_" funcdoc_prefix = pyrex_prefix + "doc_"
...@@ -45,6 +47,13 @@ pybufferstruct_prefix = pyrex_prefix + "pybuffer_" ...@@ -45,6 +47,13 @@ pybufferstruct_prefix = pyrex_prefix + "pybuffer_"
vtable_prefix = pyrex_prefix + "vtable_" vtable_prefix = pyrex_prefix + "vtable_"
vtabptr_prefix = pyrex_prefix + "vtabptr_" vtabptr_prefix = pyrex_prefix + "vtabptr_"
vtabstruct_prefix = pyrex_prefix + "vtabstruct_" vtabstruct_prefix = pyrex_prefix + "vtabstruct_"
unicode_vtabentry_prefix = pyrex_prefix + "Uvtabentry_"
# vtab entries aren't normally mangled,
# but punycode names sometimes start with numbers, leading
# to a C syntax error
unicode_structmember_prefix = pyrex_prefix + "Umember_"
# as above -
# not normally mangled, but punycode names cause specific problems
opt_arg_prefix = pyrex_prefix + "opt_args_" opt_arg_prefix = pyrex_prefix + "opt_args_"
convert_func_prefix = pyrex_prefix + "convert_" convert_func_prefix = pyrex_prefix + "convert_"
closure_scope_prefix = pyrex_prefix + "scope_" closure_scope_prefix = pyrex_prefix + "scope_"
......
...@@ -22,7 +22,8 @@ from . import PyrexTypes ...@@ -22,7 +22,8 @@ from . import PyrexTypes
from . import TypeSlots from . import TypeSlots
from .PyrexTypes import py_object_type, error_type from .PyrexTypes import py_object_type, error_type
from .Symtab import (ModuleScope, LocalScope, ClosureScope, from .Symtab import (ModuleScope, LocalScope, ClosureScope,
StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope) StructOrUnionScope, PyClassScope, CppClassScope, TemplateScope,
punycodify_name)
from .Code import UtilityCode from .Code import UtilityCode
from .StringEncoding import EncodedString from .StringEncoding import EncodedString
from . import Future from . import Future
...@@ -862,6 +863,9 @@ class CArgDeclNode(Node): ...@@ -862,6 +863,9 @@ class CArgDeclNode(Node):
# kw_only boolean Is a keyword-only argument # kw_only boolean Is a keyword-only argument
# is_dynamic boolean Non-literal arg stored inside CyFunction # is_dynamic boolean Non-literal arg stored inside CyFunction
# pos_only boolean Is a positional-only argument # pos_only boolean Is a positional-only argument
#
# name_cstring property that converts the name to a cstring taking care of unicode
# and quoting it
child_attrs = ["base_type", "declarator", "default", "annotation"] child_attrs = ["base_type", "declarator", "default", "annotation"]
outer_attrs = ["default", "annotation"] outer_attrs = ["default", "annotation"]
...@@ -879,6 +883,10 @@ class CArgDeclNode(Node): ...@@ -879,6 +883,10 @@ class CArgDeclNode(Node):
annotation = None annotation = None
is_dynamic = 0 is_dynamic = 0
@property
def name_cstring(self):
    """Return ``self.name.as_c_string_literal()``.

    Used where the argument name is interpolated into generated C code as
    an already-quoted string literal, so callers must not add their own
    surrounding quotes.
    """
    return self.name.as_c_string_literal()
def analyse(self, env, nonempty=0, is_self_arg=False): def analyse(self, env, nonempty=0, is_self_arg=False):
if is_self_arg: if is_self_arg:
self.base_type.is_self_arg = self.is_self_arg = True self.base_type.is_self_arg = self.is_self_arg = True
...@@ -2206,11 +2214,11 @@ class FuncDefNode(StatNode, BlockNode): ...@@ -2206,11 +2214,11 @@ class FuncDefNode(StatNode, BlockNode):
typeptr_cname = arg.type.typeptr_cname typeptr_cname = arg.type.typeptr_cname
arg_code = "((PyObject *)%s)" % arg.entry.cname arg_code = "((PyObject *)%s)" % arg.entry.cname
code.putln( code.putln(
'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, "%s", %s))) %s' % ( 'if (unlikely(!__Pyx_ArgTypeTest(%s, %s, %d, %s, %s))) %s' % (
arg_code, arg_code,
typeptr_cname, typeptr_cname,
arg.accept_none, arg.accept_none,
arg.name, arg.name_cstring,
arg.type.is_builtin_type and arg.type.require_exact, arg.type.is_builtin_type and arg.type.require_exact,
code.error_goto(arg.pos))) code.error_goto(arg.pos)))
else: else:
...@@ -2224,8 +2232,8 @@ class FuncDefNode(StatNode, BlockNode): ...@@ -2224,8 +2232,8 @@ class FuncDefNode(StatNode, BlockNode):
cname = arg.entry.cname cname = arg.entry.cname
code.putln('if (unlikely(((PyObject *)%s) == Py_None)) {' % cname) code.putln('if (unlikely(((PyObject *)%s) == Py_None)) {' % cname)
code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", "%s"); %s''' % ( code.putln('''PyErr_Format(PyExc_TypeError, "Argument '%%.%ds' must not be None", %s); %s''' % (
max(200, len(arg.name)), arg.name, max(200, len(arg.name_cstring)), arg.name_cstring,
code.error_goto(arg.pos))) code.error_goto(arg.pos)))
code.putln('}') code.putln('}')
...@@ -3079,9 +3087,9 @@ class DefNode(FuncDefNode): ...@@ -3079,9 +3087,9 @@ class DefNode(FuncDefNode):
else: else:
arg.needs_conversion = 1 arg.needs_conversion = 1
if arg.needs_conversion: if arg.needs_conversion:
arg.hdr_cname = Naming.arg_prefix + arg.name arg.hdr_cname = punycodify_name(Naming.arg_prefix + arg.name)
else: else:
arg.hdr_cname = Naming.var_prefix + arg.name arg.hdr_cname = punycodify_name(Naming.var_prefix + arg.name)
if nfixed > len(self.args): if nfixed > len(self.args):
self.bad_signature() self.bad_signature()
...@@ -3118,16 +3126,16 @@ class DefNode(FuncDefNode): ...@@ -3118,16 +3126,16 @@ class DefNode(FuncDefNode):
entry = env.declare_pyfunction(name, self.pos, allow_redefine=not self.is_wrapper) entry = env.declare_pyfunction(name, self.pos, allow_redefine=not self.is_wrapper)
self.entry = entry self.entry = entry
prefix = env.next_id(env.scope_prefix) prefix = env.next_id(env.scope_prefix)
self.entry.pyfunc_cname = Naming.pyfunc_prefix + prefix + name self.entry.pyfunc_cname = punycodify_name(Naming.pyfunc_prefix + prefix + name)
if Options.docstrings: if Options.docstrings:
entry.doc = embed_position(self.pos, self.doc) entry.doc = embed_position(self.pos, self.doc)
entry.doc_cname = Naming.funcdoc_prefix + prefix + name entry.doc_cname = punycodify_name(Naming.funcdoc_prefix + prefix + name)
if entry.is_special: if entry.is_special:
if entry.name in TypeSlots.invisible or not entry.doc or ( if entry.name in TypeSlots.invisible or not entry.doc or (
entry.name in '__getattr__' and env.directives['fast_getattr']): entry.name in '__getattr__' and env.directives['fast_getattr']):
entry.wrapperbase_cname = None entry.wrapperbase_cname = None
else: else:
entry.wrapperbase_cname = Naming.wrapperbase_prefix + prefix + name entry.wrapperbase_cname = punycodify_name(Naming.wrapperbase_prefix + prefix + name)
else: else:
entry.doc = None entry.doc = None
...@@ -3304,8 +3312,8 @@ class DefNodeWrapper(FuncDefNode): ...@@ -3304,8 +3312,8 @@ class DefNodeWrapper(FuncDefNode):
target_entry = self.target.entry target_entry = self.target.entry
name = self.name name = self.name
prefix = env.next_id(env.scope_prefix) prefix = env.next_id(env.scope_prefix)
target_entry.func_cname = Naming.pywrap_prefix + prefix + name target_entry.func_cname = punycodify_name(Naming.pywrap_prefix + prefix + name)
target_entry.pymethdef_cname = Naming.pymethdef_prefix + prefix + name target_entry.pymethdef_cname = punycodify_name(Naming.pymethdef_prefix + prefix + name)
self.signature = target_entry.signature self.signature = target_entry.signature
...@@ -3393,7 +3401,7 @@ class DefNodeWrapper(FuncDefNode): ...@@ -3393,7 +3401,7 @@ class DefNodeWrapper(FuncDefNode):
self.return_type.declaration_code(Naming.retval_cname), self.return_type.declaration_code(Naming.retval_cname),
retval_init)) retval_init))
code.put_declare_refcount_context() code.put_declare_refcount_context()
code.put_setup_refcount_context('%s (wrapper)' % self.name) code.put_setup_refcount_context(EncodedString('%s (wrapper)' % self.name))
self.generate_argument_parsing_code(lenv, code) self.generate_argument_parsing_code(lenv, code)
self.generate_argument_type_tests(code) self.generate_argument_type_tests(code)
...@@ -3660,6 +3668,8 @@ class DefNodeWrapper(FuncDefNode): ...@@ -3660,6 +3668,8 @@ class DefNodeWrapper(FuncDefNode):
self.star_arg.entry.xdecref_cleanup = 0 self.star_arg.entry.xdecref_cleanup = 0
def generate_tuple_and_keyword_parsing_code(self, args, success_label, code): def generate_tuple_and_keyword_parsing_code(self, args, success_label, code):
self_name_csafe = self.name.as_c_string_literal()
argtuple_error_label = code.new_label("argtuple_error") argtuple_error_label = code.new_label("argtuple_error")
positional_args = [] positional_args = []
...@@ -3741,13 +3751,13 @@ class DefNodeWrapper(FuncDefNode): ...@@ -3741,13 +3751,13 @@ class DefNodeWrapper(FuncDefNode):
# the kw-args dict passed is non-empty (which it will be, since kw_unpacking_condition is true) # the kw-args dict passed is non-empty (which it will be, since kw_unpacking_condition is true)
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c")) UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c"))
code.putln('if (likely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, "%s") < 0)) %s' % ( code.putln('if (likely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, %s) < 0)) %s' % (
Naming.kwds_cname, Naming.kwds_cname,
Naming.pykwdlist_cname, Naming.pykwdlist_cname,
self.starstar_arg and self.starstar_arg.entry.cname or '0', self.starstar_arg and self.starstar_arg.entry.cname or '0',
'values', 'values',
0, 0,
self.name, self_name_csafe,
code.error_goto(self.pos))) code.error_goto(self.pos)))
# --- optimised code when we do not receive any keyword arguments # --- optimised code when we do not receive any keyword arguments
...@@ -3831,8 +3841,8 @@ class DefNodeWrapper(FuncDefNode): ...@@ -3831,8 +3841,8 @@ class DefNodeWrapper(FuncDefNode):
code.put_label(argtuple_error_label) code.put_label(argtuple_error_label)
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c")) UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %s); ' % ( code.put('__Pyx_RaiseArgtupleInvalid(%s, %d, %d, %d, %s); ' % (
self.name, has_fixed_positional_count, self_name_csafe, has_fixed_positional_count,
min_positional_args, max_positional_args, min_positional_args, max_positional_args,
Naming.nargs_cname)) Naming.nargs_cname))
code.putln(code.error_goto(self.pos)) code.putln(code.error_goto(self.pos))
...@@ -3962,6 +3972,8 @@ class DefNodeWrapper(FuncDefNode): ...@@ -3962,6 +3972,8 @@ class DefNodeWrapper(FuncDefNode):
# If we received kwargs, fill up the positional/required # If we received kwargs, fill up the positional/required
# arguments with values from the kw dict # arguments with values from the kw dict
self_name_csafe = self.name.as_c_string_literal()
code.putln('kw_args = PyDict_Size(%s);' % Naming.kwds_cname) code.putln('kw_args = PyDict_Size(%s);' % Naming.kwds_cname)
if self.num_required_args or max_positional_args > 0: if self.num_required_args or max_positional_args > 0:
last_required_arg = -1 last_required_arg = -1
...@@ -4006,8 +4018,8 @@ class DefNodeWrapper(FuncDefNode): ...@@ -4006,8 +4018,8 @@ class DefNodeWrapper(FuncDefNode):
code.putln('else {') code.putln('else {')
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c")) UtilityCode.load_cached("RaiseArgTupleInvalid", "FunctionArguments.c"))
code.put('__Pyx_RaiseArgtupleInvalid("%s", %d, %d, %d, %d); ' % ( code.put('__Pyx_RaiseArgtupleInvalid(%s, %d, %d, %d, %d); ' % (
self.name, has_fixed_positional_count, self_name_csafe, has_fixed_positional_count,
min_positional_args, max_positional_args, i)) min_positional_args, max_positional_args, i))
code.putln(code.error_goto(self.pos)) code.putln(code.error_goto(self.pos))
code.putln('}') code.putln('}')
...@@ -4015,8 +4027,8 @@ class DefNodeWrapper(FuncDefNode): ...@@ -4015,8 +4027,8 @@ class DefNodeWrapper(FuncDefNode):
code.putln('else {') code.putln('else {')
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c")) UtilityCode.load_cached("RaiseKeywordRequired", "FunctionArguments.c"))
code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % ( code.put('__Pyx_RaiseKeywordRequired(%s, %s); ' % (
self.name, pystring_cname)) self_name_csafe, pystring_cname))
code.putln(code.error_goto(self.pos)) code.putln(code.error_goto(self.pos))
code.putln('}') code.putln('}')
if max_positional_args > num_pos_only_args: if max_positional_args > num_pos_only_args:
...@@ -4071,13 +4083,13 @@ class DefNodeWrapper(FuncDefNode): ...@@ -4071,13 +4083,13 @@ class DefNodeWrapper(FuncDefNode):
values_array = 'values' values_array = 'values'
code.globalstate.use_utility_code( code.globalstate.use_utility_code(
UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c")) UtilityCode.load_cached("ParseKeywords", "FunctionArguments.c"))
code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, "%s") < 0)) %s' % ( code.putln('if (unlikely(__Pyx_ParseOptionalKeywords(%s, %s, %s, %s, %s, %s) < 0)) %s' % (
Naming.kwds_cname, Naming.kwds_cname,
Naming.pykwdlist_cname, Naming.pykwdlist_cname,
self.starstar_arg and self.starstar_arg.entry.cname or '0', self.starstar_arg and self.starstar_arg.entry.cname or '0',
values_array, values_array,
pos_arg_count, pos_arg_count,
self.name, self_name_csafe,
code.error_goto(self.pos))) code.error_goto(self.pos)))
code.putln('}') code.putln('}')
...@@ -4790,6 +4802,10 @@ class CClassDefNode(ClassDefNode): ...@@ -4790,6 +4802,10 @@ class CClassDefNode(ClassDefNode):
decorators = None decorators = None
shadow = False shadow = False
@property
def punycode_class_name(self):
    # C-identifier-safe form of the class name: delegates to
    # punycodify_name(), so the exact result depends on that helper.
    return punycodify_name(self.class_name)
def buffer_defaults(self, env): def buffer_defaults(self, env):
if not hasattr(self, '_buffer_defaults'): if not hasattr(self, '_buffer_defaults'):
from . import Buffer from . import Buffer
......
...@@ -1325,9 +1325,9 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations): ...@@ -1325,9 +1325,9 @@ class DecoratorTransform(ScopeTrackingTransform, SkipDeclarations):
_properties = None _properties = None
_map_property_attribute = { _map_property_attribute = {
'getter': '__get__', 'getter': EncodedString('__get__'),
'setter': '__set__', 'setter': EncodedString('__set__'),
'deleter': '__del__', 'deleter': EncodedString('__del__'),
}.get }.get
def visit_CClassDefNode(self, node): def visit_CClassDefNode(self, node):
...@@ -1701,7 +1701,7 @@ if VALUE is not None: ...@@ -1701,7 +1701,7 @@ if VALUE is not None:
e.type.create_from_py_utility_code(env) e.type.create_from_py_utility_code(env)
all_members_names = sorted([e.name for e in all_members]) all_members_names = sorted([e.name for e in all_members])
checksum = '0x%s' % hashlib.sha1(' '.join(all_members_names).encode('utf-8')).hexdigest()[:7] checksum = '0x%s' % hashlib.sha1(' '.join(all_members_names).encode('utf-8')).hexdigest()[:7]
unpickle_func_name = '__pyx_unpickle_%s' % node.class_name unpickle_func_name = '__pyx_unpickle_%s' % node.punycode_class_name
# TODO(robertwb): Move the state into the third argument # TODO(robertwb): Move the state into the third argument
# so it can be pickled *after* self is memoized. # so it can be pickled *after* self is memoized.
...@@ -2744,6 +2744,7 @@ class CreateClosureClasses(CythonTransform): ...@@ -2744,6 +2744,7 @@ class CreateClosureClasses(CythonTransform):
as_name = '%s_%s' % ( as_name = '%s_%s' % (
target_module_scope.next_id(Naming.closure_class_prefix), target_module_scope.next_id(Naming.closure_class_prefix),
node.entry.cname.replace('.','__')) node.entry.cname.replace('.','__'))
as_name = EncodedString(as_name)
entry = target_module_scope.declare_c_class( entry = target_module_scope.declare_c_class(
name=as_name, pos=node.pos, defining=True, name=as_name, pos=node.pos, defining=True,
......
...@@ -138,6 +138,14 @@ class EncodedString(_unicode): ...@@ -138,6 +138,14 @@ class EncodedString(_unicode):
def as_utf8_string(self): def as_utf8_string(self):
return bytes_literal(self.utf8encode(), 'utf8') return bytes_literal(self.utf8encode(), 'utf8')
def as_c_string_literal(self):
    """Encode this string and return it as a C string literal.

    When an explicit encoding is set, the bytes come from
    ``byteencode()``; otherwise the UTF-8 form from ``as_utf8_string()``
    is used. The literal itself is produced by the resulting bytes
    object's own ``as_c_string_literal()``.
    """
    if self.encoding is not None:
        encoded = bytes_literal(self.byteencode(), self.encoding)
        return encoded.as_c_string_literal()
    return self.as_utf8_string().as_c_string_literal()
def string_contains_surrogates(ustring): def string_contains_surrogates(ustring):
""" """
......
...@@ -42,6 +42,28 @@ def c_safe_identifier(cname): ...@@ -42,6 +42,28 @@ def c_safe_identifier(cname):
cname = Naming.pyrex_prefix + cname cname = Naming.pyrex_prefix + cname
return cname return cname
def punycodify_name(cname, mangle_with=None):
    """Return an ASCII-only version of *cname* for use in generated C.

    Pure-ASCII names are returned unchanged. Anything else is punycode
    encoded (approach modified from PEP 489) with '-' replaced by '_'.

    mangle_with -- optional prefix (the original note says it should be a
                   byte string). When given, it is joined to the encoded
                   name with '_'.
    """
    try:
        cname.encode('ascii')
    except UnicodeEncodeError:
        pass
    else:
        # already plain ASCII: nothing to do
        return cname

    encoded = cname.encode('punycode').replace(b'-', b'_').decode('ascii')
    if mangle_with:
        # sometimes it is necessary to mangle unicode names alone where
        # they'll be inserted directly into C, because the punycode
        # transformation can turn them into invalid identifiers
        return "%s_%s" % (mangle_with, encoded)
    if encoded.startswith(Naming.pyrex_prefix):
        # a punycode name could also be a valid ascii variable name so
        # change the prefix to distinguish
        return encoded.replace(Naming.pyrex_prefix,
                               Naming.pyunicode_identifier_prefix, 1)
    return encoded
class BufferAux(object): class BufferAux(object):
writable_needed = False writable_needed = False
...@@ -391,7 +413,7 @@ class Scope(object): ...@@ -391,7 +413,7 @@ class Scope(object):
def mangle(self, prefix, name = None): def mangle(self, prefix, name = None):
if name: if name:
return "%s%s%s" % (prefix, self.scope_prefix, name) return punycodify_name("%s%s%s" % (prefix, self.scope_prefix, name))
else: else:
return self.parent_scope.mangle(prefix, self.name) return self.parent_scope.mangle(prefix, self.name)
...@@ -446,6 +468,7 @@ class Scope(object): ...@@ -446,6 +468,7 @@ class Scope(object):
if not self.in_cinclude and cname and re.match("^_[_A-Z]+$", cname): if not self.in_cinclude and cname and re.match("^_[_A-Z]+$", cname):
# See https://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names # See https://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html#Reserved-Names
warning(pos, "'%s' is a reserved name in C." % cname, -1) warning(pos, "'%s' is a reserved name in C." % cname, -1)
entries = self.entries entries = self.entries
if name and name in entries and not shadow: if name and name in entries and not shadow:
old_entry = entries[name] old_entry = entries[name]
...@@ -737,7 +760,7 @@ class Scope(object): ...@@ -737,7 +760,7 @@ class Scope(object):
qualified_name = self.qualify_name(lambda_name) qualified_name = self.qualify_name(lambda_name)
entry = self.declare(None, func_cname, py_object_type, pos, 'private') entry = self.declare(None, func_cname, py_object_type, pos, 'private')
entry.name = lambda_name entry.name = EncodedString(lambda_name)
entry.qualified_name = qualified_name entry.qualified_name = qualified_name
entry.pymethdef_cname = pymethdef_cname entry.pymethdef_cname = pymethdef_cname
entry.func_cname = func_cname entry.func_cname = func_cname
...@@ -1740,7 +1763,7 @@ class LocalScope(Scope): ...@@ -1740,7 +1763,7 @@ class LocalScope(Scope):
Scope.__init__(self, name, outer_scope, parent_scope) Scope.__init__(self, name, outer_scope, parent_scope)
def mangle(self, prefix, name): def mangle(self, prefix, name):
return prefix + name return punycodify_name(prefix + name)
def declare_arg(self, name, type, pos): def declare_arg(self, name, type, pos):
# Add an entry for an argument of a function. # Add an entry for an argument of a function.
...@@ -2146,6 +2169,7 @@ class CClassScope(ClassScope): ...@@ -2146,6 +2169,7 @@ class CClassScope(ClassScope):
cname = name cname = name
if visibility == 'private': if visibility == 'private':
cname = c_safe_identifier(cname) cname = c_safe_identifier(cname)
cname = punycodify_name(cname, Naming.unicode_structmember_prefix)
if type.is_cpp_class and visibility != 'extern': if type.is_cpp_class and visibility != 'extern':
type.check_nullary_constructor(pos) type.check_nullary_constructor(pos)
self.use_utility_code(Code.UtilityCode("#include <new>")) self.use_utility_code(Code.UtilityCode("#include <new>"))
...@@ -2189,6 +2213,7 @@ class CClassScope(ClassScope): ...@@ -2189,6 +2213,7 @@ class CClassScope(ClassScope):
# I keep it in for now. is_member should be enough # I keep it in for now. is_member should be enough
# later on # later on
self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname
return entry return entry
def declare_pyfunction(self, name, pos, allow_redefine=False): def declare_pyfunction(self, name, pos, allow_redefine=False):
...@@ -2247,7 +2272,7 @@ class CClassScope(ClassScope): ...@@ -2247,7 +2272,7 @@ class CClassScope(ClassScope):
(args[0].type, name, self.parent_type)) (args[0].type, name, self.parent_type))
entry = self.lookup_here(name) entry = self.lookup_here(name)
if cname is None: if cname is None:
cname = c_safe_identifier(name) cname = punycodify_name(c_safe_identifier(name), Naming.unicode_vtabentry_prefix)
if entry: if entry:
if not entry.is_cfunction: if not entry.is_cfunction:
warning(pos, "'%s' redeclared " % name, 0) warning(pos, "'%s' redeclared " % name, 0)
...@@ -2428,7 +2453,7 @@ class CppClassScope(Scope): ...@@ -2428,7 +2453,7 @@ class CppClassScope(Scope):
class_name = self.name.split('::')[-1] class_name = self.name.split('::')[-1]
if name in (class_name, '__init__') and cname is None: if name in (class_name, '__init__') and cname is None:
cname = "%s__init__%s" % (Naming.func_prefix, class_name) cname = "%s__init__%s" % (Naming.func_prefix, class_name)
name = '<init>' name = EncodedString('<init>')
type.return_type = PyrexTypes.CVoidType() type.return_type = PyrexTypes.CVoidType()
# This is called by the actual constructor, but need to support # This is called by the actual constructor, but need to support
# arguments that cannot by called by value. # arguments that cannot by called by value.
...@@ -2442,7 +2467,7 @@ class CppClassScope(Scope): ...@@ -2442,7 +2467,7 @@ class CppClassScope(Scope):
type.args = [maybe_ref(arg) for arg in type.args] type.args = [maybe_ref(arg) for arg in type.args]
elif name == '__dealloc__' and cname is None: elif name == '__dealloc__' and cname is None:
cname = "%s__dealloc__%s" % (Naming.func_prefix, class_name) cname = "%s__dealloc__%s" % (Naming.func_prefix, class_name)
name = '<del>' name = EncodedString('<del>')
type.return_type = PyrexTypes.CVoidType() type.return_type = PyrexTypes.CVoidType()
if name in ('<init>', '<del>') and type.nogil: if name in ('<init>', '<del>') and type.nogil:
for base in self.type.base_classes: for base in self.type.base_classes:
......
...@@ -569,6 +569,17 @@ class MethodDispatcherTransform(EnvTransform): ...@@ -569,6 +569,17 @@ class MethodDispatcherTransform(EnvTransform):
### dispatch to specific handlers ### dispatch to specific handlers
def _find_handler(self, match_name, has_kwargs): def _find_handler(self, match_name, has_kwargs):
try:
match_name.encode('ascii')
except UnicodeEncodeError:
# specifically when running the Cython compiler under Python 2
# getattr can't take a unicode string.
# Classes with unicode names won't have specific handlers and thus it
# should be OK to return None.
# Doing the test here ensures that the same code gets run on
# Python 2 and 3
return None
call_type = has_kwargs and 'general' or 'simple' call_type = has_kwargs and 'general' or 'simple'
handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None) handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None)
if handler is None: if handler is None:
......
#!/usr/bin/env python3
#
# Updates Cython's Lexicon.py with the unicode characters that are accepted as
# identifiers. Should be run with the most recent version of Python possible
# to ensure that Lexicon is as complete as possible.
#
# Python3 only (it relies on str.isidentifier which is a Python 3 addition)
#
# Run with either
# --overwrite to update the existing Lexicon.py file
# --here to create a copy of Lexicon.py in the current directory
import sys
from io import StringIO
import os
import functools
# Make sure we import the right Cython
# Walk two directory levels up from this script; if a "Cython" entry exists
# there, put that directory first on sys.path so the import below picks up
# the local checkout instead of an installed copy.
cythonpath, _ = os.path.split(os.path.realpath(__file__)) # bin directory
cythonpath, _ = os.path.split(cythonpath)
if os.path.exists(os.path.join(cythonpath,"Cython")):
    sys.path.insert(0, cythonpath)
    print("Found (and using) local cython directory")
# else we aren't in a development directory
from Cython.Compiler import Lexicon
def main():
    """Regenerate the unicode character tables inside Lexicon.py.

    Accepts at most one command line argument:
      --overwrite  rewrite the imported Lexicon.py in place (default)
      --here       write the result to ./Lexicon.py instead

    The text between the "# BEGIN GENERATED CODE" and
    "# END GENERATED CODE" marker lines is replaced with freshly generated
    definitions; everything else is copied through unchanged. If the
    markers are not both found, nothing is written.
    """
    arg = '--overwrite'
    if len(sys.argv) == 2:
        arg = sys.argv[1]
    if len(sys.argv) > 2 or arg not in ['--overwrite','--here']:
        print("""Call the script with either:
--overwrite to update the existing Lexicon.py file (default)
--here to create an version of Lexicon.py in the current directory
""")
        return

    generated_code = StringIO()
    print("# generated with:\n #", sys.implementation.name, sys.version, file=generated_code)
    print(file=generated_code)
    print(start_expression(), file=generated_code)
    print(file=generated_code)
    print(cont_expression(), file=generated_code)
    print(file=generated_code)
    generated_code = generated_code.getvalue()

    output = StringIO()
    mode = 0  # 1 when found generated section, 2 afterwards
    print("Reading file", Lexicon.__file__)
    # The generated tables contain raw non-ASCII characters, so the file
    # must be read and written as UTF-8 regardless of the platform's
    # default locale encoding.
    with open(Lexicon.__file__, 'r', encoding='utf-8') as f:
        for line in f:
            if mode != 1:
                output.write(line)
            else:
                # inside the old generated section: drop lines until the
                # end marker, which is kept
                if line.strip() == "# END GENERATED CODE":
                    mode = 2
                    output.write(line)
            if mode == 0:
                if line.strip() == "# BEGIN GENERATED CODE":
                    mode = 1
                    output.write(generated_code)

    if mode != 2:
        print("Warning: generated code section not found - code not inserted")
        return

    if arg == "--here":
        outfile = "Lexicon.py"
    else:
        assert arg == "--overwrite"
        outfile = Lexicon.__file__

    print("Writing to file", outfile)
    with open(outfile, 'w', encoding='utf-8') as f:
        f.write(output.getvalue())
# The easiest way to generate an appropriate character set is just to use the str.isidentifier method
# An alternative approach for getting character sets is at https://stackoverflow.com/a/49332214/4657412
@functools.lru_cache(None)
def get_start_characters_as_number():
    """Return the code points that are valid on their own as an identifier.

    Scans range(sys.maxunicode) and keeps every code point whose single
    character passes str.isidentifier. The result is cached.
    """
    code_points = []
    for code_point in range(sys.maxunicode):
        if chr(code_point).isidentifier():
            code_points.append(code_point)
    return code_points
def get_continue_characters_as_number():
    """Return the code points that are valid after the first identifier char.

    A code point qualifies when 'a' followed by its character passes
    str.isidentifier.
    """
    code_points = []
    for code_point in range(sys.maxunicode):
        candidate = 'a' + chr(code_point)
        if candidate.isidentifier():
            code_points.append(code_point)
    return code_points
def get_continue_not_start_as_number():
    """Return, sorted, the code points valid only as continuation characters.

    These are the continuation code points that are not also valid start
    code points.
    """
    start_points = get_start_characters_as_number()
    continue_points = get_continue_characters_as_number()
    return sorted(set(continue_points).difference(start_points))
def to_ranges(char_num_list):
    """Split code points into isolated characters and closed ranges.

    char_num_list -- iterable of integer code points (any order;
                     duplicates are ignored).

    Returns a tuple ``(single_chars, ranges)``:
      single_chars -- list of one-character strings, one per code point
                      that has no adjacent neighbour in the input
      ranges       -- list of two-character strings ``first + last``, one
                      per run of consecutive code points (both ends
                      inclusive)

    Both lists are in ascending code point order. An empty input gives
    two empty lists.
    """
    code_points = sorted(set(char_num_list))
    single_chars = []
    ranges = []
    if not code_points:
        return single_chars, ranges

    def flush(first, last):
        # record one finished run as either a lone character or a range
        if first == last:
            single_chars.append(chr(first))
        else:
            ranges.append(chr(first) + chr(last))

    run_start = previous = code_points[0]
    for code_point in code_points[1:]:
        if code_point - 1 != previous:
            # discontinuous: close the run that just ended
            flush(run_start, previous)
            run_start = code_point
        previous = code_point
    # The last run has no following gap to trigger a flush, so emit it
    # explicitly; otherwise the highest characters would be lost.
    flush(run_start, previous)
    return single_chars, ranges
def make_split_strings(chars, splitby=60):
    """Format *chars* as consecutive u"..." literals, one per line.

    chars   -- sequence of characters (a string or a list of strings)
    splitby -- maximum number of items placed in each literal

    Returns the literals joined by a newline plus indentation; an empty
    input yields an empty string.
    """
    literals = [
        'u"{}"'.format("".join(chars[offset:offset + splitby]))
        for offset in range(0, len(chars), splitby)
    ]
    return "\n ".join(literals)
def start_expression():
    """Return source text defining the identifier start-character tables.

    The text assigns ``unicode_start_ch_any`` (isolated characters) and
    ``unicode_start_ch_range`` (first/last pairs of closed ranges), each
    as a parenthesised sequence of u"..." literals.
    """
    singles, pairs = to_ranges(get_start_characters_as_number())
    pieces = [
        "unicode_start_ch_any = (\n ",
        make_split_strings("".join(singles)),
        ")\n",
        "unicode_start_ch_range = (\n ",
        make_split_strings("".join(pairs)),
        ")\n",
    ]
    return "".join(pieces)
def cont_expression():
    """Return source text defining the continuation-only character tables.

    The text assigns ``unicode_continuation_ch_any`` (isolated characters)
    and ``unicode_continuation_ch_range`` (first/last pairs of closed
    ranges), built from the code points that may continue an identifier
    but not start one.
    """
    singles, pairs = to_ranges(get_continue_not_start_as_number())
    pieces = [
        "unicode_continuation_ch_any = (\n ",
        make_split_strings("".join(singles)),
        ")\n",
        "unicode_continuation_ch_range = (\n ",
        make_split_strings("".join(pairs)),
        ")\n",
    ]
    return "".join(pieces)
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
# -*- coding: utf-8 -*-
# cython: language_level=3
# Declaration of a module-level cdef function with a non-ASCII name, so that
# it can be cimported by other modules.
cdef Fα1()
# Declaration of an extension type with a non-ASCII name. It pairs ASCII
# ("boring") and non-ASCII ("εxciting") method names for both cdef and
# cpdef methods, plus a public attribute with a non-ASCII name.
cdef class Γναμε2:
    cdef public int α
    cdef boring_cdef(self)
    cdef εxciting_cdef(self)
    cpdef boring_cpdef(self)
    cpdef εxciting_cpdef(self)
# -*- coding: utf-8 -*-
# cython: language_level=3
# mode: run
# tag: pep3131
# Code with unicode identifiers can be compiled with Cython running either Python 2 or 3.
# However Python access to unicode identifiers is only possible in Python 3. In Python 2
# it's only really safe to use the unicode identifiers for purely Cython interfaces
# (although this isn't enforced...). Therefore the majority of the doctests are
# Python3 only and only a limited set are run in Python2.
# This is controlled by putting the Python3 only tests in the module __doc__ attribute
# Most of the individual function and class docstrings are only present as a compile test
import sys
if sys.version_info[0]>2:
__doc__ = """
>>> f()()
2
>>> f().__name__
'nεsted'
The test is mainly to see if the traceback is generated correctly
>>> print_traceback_name()
unicode_identifiers.Fα1
Just check that a cpdef function is callable
>>> Fα3()
1
>>> Γναμε2.ναμε3
1
>>> x = Γναμε2()
>>> print(x.α)
100
>>> x.α = 200
>>> print(x.α)
200
Test generation of locals()
>>> sorted(Γναμε2().boring_function(1,2).keys())
['self', 'somevalue', 'x', 'ναμε5', 'ναμε6']
>>> Γναμε2().boring_cpdef() - Γναμε2().εxciting_cpdef()
0
>>> function_taking_fancy_argument(Γναμε2()).ναμε3
1
>>> NormalClassΓΓ().ναμε
10
>>> NormalClassΓΓ().εxciting_function(None).__qualname__
'NormalClassΓΓ.εxciting_function.<locals>.nestεd'
"""
else:
__doc__ = ""
global_ναμε1 = None
cdef double global_ναμε2 = 1.2
# Closure test: the inner function has a non-ASCII name and reads a
# non-ASCII local of the enclosing function.
def f():
    """docstring"""
    ναμε2 = 2
    def nεsted():
        return ναμε2
    return nεsted
# Ƒ is notably awkward because its punycode starts with "2" causing
# C compile errors. Therefore try a few different variations...
# Variation 1: Ƒ as a cdef method, alongside a private non-ASCII attribute.
cdef class A:
    cdef int ναμε
    def __init__(self):
        self.ναμε = 1
    cdef Ƒ(self):
        return self.ναμε==1
    def regular_function(self):
        """
        Can use unicode cdef functions and (private) attributes internally
        >>> A().regular_function()
        True
        """
        # ASCII-named Python entry point that calls the cdef method Ƒ
        return self.Ƒ()
# Variation 2: Ƒ as a cpdef method.
cdef class B:
    cpdef Ƒ(self):
        pass
# Variation 3: Ƒ as a regular def method of an extension type.
cdef class C:
    def Ƒ(self):
        pass
# Variation 4: Ƒ as a C-level attribute of an extension type.
cdef class D:
    cdef int Ƒ
# ASCII-named function whose locals (one typed C int, one Python object)
# have non-ASCII names; its doctest is not restricted to Python 3.
def regular_function():
    """
    Unicode names can be used internally on python2
    >>> regular_function()
    10
    """
    cdef int variableƑ = 5
    ναμε2 = 2
    return variableƑ*ναμε2
# cdef function with a non-ASCII name that always raises; it is called by
# print_traceback_name() to check the name recorded in the traceback.
cdef Fα1():
    """docstring"""
    ναμε2 = 2
    raise RuntimeError() # forces generation of a traceback
# Calls Fα1(), catches its RuntimeError and prints the function name stored
# in the second traceback entry.
def print_traceback_name():
    try:
        Fα1()
    except RuntimeError as e:
        import traceback
        # get the name of one level up in the traceback
        # (extract at most 2 entries; index [1] is the second entry and
        # index [2] of an entry is its function name)
        print(traceback.extract_tb(e.__traceback__,2)[1][2])
# Non-ASCII-named def function holding one ASCII-named and one
# non-ASCII-named nested function, each with a docstring; returns both.
def Fα2():
    """docstring"""
    def nested_normal():
        """docstring"""
        pass
    def nεstεd_uni():
        """docstring"""
        pass
    return nested_normal, nεstεd_uni
# Module-level cpdef function with a non-ASCII name.
cpdef Fα3():
    """docstring"""
    return 1
# Extension type with a non-ASCII name. Each method kind (def, cdef, cpdef)
# appears once with an ASCII name ("boring") and once with a non-ASCII name
# ("εxciting").
cdef class Γναμε2:
    """
    docstring
    """
    # class-level attribute with a non-ASCII name
    ναμε3 = 1

    def __init__(self):
        self.α = 100
    def boring_function(self,x,ναμε5):
        """docstring"""
        ναμε6 = ναμε5
        somevalue = global_ναμε1 == self.ναμε3
        # returns the local namespace, which includes non-ASCII names
        return locals()
    def εxciting_function(self,y):
        """docstring"""
        def nestεd():
            pass
        return nestεd

    cdef boring_cdef(self):
        """docstring"""
        pass
    cdef εxciting_cdef(self):
        """docstring"""
        pass

    cpdef boring_cpdef(self):
        """docstring"""
        return 2
    cpdef εxciting_cpdef(self):
        """docstring"""
        return 2
# ASCII-named extension type inheriting from the non-ASCII-named one.
cdef class Derived(Γναμε2):
    pass
cdef Γναμε2 global_ναμε3 = Γναμε2()
# Argument typed as the non-ASCII-named extension type and itself given a
# non-ASCII name; the argument is returned unchanged.
def function_taking_fancy_argument(Γναμε2 αrg):
    return αrg
# Regular Python class with a non-ASCII name, derived from the extension
# type Γναμε2 and redefining its def methods.
class NormalClassΓΓ(Γναμε2):
    """
    docstring
    """
    def __init__(self):
        self.ναμε = 10
    def boring_function(self,x,ναμε5):
        """docstring"""
        ναμε6 = ναμε5
        somevalue = global_ναμε1 == self.ναμε3
        return locals()
    def εxciting_function(self,y):
        """docstring"""
        def nestεd():
            pass
        return nestεd
# Python 2 only: remove the non-ASCII-named classes from the module.
if sys.version_info[0]<=2:
    # These symbols are causing problems for doctest
    del NormalClassΓΓ
    # this one is removed through globals(), keyed by the UTF-8 encoded name
    del globals()[u'Γναμε2'.encode('utf-8')]
# -*- coding: utf-8 -*-
# cython: language_level = 3
# mode: compile
# tag: pep3131
# compile only test since there's no way to get
# it to import another test module at runtime
# this test looks at [c]importing unicode stuff
from unicode_identifiers cimport Fα1, Γναμε2
cimport unicode_identifiers
from unicode_identifiers cimport Γναμε2 as Γναμε3
from unicode_identifiers import NormalClassΓΓ
from unicode_identifiers import NormalClassΓΓ as NörmalCläss
# Base class referenced through the cimported module name.
cdef class C(unicode_identifiers.Γναμε2):
    pass
# Base class referenced through the directly cimported name.
cdef class D(Γναμε2):
    pass
# Base class referenced through the alias given by "cimport ... as Γναμε3".
cdef class E(Γναμε3):
    pass
# Calls the cimported cdef function both by its bare name and qualified
# with the module name.
def f():
    Fα1()
    unicode_identifiers.Fα1()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment