Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
b6ed1734
Commit
b6ed1734
authored
Feb 09, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Issue #17156: pygettext.py now uses the encoding of the source file and correctly
writes and escapes non-ASCII characters.
parent
041d5533
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
33 deletions
+36
-33
Misc/NEWS
Misc/NEWS
+3
-0
Tools/i18n/pygettext.py
Tools/i18n/pygettext.py
+33
-33
No files found.
Misc/NEWS
View file @
b6ed1734
...
...
@@ -215,6 +215,9 @@ Core and Builtins
Library
-------
- Issue #17156: pygettext.py now uses the encoding of the source file and correctly
writes and escapes non-ASCII characters.
- Issue #16564: Fixed regression relative to Python2 in the operation of
email.encoders.encode_noop when used with binary data.
...
...
Tools/i18n/pygettext.py
View file @
b6ed1734
...
...
@@ -189,8 +189,8 @@ msgstr ""
"Last-Translator: FULL NAME <EMAIL@ADDRESS>
\
\
n"
"Language-Team: LANGUAGE <LL@li.org>
\
\
n"
"MIME-Version: 1.0
\
\
n"
"Content-Type: text/plain; charset=
CHARSET
\
\
n"
"Content-Transfer-Encoding:
ENCODING
\
\
n"
"Content-Type: text/plain; charset=
%(charset)s
\
\
n"
"Content-Transfer-Encoding:
%(encoding)s
\
\
n"
"Generated-By: pygettext.py %(version)s
\
\
n"
'''
)
...
...
@@ -204,35 +204,32 @@ def usage(code, msg=''):
escapes
=
[]
def
make_escapes
(
pass_iso8859
):
global
escapes
if
pass_iso8859
:
# Allow iso-8859 characters to pass through so that e.g. 'msgid
def
make_escapes
(
pass_nonascii
):
global
escapes
,
escape
if
pass_nonascii
:
# Allow non-ascii characters to pass through so that e.g. 'msgid
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod
=
128
escape
=
escape_ascii
else
:
mod
=
256
for
i
in
range
(
256
):
if
32
<=
(
i
%
mod
)
<=
126
:
escapes
.
append
(
chr
(
i
))
e
lse
:
escapes
.
append
(
"
\
\
%03o"
%
i
)
escapes
[
ord
(
'
\
\
'
)]
=
'
\
\
\
\
'
escapes
[
ord
(
'
\
t
'
)]
=
'
\
\
t
'
escapes
[
ord
(
'
\
r
'
)]
=
'
\
\
r
'
escapes
[
ord
(
'
\
n
'
)]
=
'
\
\
n
'
escapes
[
ord
(
'
\
"
'
)]
=
'
\
\
"'
escape
=
escape_nonascii
escapes
=
[
r"\
%
03o"
%
i
for
i
in
range
(
mod
)]
for
i
in
range
(
32
,
127
):
e
scapes
[
i
]
=
chr
(
i
)
escapes
[
ord
(
'
\
\
'
)]
=
r'\\'
escapes
[
ord
(
'
\
t
'
)]
=
r'\t
'
escapes
[
ord
(
'
\
r
'
)]
=
r'\r
'
escapes
[
ord
(
'
\
n
'
)]
=
r'\n
'
escapes
[
ord
(
'
\
"
'
)]
=
r'\"
'
def
escape_ascii
(
s
,
encoding
):
return
''
.
join
(
escapes
[
ord
(
c
)]
if
ord
(
c
)
<
128
else
c
for
c
in
s
)
def
escape
(
s
):
global
escapes
s
=
list
(
s
)
for
i
in
range
(
len
(
s
)):
s
[
i
]
=
escapes
[
ord
(
s
[
i
])]
return
EMPTYSTRING
.
join
(
s
)
def
escape_nonascii
(
s
,
encoding
):
return
''
.
join
(
escapes
[
b
]
for
b
in
s
.
encode
(
encoding
))
def
safe_eval
(
s
):
...
...
@@ -240,18 +237,18 @@ def safe_eval(s):
return
eval
(
s
,
{
'__builtins__'
:{}},
{})
def
normalize
(
s
):
def
normalize
(
s
,
encoding
):
# This converts the various Python string types into a format that is
# appropriate for .po files, namely much closer to C style.
lines
=
s
.
split
(
'
\
n
'
)
if
len
(
lines
)
==
1
:
s
=
'"'
+
escape
(
s
)
+
'"'
s
=
'"'
+
escape
(
s
,
encoding
)
+
'"'
else
:
if
not
lines
[
-
1
]:
del
lines
[
-
1
]
lines
[
-
1
]
=
lines
[
-
1
]
+
'
\
n
'
for
i
in
range
(
len
(
lines
)):
lines
[
i
]
=
escape
(
lines
[
i
])
lines
[
i
]
=
escape
(
lines
[
i
]
,
encoding
)
lineterm
=
'
\
\
n"
\
n
"'
s
=
'""
\
n
"'
+
lineterm
.
join
(
lines
)
+
'"'
return
s
...
...
@@ -448,7 +445,10 @@ class TokenEater:
timestamp
=
time
.
strftime
(
'%Y-%m-%d %H:%M+%Z'
)
# The time stamp in the header doesn't have the same format as that
# generated by xgettext...
print
(
pot_header
%
{
'time'
:
timestamp
,
'version'
:
__version__
},
file
=
fp
)
encoding
=
fp
.
encoding
if
fp
.
encoding
else
'UTF-8'
print
(
pot_header
%
{
'time'
:
timestamp
,
'version'
:
__version__
,
'charset'
:
encoding
,
'encoding'
:
'8bit'
},
file
=
fp
)
# Sort the entries. First sort each particular entry's keys, then
# sort all the entries by their first item.
reverse
=
{}
...
...
@@ -492,7 +492,7 @@ class TokenEater:
print
(
locline
,
file
=
fp
)
if
isdocstring
:
print
(
'#, docstring'
,
file
=
fp
)
print
(
'msgid'
,
normalize
(
k
),
file
=
fp
)
print
(
'msgid'
,
normalize
(
k
,
encoding
),
file
=
fp
)
print
(
'msgstr ""
\
n
'
,
file
=
fp
)
...
...
@@ -588,7 +588,7 @@ def main():
fp
.
close
()
# calculate escapes
make_escapes
(
options
.
escape
)
make_escapes
(
not
options
.
escape
)
# calculate all keywords
options
.
keywords
.
extend
(
default_keywords
)
...
...
@@ -621,17 +621,17 @@ def main():
if
filename
==
'-'
:
if
options
.
verbose
:
print
(
_
(
'Reading standard input'
))
fp
=
sys
.
stdin
fp
=
sys
.
stdin
.
buffer
closep
=
0
else
:
if
options
.
verbose
:
print
(
_
(
'Working on %s'
)
%
filename
)
fp
=
open
(
filename
)
fp
=
open
(
filename
,
'rb'
)
closep
=
1
try
:
eater
.
set_filename
(
filename
)
try
:
tokens
=
tokenize
.
generate_tokens
(
fp
.
readline
)
tokens
=
tokenize
.
tokenize
(
fp
.
readline
)
for
_token
in
tokens
:
eater
(
*
_token
)
except
tokenize
.
TokenError
as
e
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment