Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
859cd472
Commit
859cd472
authored
Feb 09, 2013
by
Serhiy Storchaka
Browse files
Options
Browse Files
Download
Plain Diff
Issue #17156: pygettext.py now uses the encoding of the source file and correctly
writes and escapes non-ASCII characters.
parents
7451a72e
b6ed1734
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
36 additions
and
33 deletions
+36
-33
Misc/NEWS
Misc/NEWS
+3
-0
Tools/i18n/pygettext.py
Tools/i18n/pygettext.py
+33
-33
No files found.
Misc/NEWS
View file @
859cd472
...
...
@@ -169,6 +169,9 @@ Core and Builtins
Library
-------
- Issue #17156: pygettext.py now uses the encoding of the source file and correctly
writes and escapes non-ASCII characters.
- Issue #16564: Fixed regression relative to Python2 in the operation of
email.encoders.encode_noop when used with binary data.
...
...
Tools/i18n/pygettext.py
View file @
859cd472
...
...
@@ -188,8 +188,8 @@ msgstr ""
"Last-Translator: FULL NAME <EMAIL@ADDRESS>
\
\
n"
"Language-Team: LANGUAGE <LL@li.org>
\
\
n"
"MIME-Version: 1.0
\
\
n"
"Content-Type: text/plain; charset=
CHARSET
\
\
n"
"Content-Transfer-Encoding:
ENCODING
\
\
n"
"Content-Type: text/plain; charset=
%(charset)s
\
\
n"
"Content-Transfer-Encoding:
%(encoding)s
\
\
n"
"Generated-By: pygettext.py %(version)s
\
\
n"
'''
)
...
...
@@ -203,35 +203,32 @@ def usage(code, msg=''):
escapes
=
[]
def
make_escapes
(
pass_iso8859
):
global
escapes
if
pass_iso8859
:
# Allow iso-8859 characters to pass through so that e.g. 'msgid
def
make_escapes
(
pass_nonascii
):
global
escapes
,
escape
if
pass_nonascii
:
# Allow non-ascii characters to pass through so that e.g. 'msgid
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod
=
128
escape
=
escape_ascii
else
:
mod
=
256
for
i
in
range
(
256
):
if
32
<=
(
i
%
mod
)
<=
126
:
escapes
.
append
(
chr
(
i
))
e
lse
:
escapes
.
append
(
"
\
\
%03o"
%
i
)
escapes
[
ord
(
'
\
\
'
)]
=
'
\
\
\
\
'
escapes
[
ord
(
'
\
t
'
)]
=
'
\
\
t
'
escapes
[
ord
(
'
\
r
'
)]
=
'
\
\
r
'
escapes
[
ord
(
'
\
n
'
)]
=
'
\
\
n
'
escapes
[
ord
(
'
\
"
'
)]
=
'
\
\
"'
escape
=
escape_nonascii
escapes
=
[
r"\
%
03o"
%
i
for
i
in
range
(
mod
)]
for
i
in
range
(
32
,
127
):
e
scapes
[
i
]
=
chr
(
i
)
escapes
[
ord
(
'
\
\
'
)]
=
r'\\'
escapes
[
ord
(
'
\
t
'
)]
=
r'\t
'
escapes
[
ord
(
'
\
r
'
)]
=
r'\r
'
escapes
[
ord
(
'
\
n
'
)]
=
r'\n
'
escapes
[
ord
(
'
\
"
'
)]
=
r'\"
'
def
escape_ascii
(
s
,
encoding
):
return
''
.
join
(
escapes
[
ord
(
c
)]
if
ord
(
c
)
<
128
else
c
for
c
in
s
)
def
escape
(
s
):
global
escapes
s
=
list
(
s
)
for
i
in
range
(
len
(
s
)):
s
[
i
]
=
escapes
[
ord
(
s
[
i
])]
return
EMPTYSTRING
.
join
(
s
)
def
escape_nonascii
(
s
,
encoding
):
return
''
.
join
(
escapes
[
b
]
for
b
in
s
.
encode
(
encoding
))
def
safe_eval
(
s
):
...
...
@@ -239,18 +236,18 @@ def safe_eval(s):
return
eval
(
s
,
{
'__builtins__'
:{}},
{})
def
normalize
(
s
):
def
normalize
(
s
,
encoding
):
# This converts the various Python string types into a format that is
# appropriate for .po files, namely much closer to C style.
lines
=
s
.
split
(
'
\
n
'
)
if
len
(
lines
)
==
1
:
s
=
'"'
+
escape
(
s
)
+
'"'
s
=
'"'
+
escape
(
s
,
encoding
)
+
'"'
else
:
if
not
lines
[
-
1
]:
del
lines
[
-
1
]
lines
[
-
1
]
=
lines
[
-
1
]
+
'
\
n
'
for
i
in
range
(
len
(
lines
)):
lines
[
i
]
=
escape
(
lines
[
i
])
lines
[
i
]
=
escape
(
lines
[
i
]
,
encoding
)
lineterm
=
'
\
\
n"
\
n
"'
s
=
'""
\
n
"'
+
lineterm
.
join
(
lines
)
+
'"'
return
s
...
...
@@ -447,7 +444,10 @@ class TokenEater:
timestamp
=
time
.
strftime
(
'%Y-%m-%d %H:%M+%Z'
)
# The time stamp in the header doesn't have the same format as that
# generated by xgettext...
print
(
pot_header
%
{
'time'
:
timestamp
,
'version'
:
__version__
},
file
=
fp
)
encoding
=
fp
.
encoding
if
fp
.
encoding
else
'UTF-8'
print
(
pot_header
%
{
'time'
:
timestamp
,
'version'
:
__version__
,
'charset'
:
encoding
,
'encoding'
:
'8bit'
},
file
=
fp
)
# Sort the entries. First sort each particular entry's keys, then
# sort all the entries by their first item.
reverse
=
{}
...
...
@@ -491,7 +491,7 @@ class TokenEater:
print
(
locline
,
file
=
fp
)
if
isdocstring
:
print
(
'#, docstring'
,
file
=
fp
)
print
(
'msgid'
,
normalize
(
k
),
file
=
fp
)
print
(
'msgid'
,
normalize
(
k
,
encoding
),
file
=
fp
)
print
(
'msgstr ""
\
n
'
,
file
=
fp
)
...
...
@@ -587,7 +587,7 @@ def main():
fp
.
close
()
# calculate escapes
make_escapes
(
options
.
escape
)
make_escapes
(
not
options
.
escape
)
# calculate all keywords
options
.
keywords
.
extend
(
default_keywords
)
...
...
@@ -620,17 +620,17 @@ def main():
if
filename
==
'-'
:
if
options
.
verbose
:
print
(
_
(
'Reading standard input'
))
fp
=
sys
.
stdin
fp
=
sys
.
stdin
.
buffer
closep
=
0
else
:
if
options
.
verbose
:
print
(
_
(
'Working on %s'
)
%
filename
)
fp
=
open
(
filename
)
fp
=
open
(
filename
,
'rb'
)
closep
=
1
try
:
eater
.
set_filename
(
filename
)
try
:
tokens
=
tokenize
.
generate_tokens
(
fp
.
readline
)
tokens
=
tokenize
.
tokenize
(
fp
.
readline
)
for
_token
in
tokens
:
eater
(
*
_token
)
except
tokenize
.
TokenError
as
e
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment