Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
ccc9e617
Commit
ccc9e617
authored
Oct 23, 2012
by
Ezio Melotti
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
#16245: add a script to generate the html.entities.html5 dict.
parent
d25b3982
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
106 additions
and
0 deletions
+106
-0
Misc/ACKS
Misc/ACKS
+1
-0
Tools/scripts/parse_html5_entities.py
Tools/scripts/parse_html5_entities.py
+105
-0
No files found.
Misc/ACKS
View file @
ccc9e617
...
...
@@ -929,6 +929,7 @@ Amrit Prem
Paul Prescod
Donovan Preston
Paul Price
Iuliia Proskurnia
Jyrki Pulliainen
Steve Purcell
Eduardo Pérez
...
...
Tools/scripts/parse_html5_entities.py
0 → 100644
View file @
ccc9e617
#!/usr/bin/env python3
"""
Utility for parsing HTML5 entity definitions available from:
http://dev.w3.org/html5/spec/entities.json
Written by Ezio Melotti and Iuliia Proskurnia.
"""
import
os
import
sys
import
json
from
urllib.request
import
urlopen
from
html.entities
import
html5
# Default location of the JSON entity definitions; this is the URL that
# the script passes to get_json() when run from the command line.
# NOTE(review): plain-HTTP address -- confirm it is still being served.
entities_url = 'http://dev.w3.org/html5/spec/entities.json'
def get_json(url):
    """Fetch *url* and return its JSON payload as Python objects.

    The response body is read in full, decoded as UTF-8 and then parsed
    with json.loads().
    """
    with urlopen(url) as response:
        raw = response.read()
    text = raw.decode('utf-8')
    return json.loads(text)
def create_dict(entities):
    """Build the html5 mapping from the decoded JSON object.

    Each key of *entities* has its leading '&' characters stripped and is
    mapped to the 'characters' item of the corresponding value.
    """
    return {
        reference.lstrip('&'): info['characters']
        for reference, info in entities.items()
    }
def compare_dicts(old, new):
    """Print a report of how *new* differs from *old*.

    Three sections are printed, each only when it is non-empty and each
    sorted by entity name: names present only in *new* (added), names
    present only in *old* (removed), and names present in both whose
    values differ (modified, shown as old -> new).
    """
    old_names = old.keys()
    new_names = new.keys()

    only_in_new = sorted(new_names - old_names)
    if only_in_new:
        print('{} entitie(s) have been added:'.format(len(only_in_new)))
        for entity in only_in_new:
            print(' {!r}: {!r}'.format(entity, new[entity]))

    only_in_old = sorted(old_names - new_names)
    if only_in_old:
        print('{} entitie(s) have been removed:'.format(len(only_in_old)))
        for entity in only_in_old:
            print(' {!r}: {!r}'.format(entity, old[entity]))

    # Names are unique, so sorting the triples orders them by name.
    modified = sorted(
        (entity, old[entity], new[entity])
        for entity in old_names & new_names
        if old[entity] != new[entity]
    )
    if modified:
        print('{} entitie(s) have been modified:'.format(len(modified)))
        for triple in modified:
            print(' {!r}: {!r} -> {!r}'.format(*triple))
def write_items(entities, file=None):
    """Write the items of the dictionary in the specified file.

    The output is a Python dict literal assigned to the name ``html5``,
    one item per line, with the values rendered in ASCII-only form.

    *entities* maps entity names to their characters.  *file* is any
    object accepted by print(); when omitted or None, the value of
    sys.stdout at call time is used, so redirections of sys.stdout that
    happen after this module was imported are honoured.
    """
    if file is None:
        file = sys.stdout
    # The keys in the generated dictionary should be sorted
    # in a case-insensitive way, however, when two keys are equal,
    # the uppercase version should come first so that the result
    # looks like: ['Aacute', 'aacute', 'Aacute;', 'aacute;', ...]
    # Sorting on (lowercased key, key) does this in one pass: the
    # case-sensitive comparison only breaks ties between keys that are
    # equal ignoring case, and uppercase chars sort before lowercase.
    keys = sorted(entities, key=lambda name: (name.lower(), name))
    print('html5 = {', file=file)
    for name in keys:
        print(' {!r}: {!a},'.format(name, entities[name]), file=file)
    print('}', file=file)
if __name__ == '__main__':
    # Command-line behaviour:
    #   without args  print a diff between html.entities.html5 and new_html5
    #   with --create print the new html5 dict
    #   with --patch  patch the Lib/html/entities.py file
    new_html5 = create_dict(get_json(entities_url))
    if '--create' in sys.argv:
        print('# map the HTML5 named character references to the '
              'equivalent Unicode character(s)')
        print('# Generated by {}. Do not edit manually.'.format(__file__))
        write_items(new_html5)
    elif '--patch' in sys.argv:
        fname = 'Lib/html/entities.py'
        temp_fname = fname + '.temp'
        # Python source files are UTF-8 by default, so read and write the
        # module with an explicit encoding instead of the locale's one.
        with open(fname, encoding='utf-8') as f1, \
                open(temp_fname, 'w', encoding='utf-8') as f2:
            skip = False
            for line in f1:
                if line.startswith('html5 = {'):
                    # write_items() emits the whole dict, including the
                    # opening 'html5 = {' line and the closing '}'.
                    write_items(new_html5, file=f2)
                    skip = True
                    continue
                if skip:
                    # skip the old items until the }
                    if line.startswith('}'):
                        skip = False
                    continue
                f2.write(line)
        # Swap the patched copy into place in a single step, so there is
        # no moment at which Lib/html/entities.py does not exist.
        os.replace(temp_fname, fname)
    else:
        if html5 == new_html5:
            print('The current dictionary is updated.')
        else:
            compare_dicts(html5, new_html5)
            print('Run "./python {0} --patch" to update Lib/html/entities.py '
                  'or "./python {0} --create" to see the generated '
                  'dictionary.'.format(__file__))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment