Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
137
Merge Requests
137
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
nexedi
erp5
Commits
b2b504d4
Commit
b2b504d4
authored
Feb 07, 2024
by
Jérome Perrin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
TextContent base_data bytes
parent
abd1bb99
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
32 deletions
+32
-32
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.Document.py
...tTemplateItem/portal_components/document.erp5.Document.py
+1
-1
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.TextDocument.py
...plateItem/portal_components/document.erp5.TextDocument.py
+31
-31
No files found.
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.Document.py
View file @
b2b504d4
...
...
@@ -410,7 +410,7 @@ class Document(DocumentExtensibleTraversableMixin, XMLObject, UrlMixin,
body_parser
=
re
.
compile
(
r'<body[^>]*>(.*?)</body>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
title_parser
=
re
.
compile
(
r'<title[^>]*>(.*?)</title>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
base_parser
=
re
.
compile
(
r'<base[^>]*href=[\'"](.*?)[\'"][^>]*>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
charset_parser
=
re
.
compile
(
r'(?P<keyword>charset="?)(?P<charset>[a-z0-9\
-]+)
', re.IGNORECASE)
charset_parser
=
re
.
compile
(
b
r'(?P<keyword>charset="?)(?P<charset>[a-z0-9\
-]+)
', re.IGNORECASE)
# Declarative security
security = ClassSecurityInfo()
...
...
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.TextDocument.py
View file @
b2b504d4
...
...
@@ -163,10 +163,9 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
if
mime_type
==
'text/html'
:
mime_type
=
'text/x-html-safe'
if
src_mimetype
!=
"image/svg+xml"
:
if
six
.
PY2
:
data
=
text_content
else
:
data
=
text_content
.
encode
()
data
=
text_content
if
not
isinstance
(
data
,
bytes
):
data
=
data
.
encode
(
'utf-8'
)
result
=
portal_transforms
.
convertToData
(
mime_type
,
data
,
object
=
self
,
context
=
self
,
filename
=
filename
,
...
...
@@ -186,6 +185,8 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
file
=
BytesIO
(),
filename
=
self
.
getId
(),
temp_object
=
1
)
if
not
isinstance
(
result
,
bytes
):
result
=
result
.
encode
(
'utf-8'
)
temp_image
.
_setData
(
result
)
_
,
result
=
temp_image
.
convert
(
**
kw
)
...
...
@@ -227,7 +228,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
def
setBaseData
(
self
,
value
):
"""Store base_data into text_content
"""
self
.
_setTextContent
(
value
)
self
.
_setTextContent
(
value
.
decode
(
'utf-8'
)
)
security
.
declareProtected
(
Permissions
.
ModifyPortalContent
,
'_setBaseData'
)
_setBaseData
=
setBaseData
...
...
@@ -253,9 +254,12 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
"""
self
.
_checkConversionFormatPermission
(
None
)
if
default
is
_MARKER
:
return
self
.
getTextContent
()
text_content
=
self
.
getTextContent
()
else
:
return
self
.
getTextContent
(
default
=
default
)
text_content
=
self
.
getTextContent
(
default
=
default
)
if
six
.
PY3
and
text_content
and
text_content
is
not
default
:
text_content
=
text_content
.
encode
(
'utf-8'
)
return
text_content
security
.
declareProtected
(
Permissions
.
AccessContentsInformation
,
'hasBaseData'
)
def
hasBaseData
(
self
):
...
...
@@ -290,9 +294,12 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
def
_convertToBaseFormat
(
self
):
"""Conversion to base format for TextDocument consist
to convert file content into utf-8
to convert file content into utf-8.
If the data embeds charset information, this information is updated
to the new (utf-8) charset. This supports XML and HTML.
"""
def
guessCharsetAndConvert
(
document
,
text_content
,
content_type
):
# type: (TextDocument, bytes, str) -> Tuple[bytes, str]
"""
return encoded content_type and message if encoding
is not utf-8
...
...
@@ -322,36 +329,32 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
return
text_content
,
message
content_type
=
self
.
getContentType
()
or
DEFAULT_CONTENT_TYPE
text_content
=
self
.
getData
()
# TODO: don't we need to convert to bytes here ? what if it is PData ?
data
=
bytes
(
self
.
getData
())
if
content_type
.
endswith
(
'xml'
):
try
:
tree
=
etree
.
fromstring
(
text_content
)
text_content
=
etree
.
tostring
(
tree
,
encoding
=
'utf-8'
,
xml_declaration
=
True
)
tree
=
etree
.
fromstring
(
data
)
base_data
=
etree
.
tostring
(
tree
,
encoding
=
'utf-8'
,
xml_declaration
=
True
)
message
=
'Conversion to base format succeeds'
except
etree
.
XMLSyntaxError
:
# pylint: disable=catching-non-exception
message
=
'Conversion to base format without codec fails'
elif
content_type
==
'text/html'
:
re_match
=
self
.
charset_parser
.
search
(
# we don't really care about decoding errors for searching this
# regexp
text_content
.
decode
(
'ascii'
,
'replace'
)
if
six
.
PY3
else
text_content
)
re_match
=
self
.
charset_parser
.
search
(
data
)
message
=
'Conversion to base format succeeds'
if
re_match
is
not
None
:
charset
=
re_match
.
group
(
'charset'
)
base_data
=
data
charset
=
re_match
.
group
(
'charset'
).
decode
(
'ascii'
)
try
:
# Use encoding in html document
text_content
=
text_content
.
decode
(
charset
)
if
six
.
PY2
:
text_content
=
text_content
.
encode
(
'utf-8'
)
data
=
data
.
decode
(
charset
).
encode
(
'utf-8'
)
except
(
UnicodeDecodeError
,
LookupError
):
# Encoding read from document is wrong
text_content
,
message
=
guessCharsetAndConvert
(
self
,
text_content
,
content_type
)
base_data
,
message
=
guessCharsetAndConvert
(
self
,
data
,
content_type
)
else
:
message
=
'Conversion to base format with charset %r succeeds'
\
%
charset
if
charset
.
lower
()
!=
'utf-8'
:
charset
=
'utf-8'
# Override charset if convertion succeeds
charset
=
'utf-8'
# Override charset if convertion succeeds
# change charset value in html_document as well
def
subCharset
(
matchobj
):
keyword
=
matchobj
.
group
(
'keyword'
)
...
...
@@ -361,24 +364,21 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
return
matchobj
.
group
(
0
)
elif
keyword
:
# if keyword is present, replace charset just after
return
keyword
+
'utf-8'
text_content
=
self
.
charset_parser
.
sub
(
subCharset
,
text_content
)
return
keyword
+
b
'utf-8'
base_data
=
self
.
charset_parser
.
sub
(
subCharset
,
data
)
else
:
text_content
,
message
=
guessCharsetAndConvert
(
self
,
text_content
,
content_type
)
base_data
,
message
=
guessCharsetAndConvert
(
self
,
data
,
content_type
)
else
:
# generaly text/plain
try
:
# if succeeds, not need to change encoding
# it's already utf-8
text_content
.
decode
(
'utf-8'
)
data
.
decode
(
'utf-8'
)
except
(
UnicodeDecodeError
,
LookupError
):
text_content
,
message
=
guessCharsetAndConvert
(
self
,
text_content
,
content_type
)
base_data
,
message
=
guessCharsetAndConvert
(
self
,
data
,
content_type
)
else
:
message
=
'Conversion to base format succeeds'
# TODO(zope4py3): rethink this, shouldn't we store bytes in base data ?
self
.
_setBaseData
(
text_content
)
self
.
_setBaseData
(
base_data
)
self
.
_setBaseContentType
(
content_type
)
return
message
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment