Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
erp5
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Carlos Ramos Carreño
erp5
Commits
c24c3923
Commit
c24c3923
authored
Feb 07, 2024
by
Jérome Perrin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
TextContent base_data bytes
parent
f1f4137c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
28 deletions
+25
-28
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.Document.py
...tTemplateItem/portal_components/document.erp5.Document.py
+1
-1
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.TextDocument.py
...plateItem/portal_components/document.erp5.TextDocument.py
+24
-27
No files found.
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.Document.py
View file @
c24c3923
...
@@ -410,7 +410,7 @@ class Document(DocumentExtensibleTraversableMixin, XMLObject, UrlMixin,
...
@@ -410,7 +410,7 @@ class Document(DocumentExtensibleTraversableMixin, XMLObject, UrlMixin,
body_parser
=
re
.
compile
(
r'<body[^>]*>(.*?)</body>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
body_parser
=
re
.
compile
(
r'<body[^>]*>(.*?)</body>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
title_parser
=
re
.
compile
(
r'<title[^>]*>(.*?)</title>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
title_parser
=
re
.
compile
(
r'<title[^>]*>(.*?)</title>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
base_parser
=
re
.
compile
(
r'<base[^>]*href=[\'"](.*?)[\'"][^>]*>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
base_parser
=
re
.
compile
(
r'<base[^>]*href=[\'"](.*?)[\'"][^>]*>'
,
re
.
IGNORECASE
+
re
.
DOTALL
)
charset_parser
=
re
.
compile
(
r'(?P<keyword>charset="?)(?P<charset>[a-z0-9\
-]+)
', re.IGNORECASE)
charset_parser
=
re
.
compile
(
b
r'(?P<keyword>charset="?)(?P<charset>[a-z0-9\
-]+)
', re.IGNORECASE)
# Declarative security
# Declarative security
security = ClassSecurityInfo()
security = ClassSecurityInfo()
...
...
product/ERP5/bootstrap/erp5_core/DocumentTemplateItem/portal_components/document.erp5.TextDocument.py
View file @
c24c3923
...
@@ -186,6 +186,8 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
...
@@ -186,6 +186,8 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
file
=
BytesIO
(),
file
=
BytesIO
(),
filename
=
self
.
getId
(),
filename
=
self
.
getId
(),
temp_object
=
1
)
temp_object
=
1
)
if
not
isinstance
(
result
,
bytes
):
result
=
result
.
encode
(
'utf-8'
)
temp_image
.
_setData
(
result
)
temp_image
.
_setData
(
result
)
_
,
result
=
temp_image
.
convert
(
**
kw
)
_
,
result
=
temp_image
.
convert
(
**
kw
)
...
@@ -227,7 +229,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
...
@@ -227,7 +229,7 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
def
setBaseData
(
self
,
value
):
def
setBaseData
(
self
,
value
):
"""Store base_data into text_content
"""Store base_data into text_content
"""
"""
self
.
_setTextContent
(
value
)
self
.
_setTextContent
(
value
.
decode
(
'utf-8'
)
)
security
.
declareProtected
(
Permissions
.
ModifyPortalContent
,
'_setBaseData'
)
security
.
declareProtected
(
Permissions
.
ModifyPortalContent
,
'_setBaseData'
)
_setBaseData
=
setBaseData
_setBaseData
=
setBaseData
...
@@ -253,9 +255,9 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
...
@@ -253,9 +255,9 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
"""
"""
self
.
_checkConversionFormatPermission
(
None
)
self
.
_checkConversionFormatPermission
(
None
)
if
default
is
_MARKER
:
if
default
is
_MARKER
:
return
self
.
getTextContent
()
return
self
.
getTextContent
()
.
encode
(
'utf-8'
)
else
:
else
:
return
self
.
getTextContent
(
default
=
default
)
return
self
.
getTextContent
(
default
=
default
)
.
encode
(
'utf-8'
)
security
.
declareProtected
(
Permissions
.
AccessContentsInformation
,
'hasBaseData'
)
security
.
declareProtected
(
Permissions
.
AccessContentsInformation
,
'hasBaseData'
)
def
hasBaseData
(
self
):
def
hasBaseData
(
self
):
...
@@ -290,9 +292,12 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
...
@@ -290,9 +292,12 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
def
_convertToBaseFormat
(
self
):
def
_convertToBaseFormat
(
self
):
"""Conversion to base format for TextDocument consist
"""Conversion to base format for TextDocument consist
to convert file content into utf-8
to convert file content into utf-8.
If the data embeds charset information, this information is updated
to the new (utf-8) charset. This supports XML and HTML.
"""
"""
def
guessCharsetAndConvert
(
document
,
text_content
,
content_type
):
def
guessCharsetAndConvert
(
document
,
text_content
,
content_type
):
# type: (TextDocument, bytes, str) -> Tuple[bytes, str]
"""
"""
return encoded content_type and message if encoding
return encoded content_type and message if encoding
is not utf-8
is not utf-8
...
@@ -322,37 +327,32 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
...
@@ -322,37 +327,32 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
return
text_content
,
message
return
text_content
,
message
content_type
=
self
.
getContentType
()
or
DEFAULT_CONTENT_TYPE
content_type
=
self
.
getContentType
()
or
DEFAULT_CONTENT_TYPE
text_content
=
self
.
getData
()
# TODO: don't we need to convert to bytes here ? what if it is PData ?
data
=
bytes
(
self
.
getData
())
if
content_type
.
endswith
(
'xml'
):
if
content_type
.
endswith
(
'xml'
):
try
:
try
:
tree
=
etree
.
fromstring
(
text_content
)
tree
=
etree
.
fromstring
(
data
)
text_content
=
etree
.
tostring
(
tree
,
encoding
=
'utf-8'
,
xml_declaration
=
True
)
base_data
=
etree
.
tostring
(
tree
,
encoding
=
'utf-8'
,
xml_declaration
=
True
)
content_type
=
'application/xml'
content_type
=
'application/xml'
message
=
'Conversion to base format succeeds'
message
=
'Conversion to base format succeeds'
except
etree
.
XMLSyntaxError
:
# pylint: disable=catching-non-exception
except
etree
.
XMLSyntaxError
:
# pylint: disable=catching-non-exception
message
=
'Conversion to base format without codec fails'
message
=
'Conversion to base format without codec fails'
elif
content_type
==
'text/html'
:
elif
content_type
==
'text/html'
:
re_match
=
self
.
charset_parser
.
search
(
re_match
=
self
.
charset_parser
.
search
(
data
)
# we don't really care about decoding errors for searching this
# regexp
text_content
.
decode
(
'ascii'
,
'replace'
)
if
six
.
PY3
else
text_content
)
message
=
'Conversion to base format succeeds'
message
=
'Conversion to base format succeeds'
if
re_match
is
not
None
:
if
re_match
is
not
None
:
charset
=
re_match
.
group
(
'charset'
)
charset
=
re_match
.
group
(
'charset'
)
.
decode
(
'ascii'
)
try
:
try
:
# Use encoding in html document
# Use encoding in html document
text_content
=
text_content
.
decode
(
charset
)
data
=
data
.
decode
(
charset
).
encode
(
'utf-8'
)
if
six
.
PY2
:
text_content
=
text_content
.
encode
(
'utf-8'
)
except
(
UnicodeDecodeError
,
LookupError
):
except
(
UnicodeDecodeError
,
LookupError
):
# Encoding read from document is wrong
# Encoding read from document is wrong
text_content
,
message
=
guessCharsetAndConvert
(
self
,
base_data
,
message
=
guessCharsetAndConvert
(
self
,
text_content
,
content_type
)
data
,
content_type
)
else
:
else
:
message
=
'Conversion to base format with charset %r succeeds'
\
message
=
'Conversion to base format with charset %r succeeds'
\
%
charset
%
charset
if
charset
.
lower
()
!=
'utf-8'
:
if
charset
.
lower
()
!=
'utf-8'
:
charset
=
'utf-8'
# Override charset if convertion succeeds
charset
=
'utf-8'
# Override charset if convertion succeeds
# change charset value in html_document as well
# change charset value in html_document as well
def
subCharset
(
matchobj
):
def
subCharset
(
matchobj
):
keyword
=
matchobj
.
group
(
'keyword'
)
keyword
=
matchobj
.
group
(
'keyword'
)
...
@@ -362,24 +362,21 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
...
@@ -362,24 +362,21 @@ class TextDocument(CachedConvertableMixin, BaseConvertableFileMixin, TextContent
return
matchobj
.
group
(
0
)
return
matchobj
.
group
(
0
)
elif
keyword
:
elif
keyword
:
# if keyword is present, replace charset just after
# if keyword is present, replace charset just after
return
keyword
+
'utf-8'
return
keyword
+
b
'utf-8'
text_content
=
self
.
charset_parser
.
sub
(
subCharset
,
text_content
)
base_data
=
self
.
charset_parser
.
sub
(
subCharset
,
data
)
else
:
else
:
text_content
,
message
=
guessCharsetAndConvert
(
self
,
base_data
,
message
=
guessCharsetAndConvert
(
self
,
data
,
content_type
)
text_content
,
content_type
)
else
:
else
:
# generaly text/plain
# generaly text/plain
try
:
try
:
# if succeeds, not need to change encoding
# if succeeds, not need to change encoding
# it's already utf-8
# it's already utf-8
text_content
.
decode
(
'utf-8'
)
data
.
decode
(
'utf-8'
)
except
(
UnicodeDecodeError
,
LookupError
):
except
(
UnicodeDecodeError
,
LookupError
):
text_content
,
message
=
guessCharsetAndConvert
(
self
,
base_data
,
message
=
guessCharsetAndConvert
(
self
,
data
,
content_type
)
text_content
,
content_type
)
else
:
else
:
message
=
'Conversion to base format succeeds'
message
=
'Conversion to base format succeeds'
# TODO(zope4py3): rethink this, shouldn't we store bytes in base data ?
self
.
_setBaseData
(
base_data
)
self
.
_setBaseData
(
text_content
)
self
.
_setBaseContentType
(
content_type
)
self
.
_setBaseContentType
(
content_type
)
return
message
return
message
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment