diff --git a/product/ERP5/Document/Document.py b/product/ERP5/Document/Document.py
index e79580c1909b8869c29b5184c0e48c145ac232bb..028498e4ef20346bf9314d7a6e56f00c5ecfb8d6 100644
--- a/product/ERP5/Document/Document.py
+++ b/product/ERP5/Document/Document.py
@@ -490,7 +490,7 @@ class Document(PermanentURLMixIn, XMLObject, UrlMixIn, CachedConvertableMixin, S
   href_parser = re.compile('<a[^>]*href=[\'"](.*?)[\'"]',re.IGNORECASE)
   body_parser = re.compile('<body[^>]*>(.*?)</body>', re.IGNORECASE + re.DOTALL)
   title_parser = re.compile('<title[^>]*>(.*?)</title>', re.IGNORECASE + re.DOTALL)
-  charset_parser = re.compile('charset="?([a-z0-9\-]+)', re.IGNORECASE)
+  charset_parser = re.compile('(?P<keyword>charset="?)(?P<charset>[a-z0-9\-]+)', re.IGNORECASE)
 
   # Declarative security
   security = ClassSecurityInfo()
diff --git a/product/ERP5/Document/TextDocument.py b/product/ERP5/Document/TextDocument.py
index 614bb9d7e4cc89163c813d831120ffa062fcdddd..c5bb1f7961670984d202fe11b14fe1d1582e4d48 100644
--- a/product/ERP5/Document/TextDocument.py
+++ b/product/ERP5/Document/TextDocument.py
@@ -230,9 +230,9 @@ class TextDocument(Document, TextContent):
             mime_type = 'text/x-html-safe'
             if charset is None:
               # find charset
-              charset_list = self.charset_parser.findall(text_content)
-              if charset_list:
-                charset = charset_list[0]
+              charset_match = self.charset_parser.search(text_content)
+              if charset_match is not None:
+                charset = charset_match.group('charset')
             if charset and charset not in ('utf-8', 'UTF-8'):
               try:
                 text_content = text_content.decode(charset).encode('utf-8')
@@ -241,7 +241,9 @@ class TextDocument(Document, TextContent):
               else:
                 charset = 'utf-8' # Override charset if convertion succeeds
                 # change charset value in html_document as well
-                self.charset_parser.sub('utf-8', text_content)
+                # keep the matched 'charset=' prefix (with its optional
+                # quote) and replace only the declared encoding name
+                text_content = self.charset_parser.sub(r'\g<keyword>utf-8', text_content)
           result = portal_transforms.convertToData(mime_type, text_content,
                                                    object=self, context=self,
                                                    filename=filename,