RtfFile linux build

git-svn-id: svn://fileserver/activex/AVS/Sources/TeamlabOffice/trunk/ServerComponents@62266 954022d7-b5bf-4e40-9824-e11837661b57

RtfFile linux build
git-svn-id: svn://fileserver/activex/AVS/Sources/TeamlabOffice/trunk/ServerComponents@62266 954022d7-b5bf-4e40-9824-e11837661b57
2357efe3 · Elen.Subbotina · Alexander Trofimov · 6c0650b2 · 2357efe3 · 2357efe3
Commit 2357efe3 authored Apr 24, 2015 by Elen.Subbotina Committed by Alexander Trofimov May 20, 2016
6 changed files
--- a/ASCOfficeRtfFile/Linux/RtfFileConverter.pro
+++ b/ASCOfficeRtfFile/Linux/RtfFileConverter.pro
@@ -120,10 +120,4 @@ else{

 unix:!macx: LIBS += -lxml2

-unix:!macx: LIBS += -L$$PWD/../../../../../../../usr/local/lib/ -lboost_locale
-unix:!macx: PRE_TARGETDEPS += $$PWD/../../../../../../../usr/local/lib/libboost_locale.a
-
-unix:!macx: LIBS += -L$$PWD/../../../../../../../usr/local/lib/ -lboost_system
-unix:!macx: PRE_TARGETDEPS += $$PWD/../../../../../../../usr/local/lib/libboost_system.a
-
 ########################################################
--- a/ASCOfficeRtfFile/RtfFormatLib/source/DestinationCommand.h
+++ b/ASCOfficeRtfFile/RtfFormatLib/source/DestinationCommand.h
@@ -2869,7 +2869,6 @@ public:
            if( true == hasParameter )
            {
 				oDocument.m_oProperty.m_nAnsiCodePage = parameter;
-                oDocument.m_oProperty.m_sAnsiCodePage = RtfUtility::GetCodepageName(oDocument.m_oProperty.m_nAnsiCodePage);
            }
 		}
 		else if( _T("deff") == sCommand )

--- a/ASCOfficeRtfFile/RtfFormatLib/source/RtfChar.cpp
+++ b/ASCOfficeRtfFile/RtfFormatLib/source/RtfChar.cpp
@@ -31,36 +31,32 @@ CString RtfChar::renderRtfText( CString& sText, void* poDocument, RtfCharPropert
                nCodePage = CP_ACP;
            break;
        }
-        case RtfDocumentProperty::cp_mac: nCodePage = CP_MACCP;break;
-        case RtfDocumentProperty::cp_pc: nCodePage = 437;break;
-        case RtfDocumentProperty::cp_pca: nCodePage = 850;break;
+        case RtfDocumentProperty::cp_mac: nCodePage = CP_MACCP; break;
+        case RtfDocumentProperty::cp_pc:  nCodePage = 437;      break;
+        case RtfDocumentProperty::cp_pca: nCodePage = 850;      break;
        }
    }
    //если ничего нет ставим ANSI
    if( -1 == nCodePage )
        nCodePage = CP_ACP;

-    //todooo проверить !!!!!
-    //делаем Ansi строку
-    std::wstring unicodeStr(sText.GetBuffer());
+    std::wstring    unicodeStr (sText.GetBuffer(), sText.GetLength());
+    std::string     ansiStr    = RtfUtility::convert_string(unicodeStr.begin(), unicodeStr.end(), nCodePage);

-    std::string ansiStr(unicodeStr.begin(), unicodeStr.end());
+    CString sTextBack  = RtfUtility::convert_string(ansiStr.begin(), ansiStr.end(), nCodePage);
+    //обратное преобразование чтобы понять какие символы свонвертировались неправильно

-    //делаем обратное преобразование чтобы понять какие символы свонвертировались неправильно
-
-    std::wstring unicodeStrBack(ansiStr.begin(), ansiStr.end());
-
-    for( int i = 0; i < unicodeStr.length() && i < unicodeStrBack.length() ; i++ )
+    for( int i = 0; i < sText.GetLength() && i < sTextBack.GetLength() ; i++ )
    {
        bool bWriteUnicode = true;

-        if( unicodeStrBack[i] == unicodeStr[i] )
+        if(sTextBack[i] == sText[i] )
        {
            CString sUniChar; sUniChar.AppendChar( unicodeStr[i] );

            //делаем Ansi строку sUniChar
            // -> sTempAnsiChars
-            std::string sTempAnsiChars(unicodeStr.begin()+i, unicodeStr.begin() + i + 1);
+            std::string sTempAnsiChars = RtfUtility::convert_string(unicodeStr.begin()+i, unicodeStr.begin() + i + 1, nCodePage);

            for( int k = 0; k < sTempAnsiChars.length(); k++ )
            {

--- a/ASCOfficeRtfFile/RtfFormatLib/source/RtfReader.cpp
+++ b/ASCOfficeRtfFile/RtfFormatLib/source/RtfReader.cpp
@@ -2,11 +2,6 @@
 #include "Writer/OOXWriter.h"
 #include "DestinationCommand.h"

-#if !defined (_WIN32) && !defined(_WIN64)
-    #include <boost/locale.hpp>
-    #include <locale>
-    typedef std::codecvt<wchar_t, char, mbstate_t> codecvt_type;
-#endif

 RtfReader::RtfReader(RtfDocument& oDocument, CString sFilename ):m_oDocument(oDocument),m_sFilename(sFilename)
 {
@@ -46,6 +41,7 @@ void RtfReader::PopState()
 	//delete psaveOld;
 }

+
 CString RtfAbstractReader::ExecuteTextInternalCodePage( std::string& sCharString, RtfDocument& oDocument, RtfReader& oReader)
 {
    CString sResult;
@@ -53,7 +49,6 @@ CString RtfAbstractReader::ExecuteTextInternalCodePage( std::string& sCharString
    if( false == sCharString.empty())
    {
        int         nCodepage = -1;
-        std::string sCodepage;

        //применяем параметры codepage от текущего шрифта todo associated fonts.
        RtfFont oFont;
@@ -78,59 +73,21 @@ CString RtfAbstractReader::ExecuteTextInternalCodePage( std::string& sCharString
                    if( PROP_DEF != oDocument.m_oProperty.m_nAnsiCodePage )
                    {
                        nCodepage = oDocument.m_oProperty.m_nAnsiCodePage;
-                        sCodepage = oDocument.m_oProperty.m_sAnsiCodePage;
                    }
                    else
                        nCodepage = CP_ACP;
                    break;
                }
-            case RtfDocumentProperty::cp_mac:   nCodepage = CP_MACCP;   sCodepage = "macintosh";    break; //?? todooo
-            case RtfDocumentProperty::cp_pc:    nCodepage = 437;        sCodepage = "IBM437";       break; //ms dos latin us
-            case RtfDocumentProperty::cp_pca:   nCodepage = 850;        sCodepage = "ibm850";       break; //ms dos latin eu
+            case RtfDocumentProperty::cp_mac:   nCodepage = CP_MACCP;   break; //?? todooo
+            case RtfDocumentProperty::cp_pc:    nCodepage = 437;        break; //ms dos latin us
+            case RtfDocumentProperty::cp_pca:   nCodepage = 850;        break; //ms dos latin eu
            }
        }
        //если ничего нет ставим ANSI
        if( -1 == nCodepage )
            nCodepage = CP_ACP;

-#if defined (_WIN32) || defined (_WIN64)
-        int nLengthW ;
-		nLengthW = MultiByteToWideChar(nCodepage, 0, sCharString.c_str(), -1, NULL, NULL);
-		MultiByteToWideChar(nCodepage, 0, sCharString.c_str(), -1, sResult.GetBuffer( nLengthW ), nLengthW);
-        sResult.ReleaseBuffer();
-#else
-        if (nCodepage > 0)
-        {
-            std::mbstate_t state;
-
-            boost::locale::generator gen;
-            std::locale loc(gen(sCodepage.c_str()));
-
-            const codecvt_type& cdcvt = std::use_facet<codecvt_type>(loc);
-
-            wchar_t * wchars = new wchar_t [sCharString.size()+ 1];
-
-            const char *in_next = 0;
-            wchar_t *out_next = 0;
-
-            std::codecvt_base::result r;
-            r = cdcvt.in (state, sCharString.c_str(), sCharString.c_str() + sCharString.length(), in_next,
-                          wchars, wchars + sCharString.size() + 1, out_next);
-            *out_next = '\0';
-
-            sResult = CString(wchars);
-
-            delete [] wchars;
-        }
-        else
-        {
-            //ansi
-            std::wstring s(sCharString.begin(), sCharString.end());
-
-            sResult = std_string2string(s);
-        }
-
-#endif
+        sResult = RtfUtility::convert_string(sCharString.begin(), sCharString.end(), nCodepage);
    }
    return sResult;
 }
--- a/ASCOfficeRtfFile/RtfFormatLib/source/RtfSection.h
+++ b/ASCOfficeRtfFile/RtfFormatLib/source/RtfSection.h
@@ -15,7 +15,6 @@ public:
 	
 	CodePage m_eCodePage;

-    std::string m_sAnsiCodePage;
    int m_nAnsiCodePage;
 	int m_nDeffFont;
 	int m_bHypCaps;//(doNotHyphenateCaps)\hyphcaps*	Switches hyphenation of capitalized words (default is on). Append 1 or leave control word by itself to toggle property on; append 0 to turn it off.

--- a/ASCOfficeRtfFile/RtfFormatLib/source/Utils.h
+++ b/ASCOfficeRtfFile/RtfFormatLib/source/Utils.h
@@ -185,154 +185,6 @@ static const int aCodePages[][2] = {
    255,	850//OEM
 };

-static const struct
-{
-    int id;
-    std::string name;
-} aCodePagesNames[] =
-{
-    {	37	,	"IBM037"	},
-    {	437	,	"IBM437"	},
-    {	500	,	"IBM500"	},
-    {	708	,	"ASMO-708"	},
-    {	720	,	"DOS-720"	},
-    {	737	,	"ibm737"	},
-    {	775	,	"ibm775"	},
-    {	850	,	"ibm850"	},
-    {	852	,	"ibm852"	},
-    {	855	,	"IBM855"	},
-    {	857	,	"ibm857"	},
-    {	858	,	"IBM00858"	},
-    {	860	,	"IBM860"	},
-    {	861	,	"ibm861"	},
-    {	862	,	"DOS-862"	},
-    {	863	,	"IBM863"	},
-    {	864	,	"IBM864"	},
-    {	865	,	"IBM865"	},
-    {	866	,	"cp866"	},
-    {	869	,	"ibm869"	},
-    {	870	,	"IBM870"	},
-    {	874	,	"windows-874"	},
-    {	875	,	"cp875"	},
-    {	932	,	"shift_jis"	},
-    {	936	,	"gb2312"	},
-    {	949	,	"KS_C_5601-1987"	},
-    {	950	,	"big5"	},
-    {	1026	,	"IBM1026"	},
-    {	1047	,	"IBM01047"	},
-    {	1140	,	"IBM01140"	},
-    {	1141	,	"IBM01141"	},
-    {	1142	,	"IBM01142"	},
-    {	1143	,	"IBM01143"	},
-    {	1144	,	"IBM01144"	},
-    {	1145	,	"IBM01145"	},
-    {	1146	,	"IBM01146"	},
-    {	1147	,	"IBM01147"	},
-    {	1148	,	"IBM01148"	},
-    {	1149	,	"IBM01149"	},
-    {	1200	,	"utf-16"	},
-    {	1201	,	"unicodeFFFE"	},
-    {	1250	,	"windows-1250"	},
-    {	1251	,	"windows-1251"	},
-    {	1252	,	"windows-1252"	},
-    {	1253	,	"windows-1253"	},
-    {	1254	,	"windows-1254"	},
-    {	1255	,	"windows-1255"	},
-    {	1256	,	"windows-1256"	},
-    {	1257	,	"windows-1257"	},
-    {	1258	,	"windows-1258"	},
-    {	1361	,	"Johab"	},
-    {	10000	,	"macintosh"	},
-    {	10001	,	"x-mac-japanese"	},
-    {	10002	,	"x-mac-chinesetrad"	},
-    {	10003	,	"x-mac-korean"	},
-    {	10004	,	"x-mac-arabic"	},
-    {	10005	,	"x-mac-hebrew"	},
-    {	10006	,	"x-mac-greek"	},
-    {	10007	,	"x-mac-cyrillic"	},
-    {	10008	,	"x-mac-chinesesimp"	},
-    {	10010	,	"x-mac-romanian"	},
-    {	10017	,	"x-mac-ukrainian"	},
-    {	10021	,	"x-mac-thai"	},
-    {	10029	,	"x-mac-ce"	},
-    {	10079	,	"x-mac-icelandic"	},
-    {	10081	,	"x-mac-turkish"	},
-    {	10082	,	"x-mac-croatian"	},
-    {	12000	,	"utf-32"	},
-    {	12001	,	"utf-32BE"	},
-    {	20000	,	"x-Chinese_CNS"	},
-    {	20001	,	"x-cp20001"	},
-    {	20002	,	"x_Chinese-Eten"	},
-    {	20003	,	"x-cp20003"	},
-    {	20004	,	"x-cp20004"	},
-    {	20005	,	"x-cp20005"	},
-    {	20105	,	"x-IA5"	},
-    {	20106	,	"x-IA5-German"	},
-    {	20107	,	"x-IA5-Swedish"	},
-    {	20108	,	"x-IA5-Norwegian"	},
-    {	20127	,	"us-ascii"	},
-    {	20261	,	"x-cp20261"	},
-    {	20269	,	"x-cp20269"	},
-    {	20273	,	"IBM273"	},
-    {	20277	,	"IBM277"	},
-    {	20278	,	"IBM278"	},
-    {	20280	,	"IBM280"	},
-    {	20284	,	"IBM284"	},
-    {	20285	,	"IBM285"	},
-    {	20290	,	"IBM290"	},
-    {	20297	,	"IBM297"	},
-    {	20420	,	"IBM420"	},
-    {	20423	,	"IBM423"	},
-    {	20424	,	"IBM424"	},
-    {	20833	,	"x-EBCDIC-KoreanExtended"	},
-    {	20838	,	"IBM-Thai"	},
-    {	20866	,	"koi8-r"	},
-    {	20871	,	"IBM871"	},
-    {	20880	,	"IBM880"	},
-    {	20905	,	"IBM905"	},
-    {	20924	,	"IBM00924"	},
-    {	20932	,	"EUC-JP"	},
-    {	20936	,	"x-cp20936"	},
-    {	20949	,	"x-cp20949"	},
-    {	21025	,	"cp1025"	},
-    {	21866	,	"koi8-u"	},
-    {	28591	,	"iso-8859-1"	},
-    {	28592	,	"iso-8859-2"	},
-    {	28593	,	"iso-8859-3"	},
-    {	28594	,	"iso-8859-4"	},
-    {	28595	,	"iso-8859-5"	},
-    {	28596	,	"iso-8859-6"	},
-    {	28597	,	"iso-8859-7"	},
-    {	28598	,	"iso-8859-8"	},
-    {	28599	,	"iso-8859-9"	},
-    {	28603	,	"iso-8859-13"	},
-    {	28605	,	"iso-8859-15"	},
-    {	29001	,	"x-Europa"	},
-    {	38598	,	"iso-8859-8-i"	},
-    {	50220	,	"iso-2022-jp"	},
-    {	50221	,	"csISO2022JP"	},
-    {	50222	,	"iso-2022-jp"	},
-    {	50225	,	"iso-2022-kr"	},
-    {	50227	,	"x-cp50227"	},
-    {	51932	,	"euc-jp"	},
-    {	51936	,	"EUC-CN"	},
-    {	51949	,	"euc-kr"	},
-    {	52936	,	"hz-gb-2312"	},
-    {	54936	,	"GB18030"	},
-    {	57002	,	"x-iscii-de"	},
-    {	57003	,	"x-iscii-be"	},
-    {	57004	,	"x-iscii-ta"	},
-    {	57005	,	"x-iscii-te"	},
-    {	57006	,	"x-iscii-as"	},
-    {	57007	,	"x-iscii-or"	},
-    {	57008	,	"x-iscii-ka"	},
-    {	57009	,	"x-iscii-ma"	},
-    {	57010	,	"x-iscii-gu"	},
-    {	57011	,	"x-iscii-pa"	},
-    {	65000	,	"utf-7"	},
-    {	65001	,	"utf-8"	}
-};
-
 class RtfUtility
 {
 public: 
@@ -601,16 +453,6 @@ public:
 		return sResult;
 	}

-    static std::string GetCodepageName( int nCodepage )
-    {
-        int nCodePagesLength =  140;
-
-        for( int i = 0; i < nCodePagesLength; i++ )
-            if( aCodePagesNames[i].id == nCodepage )
-                return aCodePagesNames[i].name;
-
-        return "";
-    }

    static int CharsetToCodepage( int nCharset )
    {
@@ -629,6 +471,129 @@ public:

        return 1252;//ANSI
    }
+    // Decodes the bytes in [start, end) from the Windows codepage nCodepage into a wide CString.
+    //
+    // start, end - contiguous range of encoded bytes (std::string iterators).
+    // nCodepage  - numeric Windows codepage; values <= 0 select the byte-wise fallback.
+    //
+    // Returns the decoded text. When nCodepage <= 0, the codepage is not known to the
+    // platform converter, or the input is malformed, every byte is widened to the code
+    // point with the same value (0..255). The result stops at the first embedded NUL
+    // because it is built from a C string.
+    static CString convert_string(std::string::const_iterator start, std::string::const_iterator end, int nCodepage = 0)
+    {
+        if (start == end)
+            return CString();   // nothing to decode; also keeps &*start below valid
+
+        const size_t insize = end - start;
+        const char*  src    = &(*start);  // std::string storage is contiguous
+
+        if (nCodepage > 0)
+        {
+#if defined (_WIN32) || defined (_WIN64)
+            // Pass the explicit length: the range is not necessarily NUL-terminated.
+            const int nWide = MultiByteToWideChar(nCodepage, 0, src, static_cast<int>(insize), NULL, 0);
+            if (nWide > 0)
+            {
+                std::wstring wide(nWide, L'\0');
+                if (MultiByteToWideChar(nCodepage, 0, src, static_cast<int>(insize), &wide[0], nWide) > 0)
+                    return CString(wide.c_str());
+            }
+#else
+            const std::string sCodepage = "CP" + std::to_string(nCodepage);
+
+            iconv_t ic = iconv_open("WCHAR_T", sCodepage.c_str());
+            if (ic != (iconv_t) -1)
+            {
+                // One wchar_t per input byte is enough as long as every character fits in a
+                // single wchar_t (assumes a 32-bit wchar_t, as on Linux - TODO confirm for other targets).
+                std::wstring wide(insize, L'\0');
+
+                char*  inptr   = const_cast<char*>(src);  // iconv takes a non-const pointer but only reads
+                size_t inleft  = insize;
+                char*  outptr  = reinterpret_cast<char*>(&wide[0]);
+                size_t outleft = wide.size() * sizeof(wchar_t);
+
+                const size_t nconv = iconv(ic, &inptr, &inleft, &outptr, &outleft);
+                iconv_close(ic);
+
+                // iconv reports failure only as (size_t)-1; any other value is the number of
+                // non-reversible conversions and the output is still valid.
+                if (nconv != (size_t) -1)
+                {
+                    wide.resize(wide.size() - outleft / sizeof(wchar_t));
+                    return CString(wide.c_str());
+                }
+            }
+#endif
+        }
+
+        // Fallback: widen through unsigned char so bytes >= 0x80 are not sign-extended.
+        std::wstring wide;
+        wide.reserve(insize);
+        for (std::string::const_iterator it = start; it != end; ++it)
+            wide += static_cast<wchar_t>(static_cast<unsigned char>(*it));
+
+        return CString(wide.c_str());
+    }
+    // Encodes the wide characters in [start, end) into bytes of the Windows codepage nCodepage.
+    //
+    // start, end - contiguous range of wide characters (std::wstring iterators).
+    // nCodepage  - numeric Windows codepage; values <= 0 select the narrowing fallback.
+    //
+    // Returns the encoded bytes. When nCodepage <= 0 or the platform converter fails
+    // (unknown codepage, or with iconv a character the codepage cannot represent), each
+    // wide character is narrowed to a single char, which is lossy above 0xFF.
+    static std::string convert_string(std::wstring::const_iterator start, std::wstring::const_iterator end, int nCodepage = 0)
+    {
+        if (start == end)
+            return std::string();   // nothing to encode; also keeps &*start below valid
+
+        const size_t   insize = end - start;
+        const wchar_t* src    = &(*start);  // std::wstring storage is contiguous
+
+        if (nCodepage > 0)
+        {
+#if defined (_WIN32) || defined (_WIN64)
+            // Pass the explicit length: the range is not necessarily NUL-terminated.
+            const int nBytes = WideCharToMultiByte(nCodepage, 0, src, static_cast<int>(insize), NULL, 0, NULL, NULL);
+            if (nBytes > 0)
+            {
+                std::string out(nBytes, '\0');
+                if (WideCharToMultiByte(nCodepage, 0, src, static_cast<int>(insize), &out[0], nBytes, NULL, NULL) > 0)
+                    return out;
+            }
+#else
+            const std::string sCodepage = "CP" + std::to_string(nCodepage);
+
+            iconv_t ic = iconv_open(sCodepage.c_str(), "WCHAR_T");
+            if (ic != (iconv_t) -1)
+            {
+                // Size the output for up to 4 bytes per character; a real buffer is required
+                // because reserve() alone leaves the string empty.
+                std::string out(insize * 4, '\0');
+
+                char*  inptr   = reinterpret_cast<char*>(const_cast<wchar_t*>(src));  // read-only for iconv
+                size_t inleft  = insize * sizeof(wchar_t);  // iconv counts bytes, not characters
+                char*  outptr  = &out[0];
+                size_t outleft = out.size();
+
+                const size_t nconv = iconv(ic, &inptr, &inleft, &outptr, &outleft);
+                iconv_close(ic);
+
+                // (size_t)-1 is the only failure value iconv returns.
+                if (nconv != (size_t) -1)
+                {
+                    out.resize(out.size() - outleft);
+                    return out;
+                }
+            }
+#endif
+        }
+
+        // Fallback: element-wise narrowing of each wchar_t to char (lossy above 0xFF).
+        return std::string(start, end);
+    }
    static int CodepageToCharset( int nCodepage )
    {
 #if defined (_WIN32) || defined(_WIN64)