Commit 928703b5 authored by Sergey.Konovalov's avatar Sergey.Konovalov Committed by Alexander Trofimov

txt: newline character:\r,n,\r\n. BigEndian, LittleEndian.

git-svn-id: svn://fileserver/activex/AVS/Sources/TeamlabOffice/trunk/ServerComponents@64959 954022d7-b5bf-4e40-9824-e11837661b57
parent ae4391c5

#include "TxtFile.h" #include "TxtFile.h"
#include "../Common/Encoding.h" #include "../Common/Encoding.h"
#include "../../../../Common/DocxFormat/Source/SystemUtility/File.h" #include "../../../../Common/DocxFormat/Source/SystemUtility/File.h"
#include "../../../../DesktopEditor/common/File.h"
static const std::string BadSymbols = "\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19"; static const std::string BadSymbols = "\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19";
static std::wstring convertUtf16ToWString(UTF16 * Data, int nLength)
{
UTF32 *pStrUtf32 = new UTF32 [nLength + 1];
memset ((void *) pStrUtf32, 0, sizeof (UTF32) * (nLength + 1));
// this values will be modificated
const UTF16 *pStrUtf16_Conv = Data;
UTF32 *pStrUtf32_Conv = pStrUtf32;
ConversionResult eUnicodeConversionResult =
ConvertUTF16toUTF32 (&pStrUtf16_Conv,
&Data[nLength]
, &pStrUtf32_Conv
, &pStrUtf32 [nLength]
, strictConversion);
if (conversionOK != eUnicodeConversionResult)
{
delete [] pStrUtf32;
return std::wstring();
}
std::wstring wstr ((wchar_t *) pStrUtf32);
delete [] pStrUtf32;
return wstr;
}
TxtFile::TxtFile(const std::wstring & path) : m_path(path), m_linesCount(0) TxtFile::TxtFile(const std::wstring & path) : m_path(path), m_linesCount(0)
{ {
} }
...@@ -43,7 +15,7 @@ const int TxtFile::getLinesCount() ...@@ -43,7 +15,7 @@ const int TxtFile::getLinesCount()
{ {
return m_linesCount; return m_linesCount;
} }
const std::list<std::string> TxtFile::readAnsiOrCodePage() // == readUtf8withoutPref const std::list<std::string> TxtFile::readAnsiOrCodePage() // == readUtf8withoutPref также
{ {
std::list<std::string> result; std::list<std::string> result;
CFile file_binary; CFile file_binary;
...@@ -61,12 +33,18 @@ const std::list<std::string> TxtFile::readAnsiOrCodePage() // == readUtf8without ...@@ -61,12 +33,18 @@ const std::list<std::string> TxtFile::readAnsiOrCodePage() // == readUtf8without
for (long end_pos = 0; end_pos < file_size; end_pos++) for (long end_pos = 0; end_pos < file_size; end_pos++)
{ {
if (file_data[end_pos] == 0x0a) BYTE cCurChar = file_data[end_pos];
if (0x0a == cCurChar || 0x0d == cCurChar)
{ {
//string from start_pos to end_pos //string from start_pos to end_pos
std::string str(file_data + start_pos, file_data + end_pos); std::string str(file_data + start_pos, file_data + end_pos);
start_pos = end_pos + 1; start_pos = end_pos + 1;
if(0x0d == cCurChar && start_pos < file_size && 0x0a == file_data[start_pos])
{
end_pos++;
start_pos++;
}
result.push_back(str); result.push_back(str);
m_linesCount++; m_linesCount++;
...@@ -78,57 +56,33 @@ const std::list<std::string> TxtFile::readAnsiOrCodePage() // == readUtf8without ...@@ -78,57 +56,33 @@ const std::list<std::string> TxtFile::readAnsiOrCodePage() // == readUtf8without
return result; return result;
} }
const std::list<std::wstring> TxtFile::readUnicodeFromBytes(char *file_data, long file_size)
const std::list<std::wstring> TxtFile::readUnicode()
{ {
std::list<std::wstring> result; std::list<std::wstring> result;
CFile file_binary;
if (file_binary.OpenFile(std_string2string(m_path)) != S_OK) return result;
long file_size = file_binary.GetFileSize();
char *file_data = new char[file_size];
if (file_data == NULL) return result;
file_binary.ReadFile((BYTE*)file_data, file_size);
long start_pos = 2; // skip Header long start_pos = 2; // skip Header
for (long end_pos = 2; end_pos < file_size; end_pos+=2) for (long end_pos = start_pos; end_pos + 1 < file_size; end_pos += 2)
{
if ((((UTF16*)(file_data+end_pos))[0] == 0x000a) &&
((UTF16*)(file_data+end_pos))[1] == 0x000d)
{ {
if (sizeof(wchar_t) == 4) BYTE cCurChar = file_data[end_pos];
if (0x00 == file_data[end_pos + 1] && (0x0a == cCurChar || 0x0d == cCurChar))
{ {
result.push_back(convertUtf16ToWString((UTF16*)(file_data + start_pos), (end_pos-start_pos) /2)); result.push_back(NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short*)(file_data + start_pos), (end_pos - start_pos) / 2));
} start_pos = end_pos + 2;
else if(0x0d == cCurChar && start_pos + 1 < file_size && 0x00 == file_data[start_pos + 1] && 0x0a == file_data[start_pos])
{ {
std::wstring wstr((wchar_t*)(file_data + start_pos), (wchar_t*)(file_data + end_pos)); end_pos += 2;
result.push_back(wstr); start_pos += 2;
} }
start_pos = end_pos + 4;
m_linesCount++; m_linesCount++;
}
} }
//last
if (sizeof(wchar_t) == 4)
{
result.push_back(convertUtf16ToWString((UTF16*)(file_data + start_pos), (file_size-start_pos) /2));
}
else
{
std::wstring wstr(file_data + start_pos, file_data + file_size);
result.push_back(wstr);
} }
//last
result.push_back(NSFile::CUtf8Converter::GetWStringFromUTF16((unsigned short*)(file_data + start_pos), (file_size - start_pos) / 2));
m_linesCount++; m_linesCount++;
return result; return result;
} }
const std::list<std::wstring> TxtFile::readBigEndian() const std::list<std::wstring> TxtFile::readUnicode()
{ {
std::list<std::wstring> result; std::list<std::wstring> result;
CFile file_binary; CFile file_binary;
...@@ -142,54 +96,31 @@ const std::list<std::wstring> TxtFile::readBigEndian() ...@@ -142,54 +96,31 @@ const std::list<std::wstring> TxtFile::readBigEndian()
file_binary.ReadFile((BYTE*)file_data, file_size); file_binary.ReadFile((BYTE*)file_data, file_size);
long start_pos = 2; // skip Header return readUnicodeFromBytes(file_data, file_size);
}
for (long end_pos = 2; end_pos < file_size; end_pos+=2) const std::list<std::wstring> TxtFile::readBigEndian()
{ {
if (((UTF16*)(file_data+end_pos))[0] == 0x000d && std::list<std::wstring> result;
((UTF16*)(file_data+end_pos))[1] == 0x000a) CFile file_binary;
{
//swap bytes
for (long i = start_pos; i < end_pos; i+=2)
{
char v = file_data[i];
file_data[i] = file_data[i+1];
file_data[i+1] = v;
}
if (sizeof(wchar_t) == 4) if (file_binary.OpenFile(std_string2string(m_path)) != S_OK) return result;
{
result.push_back(convertUtf16ToWString((UTF16*)(file_data + start_pos), (end_pos-start_pos) /2)); long file_size = file_binary.GetFileSize();
} char *file_data = new char[file_size];
else
{ if (file_data == NULL) return result;
std::wstring wstr((wchar_t*)(file_data + start_pos), (wchar_t*)(file_data + end_pos));
result.push_back(wstr); file_binary.ReadFile((BYTE*)file_data, file_size);
}
start_pos = end_pos + 4;
m_linesCount++;
}
}
//last
//swap bytes //swap bytes
for (long i = start_pos; i < file_size; i+=2) for (long i = 0; i < file_size; i+=2)
{ {
char v = file_data[i]; char v = file_data[i];
file_data[i] = file_data[i+1]; file_data[i] = file_data[i+1];
file_data[i+1] = v; file_data[i+1] = v;
} }
if (sizeof(wchar_t) == 4) return readUnicodeFromBytes(file_data, file_size);
{
result.push_back(convertUtf16ToWString((UTF16*)(file_data + start_pos), (file_size-start_pos) /2));
}
else
{
std::wstring wstr(file_data + start_pos, file_data + file_size);
result.push_back(wstr);
}
m_linesCount++;
return result;
} }
...@@ -209,15 +140,20 @@ const std::list<std::string> TxtFile::readUtf8() ...@@ -209,15 +140,20 @@ const std::list<std::string> TxtFile::readUtf8()
long start_pos = 3; //skip header long start_pos = 3; //skip header
for (long end_pos = 3; end_pos < file_size; end_pos++) for (long end_pos = start_pos; end_pos < file_size; end_pos++)
{ {
if (file_data[end_pos] == 0x0a) BYTE cCurChar = file_data[end_pos];
if (0x0a == cCurChar || 0x0d == cCurChar)
{ {
//string from start_pos to end_pos //string from start_pos to end_pos
std::string str(file_data + start_pos, file_data + end_pos); std::string str(file_data + start_pos, file_data + end_pos);
start_pos = end_pos + 1; start_pos = end_pos + 1;
if(0x0d == cCurChar && start_pos < file_size && 0x0a == file_data[start_pos])
{
end_pos++;
start_pos++;
}
result.push_back(str); result.push_back(str);
m_linesCount++; m_linesCount++;
} }
...@@ -228,7 +164,7 @@ const std::list<std::string> TxtFile::readUtf8() ...@@ -228,7 +164,7 @@ const std::list<std::string> TxtFile::readUtf8()
return result; return result;
} }
void TxtFile::writeAnsiOrCodePage(const std::list<std::string>& content) // === writeUtf8withoutPref void TxtFile::writeAnsiOrCodePage(const std::list<std::string>& content) // === writeUtf8withoutPref также
{ {
CFile file; CFile file;
if (file.CreateFile(std_string2string(m_path)) == S_OK) if (file.CreateFile(std_string2string(m_path)) == S_OK)
......
#pragma once #pragma once
#ifndef UTILITY_TXT_FILE_INCLUDE_H_ #ifndef UTILITY_TXT_FILE_INCLUDE_H_
#define UTILITY_TXT_FILE_INCLUDE_H_ #define UTILITY_TXT_FILE_INCLUDE_H_
...@@ -13,8 +13,9 @@ public: ...@@ -13,8 +13,9 @@ public:
TxtFile(const std::wstring & path); TxtFile(const std::wstring & path);
const std::list<std::string> readAnsiOrCodePage(); const std::list<std::string> readAnsiOrCodePage();
const std::list<std::wstring> readUnicodeFromBytes(char *file_data, long file_size);
const std::list<std::wstring> readUnicode(); const std::list<std::wstring> readUnicode();
//const std::list<std::wstring> readUnicodeWithOutBOM(); /// //const std::list<std::wstring> readUnicodeWithOutBOM(); /// не используем
const std::list<std::wstring> readBigEndian(); const std::list<std::wstring> readBigEndian();
const std::list<std::string> readUtf8(); const std::list<std::string> readUtf8();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment