2010-04-20 09:04:26 +00:00
|
|
|
// tstring.cpp
|
|
|
|
//
|
|
|
|
// This file is a part of Unicode NSIS.
|
|
|
|
//
|
|
|
|
// Copyright (C) 2007-2009 Jim Park
|
|
|
|
//
|
|
|
|
// Licensed under the zlib/libpng license (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
//
|
|
|
|
// This software is provided 'as-is', without any expressed or implied
|
|
|
|
// warranty.
|
|
|
|
//
|
|
|
|
// Provides TSTRING support.
|
|
|
|
|
|
|
|
#ifdef _UNICODE
|
|
|
|
|
|
|
|
#include "tstring.h"
|
|
|
|
#include "validateunicode.h"
|
2010-05-03 10:12:33 +00:00
|
|
|
#include "util.h"
|
2010-04-20 09:04:26 +00:00
|
|
|
#include <vector>
|
|
|
|
|
2010-05-28 13:10:16 +00:00
|
|
|
FILE* FileOpenUnicodeText(const TCHAR* file, const TCHAR* mode, BOOL* unicode)
|
2010-04-20 09:04:26 +00:00
|
|
|
{
|
|
|
|
extern FILE *g_output;
|
2010-05-05 14:14:48 +00:00
|
|
|
CValidateUnicode::FILE_TYPE ftype = CValidateUnicode::UTF_8; // default file format is UTF-8
|
2010-05-28 13:10:16 +00:00
|
|
|
if (unicode) *unicode = TRUE;
|
2010-04-20 09:04:26 +00:00
|
|
|
|
|
|
|
// If we are reading an existing file, check to see what type of file it
|
|
|
|
// is first.
|
|
|
|
if (_tcsstr(mode, _T("w+")) ||
|
|
|
|
_tcsstr(mode, _T("r")))
|
|
|
|
{
|
2010-05-03 10:12:33 +00:00
|
|
|
FILE* fp = _tfopen(file, _T("rb"));
|
2010-04-20 09:04:26 +00:00
|
|
|
|
|
|
|
if (fp)
|
|
|
|
{
|
2010-05-03 10:12:33 +00:00
|
|
|
MANAGE_WITH(fp, fclose);
|
2010-04-20 09:04:26 +00:00
|
|
|
fseek(fp, 0, SEEK_END);
|
|
|
|
size_t fileSize = ftell(fp);
|
|
|
|
if (fileSize == 0)
|
|
|
|
{
|
|
|
|
// Empty files are treated as UTF-8.
|
|
|
|
ftype = CValidateUnicode::UTF_8;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
std::vector<unsigned char> buffer(fileSize);
|
|
|
|
fseek(fp, 0, SEEK_SET);
|
|
|
|
fread(&buffer[0], sizeof(unsigned char), fileSize, fp);
|
|
|
|
|
|
|
|
ftype = CValidateUnicode::CheckBOM(&buffer[0], buffer.size());
|
|
|
|
|
|
|
|
switch (ftype)
|
|
|
|
{
|
|
|
|
case CValidateUnicode::UTF_8:
|
|
|
|
case CValidateUnicode::UTF_16LE:
|
|
|
|
case CValidateUnicode::UTF_16BE:
|
|
|
|
break;
|
|
|
|
case CValidateUnicode::UTF_32LE:
|
|
|
|
case CValidateUnicode::UTF_32BE:
|
2011-11-09 10:30:11 +00:00
|
|
|
PrintColorFmtMsg_ERR(_T("File '%s' has a BOM marked as %s which is not supported at this time.\n"),
|
2010-04-20 09:04:26 +00:00
|
|
|
file, CValidateUnicode::TypeToName(ftype));
|
|
|
|
exit(-1);
|
|
|
|
break;
|
|
|
|
case CValidateUnicode::UNKNOWN:
|
|
|
|
// If unknown, let's see if it's not just UTF_8 without a BOM.
|
2010-04-20 15:29:55 +00:00
|
|
|
if (CValidateUnicode::ValidateUTF8(&buffer[0], buffer.size()) == 2)
|
2010-04-20 09:04:26 +00:00
|
|
|
{
|
2010-04-20 15:29:55 +00:00
|
|
|
// contains UTF-8 characters sequences
|
2010-04-20 09:04:26 +00:00
|
|
|
_ftprintf(g_output, _T("File '%s' has no BOM but seems to be UTF-8.\n"), file);
|
2010-04-20 15:29:55 +00:00
|
|
|
ftype = CValidateUnicode::UTF_8;
|
|
|
|
}
|
2010-04-20 09:04:26 +00:00
|
|
|
break;
|
|
|
|
default:
|
2011-11-09 10:30:11 +00:00
|
|
|
PrintColorFmtMsg_ERR(_T("CValidateUnicode::CheckBOM() for file '%s' returned an unknown return value: %d\n"),
|
2010-04-20 09:04:26 +00:00
|
|
|
file, ftype);
|
|
|
|
exit(-1);
|
|
|
|
break;
|
|
|
|
}
|
2010-04-20 15:29:55 +00:00
|
|
|
}
|
2010-04-20 09:04:26 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
tstring strMode(mode);
|
|
|
|
|
|
|
|
switch (ftype)
|
|
|
|
{
|
|
|
|
case CValidateUnicode::UTF_8:
|
|
|
|
strMode.append(_T(", ccs=UTF-8"));
|
|
|
|
break;
|
|
|
|
case CValidateUnicode::UTF_16LE:
|
|
|
|
strMode.append(_T(", ccs=UTF-16LE"));
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
// Looks like fopen() doesn't support other encodings of Unicode.
|
2010-05-28 13:10:16 +00:00
|
|
|
if (unicode) *unicode = FALSE;
|
2010-04-20 09:04:26 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return _tfopen(file, strMode.c_str());
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|