Basic UTF-8 support in ansi build so it can read UTF-8 .nlf files and LangStrings
git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6196 212acab6-be3b-0410-9dea-997c60f758d6
This commit is contained in:
parent
359ad0a055
commit
ef8a83bd41
10 changed files with 227 additions and 5 deletions
|
@ -448,6 +448,9 @@ typedef DWORDLONG ULONGLONG,*PULONGLONG;
|
|||
#ifndef CP_ACP
|
||||
# define CP_ACP 0
|
||||
#endif
|
||||
#ifndef CP_UTF8
|
||||
# define CP_UTF8 65001
|
||||
#endif
|
||||
|
||||
#ifndef COLOR_BTNFACE
|
||||
# define COLOR_BTNFACE 15
|
||||
|
|
|
@ -24,6 +24,7 @@ makensis_files = Split("""
|
|||
strlist.cpp
|
||||
tokens.cpp
|
||||
tstring.cpp
|
||||
utf.cpp
|
||||
util.cpp
|
||||
validateunicode.cpp
|
||||
winchar.cpp
|
||||
|
|
|
@ -121,6 +121,9 @@ CEXEBuild::CEXEBuild() :
|
|||
multiple_entries_instruction=0;
|
||||
|
||||
build_include_depth=0;
|
||||
#ifndef _UNICODE
|
||||
build_include_isutf8=false;
|
||||
#endif
|
||||
|
||||
has_called_write_output=false;
|
||||
|
||||
|
|
|
@ -328,6 +328,9 @@ class CEXEBuild {
|
|||
* this will return a PS_WARNING.
|
||||
*/
|
||||
int SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode);
|
||||
#ifndef _UNICODE
|
||||
int SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Sets the user string to the specific NLF_STRINGS id.
|
||||
|
@ -424,6 +427,9 @@ class CEXEBuild {
|
|||
TCHAR build_output_filename[1024];
|
||||
|
||||
int build_include_depth;
|
||||
#ifndef _UNICODE
|
||||
bool build_include_isutf8; // UTF-8 LangString in .nsh hack for ANSI builds
|
||||
#endif
|
||||
|
||||
// Added by ramon 6 jun 2003
|
||||
#ifdef NSIS_SUPPORT_VERSION_INFO
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "exehead/resource.h"
|
||||
#include <nsis-version.h>
|
||||
#include "tstring.h"
|
||||
#include "utf.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -492,6 +493,9 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un
|
|||
|
||||
int sn;
|
||||
|
||||
if (_tcsclen(str) > NSIS_MAX_STRLEN-1)
|
||||
warning_fl("LangString \"%s\" longer than NSIS_MAX_STRLEN!", name);
|
||||
|
||||
int pos = build_langstrings.get(name, &sn);
|
||||
if (pos < 0)
|
||||
pos = build_langstrings.add(name, &sn);
|
||||
|
@ -502,6 +506,21 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un
|
|||
return PS_OK;
|
||||
}
|
||||
|
||||
#ifndef _UNICODE
|
||||
// ANSI builds only: sets a LangString whose text is supplied as UTF-8.
//
// The text is converted with UTF8ToExeHeadTStr using the code page stored in
// the language table for `lang`, and the converted copy is then handed to
// SetLangString.
//
// @return PS_ERROR if there is no language table for `lang`, if the platform
// cannot convert UTF-8, or if the conversion itself fails. Otherwise the
// return value of SetLangString.
int CEXEBuild::SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8)
{
  // The table is looked up first, the platform probe second (same order as before).
  LanguageTable *langtable = GetLangTable(lang);
  if (NULL == langtable || !Platform_SupportsUTF8Conversion())
    return PS_ERROR;

  int result = PS_ERROR;
  EXEHEADTCHAR_T *converted = UTF8ToExeHeadTStr(stru8, langtable->nlf.m_uCodePage);
  if (NULL != converted)
  {
    // The "unicode" flag is derived from the width of the exehead character type.
    result = SetLangString(name, lang, converted, sizeof(EXEHEADTCHAR_T) > 1);
    ExeHeadTStrFree(converted);
  }
  return result;
}
|
||||
#endif
|
||||
|
||||
// Sets the user string to the specific NLF_STRINGS id.
|
||||
//
|
||||
// @return If the id is invalid or the string is not valid, it will return a
|
||||
|
@ -925,6 +944,11 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
#ifndef _UNICODE
|
||||
char fencoding = 0; // 0 = ansi, 8 = utf-8 (16/17 for utf-16le/be not supported)
|
||||
if (IsUTF8BOM(f)) fencoding = 8;
|
||||
#endif
|
||||
|
||||
// Check header
|
||||
TCHAR buf[NSIS_MAX_STRLEN];
|
||||
buf[0] = SkipComments(f);
|
||||
|
@ -1096,8 +1120,31 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
|
|||
buf[0] = SkipComments(f);
|
||||
|
||||
_fgetts(buf+1, NSIS_MAX_STRLEN, f);
|
||||
#ifndef _UNICODE
|
||||
if (8 == fencoding)
|
||||
{
|
||||
if (!Platform_SupportsUTF8Conversion()) {
|
||||
ERROR_MSG(_T("Error: UTF-8 language files not supported on this OS!\n"));
|
||||
return 0;
|
||||
}
|
||||
EXEHEADTCHAR_T *bufConv = UTF8ToExeHeadTStr(buf, nlf->m_uCodePage);
|
||||
if (!bufConv) {
|
||||
ERROR_MSG(_T("Error: Invalid UTF-8? (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
UINT cch = _tcslen(bufConv);
|
||||
_tcsnccpy(buf, bufConv, NSIS_MAX_STRLEN);
|
||||
if (cch >= NSIS_MAX_STRLEN-1) {
|
||||
buf[NSIS_MAX_STRLEN-1] = _T('\0'); // Make sure we fail the "String too long" check
|
||||
}
|
||||
}
|
||||
ExeHeadTStrFree(bufConv);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (_tcslen(buf) == NSIS_MAX_STRLEN-1) {
|
||||
ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")"), i, NLFStrings[i].szLangStringName);
|
||||
ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
|
||||
return 0;
|
||||
}
|
||||
temp=_tcslen(buf);
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include <cassert> // for assert(3)
|
||||
#include <time.h>
|
||||
#include "tstring.h"
|
||||
#include "utf.h"
|
||||
#include <algorithm>
|
||||
#include "boost/scoped_ptr.hpp"
|
||||
|
||||
|
@ -813,6 +814,10 @@ int CEXEBuild::includeScript(TCHAR *f)
|
|||
return PS_ERROR;
|
||||
}
|
||||
build_include_depth++;
|
||||
#ifndef _UNICODE
|
||||
const bool org_build_include_isutf8 = build_include_isutf8;
|
||||
build_include_isutf8 = IsUTF8BOM(incfp);
|
||||
#endif
|
||||
|
||||
int last_linecnt=linecnt;
|
||||
linecnt=0;
|
||||
|
@ -837,6 +842,10 @@ int CEXEBuild::includeScript(TCHAR *f)
|
|||
restore_timestamp_predefine(oldtimestamp);
|
||||
#endif
|
||||
|
||||
#ifndef _UNICODE
|
||||
build_include_isutf8 = org_build_include_isutf8;
|
||||
#endif
|
||||
|
||||
int errlinecnt=linecnt;
|
||||
|
||||
linecnt=last_linecnt;
|
||||
|
@ -1712,13 +1721,21 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
|
|||
TCHAR *name = line.gettoken_str(1);
|
||||
LANGID lang = line.gettoken_int(2);
|
||||
TCHAR *str = line.gettoken_str(3);
|
||||
int ret = SetLangString(name, lang, str, curfile_unicode);
|
||||
int ret;
|
||||
#ifndef _UNICODE
|
||||
if (build_include_isutf8)
|
||||
ret = SetUTF8LangString(name, lang, str);
|
||||
else
|
||||
#endif
|
||||
ret = SetLangString(name, lang, str, curfile_unicode);
|
||||
|
||||
if (ret == PS_WARNING)
|
||||
warning_fl(_T("LangString \"%s\" set multiple times for %d, wasting space"), name, lang);
|
||||
else if (ret == PS_ERROR) {
|
||||
ERROR_MSG(_T("Error: can't set LangString \"%s\"!\n"), name);
|
||||
return PS_ERROR;
|
||||
}
|
||||
// BUGBUG: Does not display UTF-8 properly.
|
||||
SCRIPT_MSG(_T("LangString: \"%s\" %d \"%s\"\n"), name, lang, str);
|
||||
}
|
||||
return PS_OK;
|
||||
|
|
85
Source/utf.cpp
Normal file
85
Source/utf.cpp
Normal file
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* utf.cpp
|
||||
*
|
||||
* This file is a part of NSIS.
|
||||
*
|
||||
* Copyright (C) 2011 Anders Kjersem
|
||||
*
|
||||
* Licensed under the zlib/libpng license (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
* Licence details can be found in the file COPYING.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "utf.h"
|
||||
|
||||
// BUGBUG: We might want to use MB_ERR_INVALID_CHARS but it is not supported
|
||||
// on < WinXP or in our current POSIX implementation.
|
||||
static const int UTF8MBTWCFLAGS = 0;
|
||||
|
||||
|
||||
#define ExeHeadWStrFree free
|
||||
// Allocates an uninitialized buffer with room for `cch` EXEHEADWCHAR_T
// elements using malloc(). Returns NULL if the allocation fails; release the
// buffer with ExeHeadWStrFree.
static EXEHEADWCHAR_T* ExeHeadWStrAlloc(UINT cch)
{
  void *mem = malloc(cch*sizeof(EXEHEADWCHAR_T));
#if 0
  // TODO: POSIX versions of G/SetLastError would have to be added
  // before we can report _why_ UTF8ToExeHeadTStr failed...
  if (!mem) SetLastError(ERROR_OUTOFMEMORY);
#endif
  return (EXEHEADWCHAR_T*) mem;
}
|
||||
|
||||
#ifdef _UNICODE
|
||||
#else // !_UNICODE
|
||||
|
||||
// Converts the null-terminated UTF-8 string StrU8 to the exehead string type
// of an ANSI build, going through a temporary wide-character buffer:
// UTF-8 -> wide -> `Codepage`.
//
// @param StrU8    Null-terminated UTF-8 input.
// @param Codepage Target code page for the returned string.
// @return A malloc'ed string the caller must release with ExeHeadTStrFree,
//         or NULL if any conversion step or allocation fails.
EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage)
{
  // First pass: only query the required size of the wide buffer.
  const int wideLen = MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,NULL,0);
  if (0 == wideLen) return NULL;

  WCHAR *wide = (WCHAR*) ExeHeadWStrAlloc(wideLen);
  if (NULL == wide) return NULL;

  EXEHEADTCHAR_T *result = NULL;
  const int decoded = MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,wide,wideLen);
  if (0 != decoded)
  {
    // Same size-query-then-convert approach for the wide -> code page step.
    const int narrowLen = WideCharToMultiByte(Codepage,0,wide,wideLen,NULL,0,NULL,NULL);
    if (0 != narrowLen)
    {
      result = ExeHeadTStrAlloc(narrowLen);
      if (NULL != result)
      {
        const int written = WideCharToMultiByte(Codepage,0,wide,wideLen,result,narrowLen,NULL,NULL);
        if (0 == written)
        {
          // Conversion failed after the size query succeeded; do not leak the output
          ExeHeadTStrFree(result);
          result = NULL;
        }
      }
    }
  }

  ExeHeadWStrFree(wide);
  return result;
}
|
||||
|
||||
#endif // ?_UNICODE
|
||||
|
||||
|
||||
/**
 * Tests whether the stream holds, at its current read position, the UTF-8
 * byte order mark (EF BB BF).
 *
 * @param fstrm Stream opened for reading.
 * @return true if the BOM was found; the three BOM bytes stay consumed.
 *         false otherwise; the bytes that were read are handed back so the
 *         next read sees the same data as before the call.
 */
bool IsUTF8BOM(FILE*fstrm)
{
  static const unsigned char bom[3] = { 0xef, 0xbb, 0xbf };

  // ungetc is only guaranteed to support 1 pushback, but rejecting a stream
  // that starts with "EF BB xx" means giving back up to three bytes.
  // Remember the starting position so we can seek back instead.
  fpos_t startpos;
  const bool havepos = 0 == fgetpos(fstrm, &startpos);

  int got[3];
  int cnt = 0;
  bool matching = true;
  while (matching && cnt < 3)
  {
    got[cnt] = fgetc(fstrm);
    matching = got[cnt] == bom[cnt]; // EOF never compares equal to a BOM byte
    ++cnt;
  }
  if (matching) return true; // Complete BOM, leave it consumed

  // If the very first read failed nothing was consumed, nothing to restore
  if (1 == cnt && EOF == got[0]) return false;

  // Preferred way of restoring the data: go back to where we started
  if (havepos && 0 == fsetpos(fstrm, &startpos)) return false;

  // Fallback for streams that cannot be repositioned: push the bytes back
  // in reverse order and hope the C library accepts more than one pushback.
  while (cnt > 0)
  {
    --cnt;
    if (EOF != got[cnt]) ungetc(got[cnt], fstrm);
  }
  return false;
}
|
43
Source/utf.h
Normal file
43
Source/utf.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* utf.h
|
||||
*
|
||||
* This file is a part of NSIS.
|
||||
*
|
||||
* Copyright (C) 2011 Anders Kjersem
|
||||
*
|
||||
* Licensed under the zlib/libpng license (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
*
|
||||
* Licence details can be found in the file COPYING.
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "Platform.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef unsigned short EXEHEADWCHAR_T;
|
||||
|
||||
|
||||
#ifdef _UNICODE
|
||||
typedef EXEHEADWCHAR_T EXEHEADTCHAR_T;
|
||||
|
||||
#else // !_UNICODE
|
||||
typedef char EXEHEADTCHAR_T;
|
||||
|
||||
#define ExeHeadTStrFree free
|
||||
// Allocates `cb` bytes with malloc() for an exehead string.
// Returns NULL if the allocation fails; release with ExeHeadTStrFree.
inline EXEHEADTCHAR_T* ExeHeadTStrAlloc(UINT cb)
{
  void *mem = malloc(cb);
  return (EXEHEADTCHAR_T*) mem;
}
|
||||
extern EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage);
|
||||
|
||||
#endif // ?_UNICODE
|
||||
|
||||
|
||||
/**
|
||||
* Tries to peek at the first few bytes in the stream to determine if it is a UTF-8 BOM.
|
||||
* If it is a UTF-8 BOM it will eat the BOM,
|
||||
* if it is not it tries its best to restore the data.
|
||||
*/
|
||||
extern bool IsUTF8BOM(FILE*fstrm);
|
|
@ -200,8 +200,15 @@ inline size_t nsis_iconv_adaptor
|
|||
}
|
||||
|
||||
// Writes the textual name of `code_page` into `buf` (at most `len` characters,
// as limited by _sntprintf):
//   CP_ACP      -> "CP1252" (CP_ACP is mapped to 1252 here)
//   CP_UTF8     -> "UTF-8"
//   other value -> "CP<number>"
void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
  // The switch must not be guarded by "if (code_page == CP_ACP)";
  // with such a guard the CP_UTF8 case could never be reached.
  switch(code_page)
  {
  case CP_ACP:
    code_page = 1252;
    break;
  case CP_UTF8:
    _sntprintf(buf, len, _T("UTF-8"));
    return;
  }

  _sntprintf(buf, len, _T("CP%d"), code_page);
}
|
||||
|
@ -209,7 +216,7 @@ void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
|
|||
int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
|
||||
int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
|
||||
LPBOOL lpUsedDefaultChar) {
|
||||
static char buffer[4096];
|
||||
static char buffer[4096]; // BUGBUG: Should this be 4*NSIS_MAX_STRLEN for large string build?
|
||||
|
||||
char cp[128];
|
||||
create_code_page_string(cp, sizeof(cp), CodePage);
|
||||
|
@ -245,7 +252,7 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
|
|||
|
||||
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
|
||||
int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar) {
|
||||
static WCHAR buffer[4096];
|
||||
static WCHAR buffer[4096]; // BUGBUG: Should this be 4*NSIS_MAX_STRLEN for large string build?
|
||||
|
||||
char cp[128];
|
||||
create_code_page_string(cp, sizeof(cp), CodePage);
|
||||
|
@ -900,3 +907,10 @@ bool GetDLLVersion(const tstring& filepath, DWORD& high, DWORD& low)
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Platform_SupportsUTF8Conversion()
|
||||
{
|
||||
static unsigned char cached = -1;
|
||||
if (-1 == cached) cached = !!IsValidCodePage(CP_UTF8);
|
||||
return cached != 0;
|
||||
}
|
|
@ -185,4 +185,7 @@ RM_DEFINE_FREEFUNC(my_convert_free);
|
|||
# define PATH_CONVERT(x)
|
||||
#endif
|
||||
|
||||
// Platform detection
|
||||
bool Platform_SupportsUTF8Conversion();
|
||||
|
||||
#endif //_UTIL_H_
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue