Basic UTF-8 support in ansi build so it can read UTF-8 .nlf files and LangStrings
git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6196 212acab6-be3b-0410-9dea-997c60f758d6
This commit is contained in:
parent
359ad0a055
commit
ef8a83bd41
10 changed files with 227 additions and 5 deletions
|
@ -448,6 +448,9 @@ typedef DWORDLONG ULONGLONG,*PULONGLONG;
|
||||||
#ifndef CP_ACP
|
#ifndef CP_ACP
|
||||||
# define CP_ACP 0
|
# define CP_ACP 0
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef CP_UTF8
|
||||||
|
# define CP_UTF8 65001
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef COLOR_BTNFACE
|
#ifndef COLOR_BTNFACE
|
||||||
# define COLOR_BTNFACE 15
|
# define COLOR_BTNFACE 15
|
||||||
|
|
|
@ -24,6 +24,7 @@ makensis_files = Split("""
|
||||||
strlist.cpp
|
strlist.cpp
|
||||||
tokens.cpp
|
tokens.cpp
|
||||||
tstring.cpp
|
tstring.cpp
|
||||||
|
utf.cpp
|
||||||
util.cpp
|
util.cpp
|
||||||
validateunicode.cpp
|
validateunicode.cpp
|
||||||
winchar.cpp
|
winchar.cpp
|
||||||
|
|
|
@ -121,6 +121,9 @@ CEXEBuild::CEXEBuild() :
|
||||||
multiple_entries_instruction=0;
|
multiple_entries_instruction=0;
|
||||||
|
|
||||||
build_include_depth=0;
|
build_include_depth=0;
|
||||||
|
#ifndef _UNICODE
|
||||||
|
build_include_isutf8=false;
|
||||||
|
#endif
|
||||||
|
|
||||||
has_called_write_output=false;
|
has_called_write_output=false;
|
||||||
|
|
||||||
|
|
|
@ -328,6 +328,9 @@ class CEXEBuild {
|
||||||
* this will return a PS_WARNING.
|
* this will return a PS_WARNING.
|
||||||
*/
|
*/
|
||||||
int SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode);
|
int SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode);
|
||||||
|
#ifndef _UNICODE
|
||||||
|
int SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8);
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the user string to the specific NLF_STRINGS id.
|
* Sets the user string to the specific NLF_STRINGS id.
|
||||||
|
@ -424,6 +427,9 @@ class CEXEBuild {
|
||||||
TCHAR build_output_filename[1024];
|
TCHAR build_output_filename[1024];
|
||||||
|
|
||||||
int build_include_depth;
|
int build_include_depth;
|
||||||
|
#ifndef _UNICODE
|
||||||
|
bool build_include_isutf8; // UTF-8 LangString in .nsh hack for ANSI builds
|
||||||
|
#endif
|
||||||
|
|
||||||
// Added by ramon 6 jun 2003
|
// Added by ramon 6 jun 2003
|
||||||
#ifdef NSIS_SUPPORT_VERSION_INFO
|
#ifdef NSIS_SUPPORT_VERSION_INFO
|
||||||
|
|
|
@ -26,6 +26,7 @@
|
||||||
#include "exehead/resource.h"
|
#include "exehead/resource.h"
|
||||||
#include <nsis-version.h>
|
#include <nsis-version.h>
|
||||||
#include "tstring.h"
|
#include "tstring.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -492,6 +493,9 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un
|
||||||
|
|
||||||
int sn;
|
int sn;
|
||||||
|
|
||||||
|
if (_tcsclen(str) > NSIS_MAX_STRLEN-1)
|
||||||
|
warning_fl("LangString \"%s\" longer than NSIS_MAX_STRLEN!", name);
|
||||||
|
|
||||||
int pos = build_langstrings.get(name, &sn);
|
int pos = build_langstrings.get(name, &sn);
|
||||||
if (pos < 0)
|
if (pos < 0)
|
||||||
pos = build_langstrings.add(name, &sn);
|
pos = build_langstrings.add(name, &sn);
|
||||||
|
@ -502,6 +506,21 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un
|
||||||
return PS_OK;
|
return PS_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _UNICODE
|
||||||
|
// ANSI-build helper: converts a UTF-8 encoded LangString to the code page of
// the language table for `lang` and then stores it through SetLangString.
//
// @param name   LangString name, forwarded to SetLangString.
// @param lang   Language id used to look up the language table.
// @param stru8  Zero terminated UTF-8 text.
// @return PS_ERROR when there is no language table, when the platform cannot
//         convert UTF-8 or when the conversion fails; otherwise whatever
//         SetLangString returns.
int CEXEBuild::SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8)
{
  LanguageTable *langtable = GetLangTable(lang);
  // The table lookup comes first; the platform probe only runs when a table exists.
  if (!langtable || !Platform_SupportsUTF8Conversion())
    return PS_ERROR;

  EXEHEADTCHAR_T *converted = UTF8ToExeHeadTStr(stru8, langtable->nlf.m_uCodePage);
  if (!converted)
    return PS_ERROR;

  // The "unicode" flag simply reflects the width of the exehead character type.
  const BOOL widechars = sizeof(EXEHEADTCHAR_T) > 1;
  const int result = SetLangString(name, lang, converted, widechars);

  // The converted copy is owned by this function.
  ExeHeadTStrFree(converted);
  return result;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Sets the user string to the specific NLF_STRINGS id.
|
// Sets the user string to the specific NLF_STRINGS id.
|
||||||
//
|
//
|
||||||
// @return If the id is invalid or the string is not valid, it will return a
|
// @return If the id is invalid or the string is not valid, it will return a
|
||||||
|
@ -925,6 +944,11 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef _UNICODE
|
||||||
|
char fencoding = 0; // 0 = ansi, 8 = utf-8 (16/17 for utf-16le/be not supported)
|
||||||
|
if (IsUTF8BOM(f)) fencoding = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// Check header
|
// Check header
|
||||||
TCHAR buf[NSIS_MAX_STRLEN];
|
TCHAR buf[NSIS_MAX_STRLEN];
|
||||||
buf[0] = SkipComments(f);
|
buf[0] = SkipComments(f);
|
||||||
|
@ -1096,8 +1120,31 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
|
||||||
buf[0] = SkipComments(f);
|
buf[0] = SkipComments(f);
|
||||||
|
|
||||||
_fgetts(buf+1, NSIS_MAX_STRLEN, f);
|
_fgetts(buf+1, NSIS_MAX_STRLEN, f);
|
||||||
|
#ifndef _UNICODE
|
||||||
|
if (8 == fencoding)
|
||||||
|
{
|
||||||
|
if (!Platform_SupportsUTF8Conversion()) {
|
||||||
|
ERROR_MSG(_T("Error: UTF-8 language files not supported on this OS!\n"));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EXEHEADTCHAR_T *bufConv = UTF8ToExeHeadTStr(buf, nlf->m_uCodePage);
|
||||||
|
if (!bufConv) {
|
||||||
|
ERROR_MSG(_T("Error: Invalid UTF-8? (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
UINT cch = _tcslen(bufConv);
|
||||||
|
_tcsnccpy(buf, bufConv, NSIS_MAX_STRLEN);
|
||||||
|
if (cch >= NSIS_MAX_STRLEN-1) {
|
||||||
|
buf[NSIS_MAX_STRLEN-1] = _T('\0'); // Make sure we fail the "String too long" check
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ExeHeadTStrFree(bufConv);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (_tcslen(buf) == NSIS_MAX_STRLEN-1) {
|
if (_tcslen(buf) == NSIS_MAX_STRLEN-1) {
|
||||||
ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")"), i, NLFStrings[i].szLangStringName);
|
ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
temp=_tcslen(buf);
|
temp=_tcslen(buf);
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include <cassert> // for assert(3)
|
#include <cassert> // for assert(3)
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include "tstring.h"
|
#include "tstring.h"
|
||||||
|
#include "utf.h"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "boost/scoped_ptr.hpp"
|
#include "boost/scoped_ptr.hpp"
|
||||||
|
|
||||||
|
@ -813,6 +814,10 @@ int CEXEBuild::includeScript(TCHAR *f)
|
||||||
return PS_ERROR;
|
return PS_ERROR;
|
||||||
}
|
}
|
||||||
build_include_depth++;
|
build_include_depth++;
|
||||||
|
#ifndef _UNICODE
|
||||||
|
const bool org_build_include_isutf8 = build_include_isutf8;
|
||||||
|
build_include_isutf8 = IsUTF8BOM(incfp);
|
||||||
|
#endif
|
||||||
|
|
||||||
int last_linecnt=linecnt;
|
int last_linecnt=linecnt;
|
||||||
linecnt=0;
|
linecnt=0;
|
||||||
|
@ -837,6 +842,10 @@ int CEXEBuild::includeScript(TCHAR *f)
|
||||||
restore_timestamp_predefine(oldtimestamp);
|
restore_timestamp_predefine(oldtimestamp);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef _UNICODE
|
||||||
|
build_include_isutf8 = org_build_include_isutf8;
|
||||||
|
#endif
|
||||||
|
|
||||||
int errlinecnt=linecnt;
|
int errlinecnt=linecnt;
|
||||||
|
|
||||||
linecnt=last_linecnt;
|
linecnt=last_linecnt;
|
||||||
|
@ -1712,13 +1721,21 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
|
||||||
TCHAR *name = line.gettoken_str(1);
|
TCHAR *name = line.gettoken_str(1);
|
||||||
LANGID lang = line.gettoken_int(2);
|
LANGID lang = line.gettoken_int(2);
|
||||||
TCHAR *str = line.gettoken_str(3);
|
TCHAR *str = line.gettoken_str(3);
|
||||||
int ret = SetLangString(name, lang, str, curfile_unicode);
|
int ret;
|
||||||
|
#ifndef _UNICODE
|
||||||
|
if (build_include_isutf8)
|
||||||
|
ret = SetUTF8LangString(name, lang, str);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
ret = SetLangString(name, lang, str, curfile_unicode);
|
||||||
|
|
||||||
if (ret == PS_WARNING)
|
if (ret == PS_WARNING)
|
||||||
warning_fl(_T("LangString \"%s\" set multiple times for %d, wasting space"), name, lang);
|
warning_fl(_T("LangString \"%s\" set multiple times for %d, wasting space"), name, lang);
|
||||||
else if (ret == PS_ERROR) {
|
else if (ret == PS_ERROR) {
|
||||||
ERROR_MSG(_T("Error: can't set LangString \"%s\"!\n"), name);
|
ERROR_MSG(_T("Error: can't set LangString \"%s\"!\n"), name);
|
||||||
return PS_ERROR;
|
return PS_ERROR;
|
||||||
}
|
}
|
||||||
|
// BUGBUG: Does not display UTF-8 properly.
|
||||||
SCRIPT_MSG(_T("LangString: \"%s\" %d \"%s\"\n"), name, lang, str);
|
SCRIPT_MSG(_T("LangString: \"%s\" %d \"%s\"\n"), name, lang, str);
|
||||||
}
|
}
|
||||||
return PS_OK;
|
return PS_OK;
|
||||||
|
|
85
Source/utf.cpp
Normal file
85
Source/utf.cpp
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* utf.cpp
|
||||||
|
*
|
||||||
|
* This file is a part of NSIS.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2011 Anders Kjersem
|
||||||
|
*
|
||||||
|
* Licensed under the zlib/libpng license (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* Licence details can be found in the file COPYING.
|
||||||
|
*
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
// BUGBUG: We might want to use MB_ERR_INVALID_CHARS but it is not supported
|
||||||
|
// on < WinXP or in our current POSIX implementation.
|
||||||
|
static const int UTF8MBTWCFLAGS = 0;
|
||||||
|
|
||||||
|
|
||||||
|
#define ExeHeadWStrFree free
|
||||||
|
// Allocates uninitialized storage for cch EXEHEADWCHAR_T elements with malloc.
// Returns NULL when the allocation fails; release with ExeHeadWStrFree.
static EXEHEADWCHAR_T* ExeHeadWStrAlloc(UINT cch)
{
  void* const mem = malloc(cch*sizeof(EXEHEADWCHAR_T));
#if 0
  // TODO: We should add POSIX versions of G/SetLastError
  // if we want to tell _why_ UTF8ToExeHeadTStr failed...
  if (!mem) SetLastError(ERROR_OUTOFMEMORY);
#endif
  return (EXEHEADWCHAR_T*) mem;
}
|
||||||
|
|
||||||
|
#ifdef _UNICODE
|
||||||
|
#else // !_UNICODE
|
||||||
|
|
||||||
|
// Converts a zero terminated UTF-8 string to a newly allocated string in the
// requested code page, going through an intermediate wide string.
//
// @param StrU8     Zero terminated UTF-8 input.
// @param Codepage  Target code page passed to WideCharToMultiByte.
// @return Buffer obtained from ExeHeadTStrAlloc (free it with ExeHeadTStrFree),
//         or NULL if any conversion step or allocation fails.
EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage)
{
  // First pass only measures; the length includes the terminator because -1 is passed.
  const int wideLen = MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,NULL,0);
  if (!wideLen) return NULL;

  WCHAR *wide = (WCHAR*) ExeHeadWStrAlloc(wideLen);
  if (!wide) return NULL;

  EXEHEADTCHAR_T *result = NULL;
  do
  {
    if (!MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,wide,wideLen)) break;

    // Measure, allocate, then convert into the target code page.
    const int narrowLen = WideCharToMultiByte(Codepage,0,wide,wideLen,NULL,0,NULL,NULL);
    if (!narrowLen) break;

    result = ExeHeadTStrAlloc(narrowLen);
    if (!result) break;

    if (WideCharToMultiByte(Codepage,0,wide,wideLen,result,narrowLen,NULL,NULL)) break;

    // Final conversion failed, do not hand back a half-filled buffer.
    ExeHeadTStrFree(result);
    result = NULL;
  } while (0);

  ExeHeadWStrFree(wide);
  return result;
}
|
||||||
|
|
||||||
|
#endif // ?_UNICODE
|
||||||
|
|
||||||
|
|
||||||
|
// Checks whether the next bytes of the stream are the UTF-8 BOM (EF BB BF).
//
// If the BOM is found it is consumed and true is returned. Otherwise false is
// returned and the bytes that were read are put back:
//  - preferably by seeking back to the position saved with fgetpos, because
//    ungetc is only guaranteed to support a single byte of pushback;
//  - if the stream position cannot be saved or restored, the bytes are pushed
//    back with ungetc as a best effort.
// An empty stream returns false without touching the stream state any further.
bool IsUTF8BOM(FILE*fstrm)
{
  static const unsigned char bom[3] = { 0xef, 0xbb, 0xbf };

  fpos_t startpos;
  const bool havepos = (0 == fgetpos(fstrm, &startpos));

  int seen[3];
  unsigned int nread = 0;
  bool isbom = true;
  while (isbom && nread < 3)
  {
    const int c = fgetc(fstrm);
    if (EOF == c) { isbom = false; break; }
    seen[nread] = c;
    isbom = (bom[nread] == c);
    ++nread;
  }

  if (isbom) return true; // All three bytes matched, the BOM stays consumed.
  if (0 == nread) return false; // Nothing was consumed, nothing to restore.

  // Preferred restore path, works for any number of consumed bytes.
  if (havepos && 0 == fsetpos(fstrm, &startpos)) return false;

  // Fallback: push the bytes back in reverse order.
  while (nread) ungetc(seen[--nread], fstrm);
  return false;
}
|
43
Source/utf.h
Normal file
43
Source/utf.h
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* utf.h
|
||||||
|
*
|
||||||
|
* This file is a part of NSIS.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2011 Anders Kjersem
|
||||||
|
*
|
||||||
|
* Licensed under the zlib/libpng license (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* Licence details can be found in the file COPYING.
|
||||||
|
*
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef NSIS_UTF_H
#define NSIS_UTF_H
// Include guard: this header defines an inline function, so it must be safe
// to include more than once per translation unit.

#include "Platform.h"
#include <stdlib.h>
#include <stdio.h>

// Wide character element type for exehead strings.
typedef unsigned short EXEHEADWCHAR_T;

#ifdef _UNICODE
typedef EXEHEADWCHAR_T EXEHEADTCHAR_T;

#else // !_UNICODE
typedef char EXEHEADTCHAR_T;

// ExeHeadTStrAlloc returns malloc'ed storage of cb bytes (NULL on failure);
// release it with ExeHeadTStrFree.
#define ExeHeadTStrFree free
inline EXEHEADTCHAR_T* ExeHeadTStrAlloc(UINT cb) {return (EXEHEADTCHAR_T*) malloc(cb);}

/**
 * Converts a zero terminated UTF-8 string to the given code page.
 * Returns a buffer that must be released with ExeHeadTStrFree,
 * or NULL if the conversion fails.
 */
extern EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage);

#endif // ?_UNICODE

/**
 * Tries to peek at the first few bytes in the stream to determine if it is a UTF-8 BOM.
 * If it is a UTF-8 BOM it will eat the BOM,
 * if it is not it tries its best to restore the data.
 */
extern bool IsUTF8BOM(FILE*fstrm);

#endif // NSIS_UTF_H
|
|
@ -200,8 +200,15 @@ inline size_t nsis_iconv_adaptor
|
||||||
}
|
}
|
||||||
|
|
||||||
void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
|
void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
|
||||||
if (code_page == CP_ACP)
|
switch(code_page)
|
||||||
|
{
|
||||||
|
case CP_ACP:
|
||||||
code_page = 1252;
|
code_page = 1252;
|
||||||
|
break;
|
||||||
|
case CP_UTF8:
|
||||||
|
_sntprintf(buf, len, _T("UTF-8"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
_sntprintf(buf, len, _T("CP%d"), code_page);
|
_sntprintf(buf, len, _T("CP%d"), code_page);
|
||||||
}
|
}
|
||||||
|
@ -209,7 +216,7 @@ void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
|
||||||
int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
|
int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
|
||||||
int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
|
int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
|
||||||
LPBOOL lpUsedDefaultChar) {
|
LPBOOL lpUsedDefaultChar) {
|
||||||
static char buffer[4096];
|
static char buffer[4096]; // BUGBUG: Should this be 4*NSIS_MAX_STRLEN for large string build?
|
||||||
|
|
||||||
char cp[128];
|
char cp[128];
|
||||||
create_code_page_string(cp, sizeof(cp), CodePage);
|
create_code_page_string(cp, sizeof(cp), CodePage);
|
||||||
|
@ -245,7 +252,7 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
|
||||||
|
|
||||||
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
|
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
|
||||||
int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar) {
|
int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar) {
|
||||||
static WCHAR buffer[4096];
|
static WCHAR buffer[4096]; // BUGBUG: Should this be 4*NSIS_MAX_STRLEN for large string build?
|
||||||
|
|
||||||
char cp[128];
|
char cp[128];
|
||||||
create_code_page_string(cp, sizeof(cp), CodePage);
|
create_code_page_string(cp, sizeof(cp), CodePage);
|
||||||
|
@ -900,3 +907,10 @@ bool GetDLLVersion(const tstring& filepath, DWORD& high, DWORD& low)
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Platform_SupportsUTF8Conversion()
|
||||||
|
{
|
||||||
|
static unsigned char cached = -1;
|
||||||
|
if (-1 == cached) cached = !!IsValidCodePage(CP_UTF8);
|
||||||
|
return cached != 0;
|
||||||
|
}
|
|
@ -185,4 +185,7 @@ RM_DEFINE_FREEFUNC(my_convert_free);
|
||||||
# define PATH_CONVERT(x)
|
# define PATH_CONVERT(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Platform detection
|
||||||
|
bool Platform_SupportsUTF8Conversion();
|
||||||
|
|
||||||
#endif //_UTIL_H_
|
#endif //_UTIL_H_
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue