Basic UTF-8 support in ansi build so it can read UTF-8 .nlf files and LangStrings

git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6196 212acab6-be3b-0410-9dea-997c60f758d6
This commit is contained in:
anders_k 2011-12-05 23:44:26 +00:00
parent 359ad0a055
commit ef8a83bd41
10 changed files with 227 additions and 5 deletions

View file

@ -448,6 +448,9 @@ typedef DWORDLONG ULONGLONG,*PULONGLONG;
#ifndef CP_ACP #ifndef CP_ACP
# define CP_ACP 0 # define CP_ACP 0
#endif #endif
#ifndef CP_UTF8
# define CP_UTF8 65001
#endif
#ifndef COLOR_BTNFACE #ifndef COLOR_BTNFACE
# define COLOR_BTNFACE 15 # define COLOR_BTNFACE 15

View file

@ -24,6 +24,7 @@ makensis_files = Split("""
strlist.cpp strlist.cpp
tokens.cpp tokens.cpp
tstring.cpp tstring.cpp
utf.cpp
util.cpp util.cpp
validateunicode.cpp validateunicode.cpp
winchar.cpp winchar.cpp

View file

@ -121,6 +121,9 @@ CEXEBuild::CEXEBuild() :
multiple_entries_instruction=0; multiple_entries_instruction=0;
build_include_depth=0; build_include_depth=0;
#ifndef _UNICODE
build_include_isutf8=false;
#endif
has_called_write_output=false; has_called_write_output=false;

View file

@ -328,6 +328,9 @@ class CEXEBuild {
* this will return a PS_WARNING. * this will return a PS_WARNING.
*/ */
int SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode); int SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode);
#ifndef _UNICODE
int SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8);
#endif
/** /**
* Sets the user string to the specific NLF_STRINGS id. * Sets the user string to the specific NLF_STRINGS id.
@ -424,6 +427,9 @@ class CEXEBuild {
TCHAR build_output_filename[1024]; TCHAR build_output_filename[1024];
int build_include_depth; int build_include_depth;
#ifndef _UNICODE
bool build_include_isutf8; // UTF-8 LangString in .nsh hack for ANSI builds
#endif
// Added by ramon 6 jun 2003 // Added by ramon 6 jun 2003
#ifdef NSIS_SUPPORT_VERSION_INFO #ifdef NSIS_SUPPORT_VERSION_INFO

View file

@ -26,6 +26,7 @@
#include "exehead/resource.h" #include "exehead/resource.h"
#include <nsis-version.h> #include <nsis-version.h>
#include "tstring.h" #include "tstring.h"
#include "utf.h"
using namespace std; using namespace std;
@ -492,6 +493,9 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un
int sn; int sn;
if (_tcsclen(str) > NSIS_MAX_STRLEN-1)
warning_fl("LangString \"%s\" longer than NSIS_MAX_STRLEN!", name);
int pos = build_langstrings.get(name, &sn); int pos = build_langstrings.get(name, &sn);
if (pos < 0) if (pos < 0)
pos = build_langstrings.add(name, &sn); pos = build_langstrings.add(name, &sn);
@ -502,6 +506,21 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un
return PS_OK; return PS_OK;
} }
#ifndef _UNICODE
/**
 * ANSI-build helper: stores a LangString whose text is given as UTF-8.
 *
 * The text is converted with UTF8ToExeHeadTStr() using the codepage taken
 * from the language table of `lang` and the converted string is then handed
 * to SetLangString().
 *
 * @param name  Name of the LangString.
 * @param lang  Language id the string belongs to.
 * @param stru8 The string, UTF-8 encoded.
 * @return PS_ERROR if there is no language table for `lang`, if
 *         Platform_SupportsUTF8Conversion() reports false or if the
 *         conversion fails; otherwise the return value of SetLangString().
 */
int CEXEBuild::SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8)
{
  int result = PS_ERROR;
  LanguageTable *langtable = GetLangTable(lang);
  if (langtable && Platform_SupportsUTF8Conversion())
  {
    EXEHEADTCHAR_T *converted = UTF8ToExeHeadTStr(stru8, langtable->nlf.m_uCodePage);
    if (converted)
    {
      // The "unicode" flag follows the width of the exehead character type
      result = SetLangString(name, lang, converted, sizeof(EXEHEADTCHAR_T) > 1);
      // The converted copy is owned by us, release it once it has been stored
      ExeHeadTStrFree(converted);
    }
  }
  return result;
}
#endif
// Sets the user string to the specific NLF_STRINGS id. // Sets the user string to the specific NLF_STRINGS id.
// //
// @return If the id is invalid or the string is not valid, it will return a // @return If the id is invalid or the string is not valid, it will return a
@ -925,6 +944,11 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
return 0; return 0;
} }
#ifndef _UNICODE
char fencoding = 0; // 0 = ansi, 8 = utf-8 (16/17 for utf-16le/be not supported)
if (IsUTF8BOM(f)) fencoding = 8;
#endif
// Check header // Check header
TCHAR buf[NSIS_MAX_STRLEN]; TCHAR buf[NSIS_MAX_STRLEN];
buf[0] = SkipComments(f); buf[0] = SkipComments(f);
@ -1096,8 +1120,31 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
buf[0] = SkipComments(f); buf[0] = SkipComments(f);
_fgetts(buf+1, NSIS_MAX_STRLEN, f); _fgetts(buf+1, NSIS_MAX_STRLEN, f);
#ifndef _UNICODE
if (8 == fencoding)
{
if (!Platform_SupportsUTF8Conversion()) {
ERROR_MSG(_T("Error: UTF-8 language files not supported on this OS!\n"));
return 0;
}
EXEHEADTCHAR_T *bufConv = UTF8ToExeHeadTStr(buf, nlf->m_uCodePage);
if (!bufConv) {
ERROR_MSG(_T("Error: Invalid UTF-8? (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
return 0;
}
else {
UINT cch = _tcslen(bufConv);
_tcsnccpy(buf, bufConv, NSIS_MAX_STRLEN);
if (cch >= NSIS_MAX_STRLEN-1) {
buf[NSIS_MAX_STRLEN-1] = _T('\0'); // Make sure we fail the "String too long" check
}
}
ExeHeadTStrFree(bufConv);
}
#endif
if (_tcslen(buf) == NSIS_MAX_STRLEN-1) { if (_tcslen(buf) == NSIS_MAX_STRLEN-1) {
ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")"), i, NLFStrings[i].szLangStringName); ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
return 0; return 0;
} }
temp=_tcslen(buf); temp=_tcslen(buf);

View file

@ -34,6 +34,7 @@
#include <cassert> // for assert(3) #include <cassert> // for assert(3)
#include <time.h> #include <time.h>
#include "tstring.h" #include "tstring.h"
#include "utf.h"
#include <algorithm> #include <algorithm>
#include "boost/scoped_ptr.hpp" #include "boost/scoped_ptr.hpp"
@ -813,6 +814,10 @@ int CEXEBuild::includeScript(TCHAR *f)
return PS_ERROR; return PS_ERROR;
} }
build_include_depth++; build_include_depth++;
#ifndef _UNICODE
const bool org_build_include_isutf8 = build_include_isutf8;
build_include_isutf8 = IsUTF8BOM(incfp);
#endif
int last_linecnt=linecnt; int last_linecnt=linecnt;
linecnt=0; linecnt=0;
@ -837,6 +842,10 @@ int CEXEBuild::includeScript(TCHAR *f)
restore_timestamp_predefine(oldtimestamp); restore_timestamp_predefine(oldtimestamp);
#endif #endif
#ifndef _UNICODE
build_include_isutf8 = org_build_include_isutf8;
#endif
int errlinecnt=linecnt; int errlinecnt=linecnt;
linecnt=last_linecnt; linecnt=last_linecnt;
@ -1712,13 +1721,21 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
TCHAR *name = line.gettoken_str(1); TCHAR *name = line.gettoken_str(1);
LANGID lang = line.gettoken_int(2); LANGID lang = line.gettoken_int(2);
TCHAR *str = line.gettoken_str(3); TCHAR *str = line.gettoken_str(3);
int ret = SetLangString(name, lang, str, curfile_unicode); int ret;
#ifndef _UNICODE
if (build_include_isutf8)
ret = SetUTF8LangString(name, lang, str);
else
#endif
ret = SetLangString(name, lang, str, curfile_unicode);
if (ret == PS_WARNING) if (ret == PS_WARNING)
warning_fl(_T("LangString \"%s\" set multiple times for %d, wasting space"), name, lang); warning_fl(_T("LangString \"%s\" set multiple times for %d, wasting space"), name, lang);
else if (ret == PS_ERROR) { else if (ret == PS_ERROR) {
ERROR_MSG(_T("Error: can't set LangString \"%s\"!\n"), name); ERROR_MSG(_T("Error: can't set LangString \"%s\"!\n"), name);
return PS_ERROR; return PS_ERROR;
} }
// BUGBUG: Does not display UTF-8 properly.
SCRIPT_MSG(_T("LangString: \"%s\" %d \"%s\"\n"), name, lang, str); SCRIPT_MSG(_T("LangString: \"%s\" %d \"%s\"\n"), name, lang, str);
} }
return PS_OK; return PS_OK;

85
Source/utf.cpp Normal file
View file

@ -0,0 +1,85 @@
/*
* utf.cpp
*
* This file is a part of NSIS.
*
* Copyright (C) 2011 Anders Kjersem
*
* Licensed under the zlib/libpng license (the "License");
* you may not use this file except in compliance with the License.
*
* Licence details can be found in the file COPYING.
*
* This software is provided 'as-is', without any express or implied
* warranty.
*
*/
#include "utf.h"
// BUGBUG: We might want to use MB_ERR_INVALID_CHARS but it is not supported
// on < WinXP or in our current POSIX implementation.
static const int UTF8MBTWCFLAGS = 0;
#define ExeHeadWStrFree free
/**
 * Allocates an uninitialized buffer with room for `cch` EXEHEADWCHAR_T
 * elements using malloc().
 *
 * @param cch Number of elements (not bytes) to make room for.
 * @return The buffer, or NULL if malloc() failed.
 *         Release it with ExeHeadWStrFree().
 */
static EXEHEADWCHAR_T* ExeHeadWStrAlloc(UINT cch)
{
  void *mem = malloc(cch*sizeof(EXEHEADWCHAR_T));
#if 0
  // TODO: We should add POSIX versions of G/SetLastError
  // if we want to tell _why_ UTF8ToExeHeadTStr failed...
  if (!mem) SetLastError(ERROR_OUTOFMEMORY);
#endif
  return (EXEHEADWCHAR_T*) mem;
}
#ifdef _UNICODE
#else // !_UNICODE
/**
 * Converts a UTF-8 string to an exehead string in the given codepage.
 *
 * The conversion goes UTF-8 -> wide string -> `Codepage`, each step done in
 * two passes: one call to query the required size and one call to convert.
 *
 * @param StrU8    Source string, UTF-8 encoded. Its length is passed to
 *                 MultiByteToWideChar as -1.
 * @param Codepage Codepage of the returned string.
 * @return A buffer obtained from ExeHeadTStrAlloc() that the caller must
 *         release with ExeHeadTStrFree(), or NULL if any conversion step
 *         or allocation fails.
 */
EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage)
{
  // Pass 1: how many wide characters do we need?
  const int wideLen = MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,NULL,0);
  if (0 == wideLen) return NULL;

  WCHAR *wide = (WCHAR*) ExeHeadWStrAlloc(wideLen);
  if (NULL == wide) return NULL;

  EXEHEADTCHAR_T *result = NULL;
  // Pass 2: the actual UTF-8 -> wide conversion
  if (0 != MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,wide,wideLen))
  {
    // Same two step dance for wide -> target codepage
    const int outBytes = WideCharToMultiByte(Codepage,0,wide,wideLen,NULL,0,NULL,NULL);
    if (0 != outBytes)
    {
      result = ExeHeadTStrAlloc(outBytes);
      if (NULL != result &&
          0 == WideCharToMultiByte(Codepage,0,wide,wideLen,result,outBytes,NULL,NULL))
      {
        // Conversion failed after the buffer was allocated, don't leak it
        ExeHeadTStrFree(result);
        result = NULL;
      }
    }
  }

  // The intermediate wide string is never handed to the caller
  ExeHeadWStrFree(wide);
  return result;
}
#endif // ?_UNICODE
/**
 * Checks if the next three bytes in the stream are the UTF-8 BOM (EF BB BF).
 *
 * If they are, the BOM is consumed and true is returned. If they are not,
 * the bytes that were read are put back so the caller sees the stream
 * unchanged, and false is returned.
 *
 * ungetc is only guaranteed to support 1 byte of pushback, so when more than
 * one byte has been read (a partial BOM match) the stream position saved
 * with fgetpos is restored with fsetpos instead. ungetc is only used for
 * more than one byte as a last resort on streams where the position cannot
 * be saved or restored.
 *
 * @param fstrm Stream opened for reading.
 * @return true if a UTF-8 BOM was found (and skipped), false otherwise.
 */
bool IsUTF8BOM(FILE*fstrm)
{
  static const int bom[3] = { 0xef, 0xbb, 0xbf };

  // Remember where we started so a partial match can be undone reliably
  fpos_t startpos;
  const bool havepos = 0 == fgetpos(fstrm, &startpos);

  int rd[3];
  int cnt = 0; // Number of bytes that matched the BOM so far
  for (; cnt < 3; ++cnt)
  {
    rd[cnt] = fgetc(fstrm);
    if (rd[cnt] != bom[cnt]) break;
  }
  if (3 == cnt) return true; // Complete BOM, leave it consumed

  if (0 == cnt)
  {
    // Only a single byte was read (or EOF, in which case ungetc is a no-op),
    // a single pushback is always supported.
    ungetc(rd[0], fstrm);
    return false;
  }

  // More than one byte was read, prefer seeking back to the saved position
  if (havepos && 0 == fsetpos(fstrm, &startpos)) return false;

  // Not seekable: best effort, push back in reverse order.
  // rd[cnt] is the byte that did not match (ungetc ignores it if it is EOF).
  for (int i = cnt; i >= 0; --i) ungetc(rd[i], fstrm);
  return false;
}

43
Source/utf.h Normal file
View file

@ -0,0 +1,43 @@
/*
* utf.h
*
* This file is a part of NSIS.
*
* Copyright (C) 2011 Anders Kjersem
*
* Licensed under the zlib/libpng license (the "License");
* you may not use this file except in compliance with the License.
*
* Licence details can be found in the file COPYING.
*
* This software is provided 'as-is', without any express or implied
* warranty.
*
*/
// Include guard: this header defines an inline function, so including it
// twice in the same translation unit would otherwise be a redefinition error.
#ifndef NSIS_UTF_H
#define NSIS_UTF_H

#include "Platform.h"
#include <stdlib.h>
#include <stdio.h>

// Wide character type used for exehead strings
typedef unsigned short EXEHEADWCHAR_T;

#ifdef _UNICODE
typedef EXEHEADWCHAR_T EXEHEADTCHAR_T;
#else // !_UNICODE
typedef char EXEHEADTCHAR_T;

// Releases a string obtained from ExeHeadTStrAlloc or UTF8ToExeHeadTStr
#define ExeHeadTStrFree free

/**
 * Allocates an uninitialized buffer of cb bytes with malloc().
 * Returns NULL if the allocation fails. Release with ExeHeadTStrFree.
 */
inline EXEHEADTCHAR_T* ExeHeadTStrAlloc(UINT cb) {return (EXEHEADTCHAR_T*) malloc(cb);}

/**
 * Converts the UTF-8 string StrU8 to a newly allocated string in Codepage.
 * Returns NULL on failure. Release the result with ExeHeadTStrFree.
 */
extern EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage);
#endif // ?_UNICODE

/**
 * Tries to peek at the first few bytes in the stream to determine if it is a UTF-8 BOM.
 * If it is a UTF-8 BOM it will eat the BOM,
 * if it is not it tries its best to restore the data.
 */
extern bool IsUTF8BOM(FILE*fstrm);

#endif // NSIS_UTF_H

View file

@ -200,8 +200,15 @@ inline size_t nsis_iconv_adaptor
} }
void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) { void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
if (code_page == CP_ACP) switch(code_page)
{
case CP_ACP:
code_page = 1252; code_page = 1252;
break;
case CP_UTF8:
_sntprintf(buf, len, _T("UTF-8"));
return;
}
_sntprintf(buf, len, _T("CP%d"), code_page); _sntprintf(buf, len, _T("CP%d"), code_page);
} }
@ -209,7 +216,7 @@ void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
LPBOOL lpUsedDefaultChar) { LPBOOL lpUsedDefaultChar) {
static char buffer[4096]; static char buffer[4096]; // BUGBUG: Should this be 4*NSIS_MAX_STRLEN for large string build?
char cp[128]; char cp[128];
create_code_page_string(cp, sizeof(cp), CodePage); create_code_page_string(cp, sizeof(cp), CodePage);
@ -245,7 +252,7 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar) { int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar) {
static WCHAR buffer[4096]; static WCHAR buffer[4096]; // BUGBUG: Should this be 4*NSIS_MAX_STRLEN for large string build?
char cp[128]; char cp[128];
create_code_page_string(cp, sizeof(cp), CodePage); create_code_page_string(cp, sizeof(cp), CodePage);
@ -900,3 +907,10 @@ bool GetDLLVersion(const tstring& filepath, DWORD& high, DWORD& low)
return false; return false;
} }
/**
 * Reports whether the platform can convert to/from UTF-8, i.e. whether
 * IsValidCodePage(CP_UTF8) succeeds. The answer is queried once and cached
 * in a function-local static for subsequent calls.
 *
 * @return true if CP_UTF8 is a valid codepage on this system.
 */
bool Platform_SupportsUTF8Conversion()
{
  // -1 = not queried yet, 0 = not supported, 1 = supported.
  // This must be a signed type: with an unsigned char the -1 is stored as
  // 255 and, after integral promotion, "-1 == cached" can never be true, so
  // the codepage would never be queried and we would always report true.
  static signed char cached = -1;
  if (-1 == cached) cached = IsValidCodePage(CP_UTF8) ? 1 : 0;
  return cached != 0;
}

View file

@ -185,4 +185,7 @@ RM_DEFINE_FREEFUNC(my_convert_free);
# define PATH_CONVERT(x) # define PATH_CONVERT(x)
#endif #endif
// Platform detection
bool Platform_SupportsUTF8Conversion();
#endif //_UTIL_H_ #endif //_UTIL_H_