diff --git a/Docs/src/compiler.but b/Docs/src/compiler.but index ac88ded3..38b80de2 100644 --- a/Docs/src/compiler.but +++ b/Docs/src/compiler.but @@ -38,9 +38,11 @@ Causes the NSIS compiler to scan the given directory for plug-in DLLs. If you do \S1{appendfile} !appendfile -\c file text +\# NOTE: \NsisOutputCharset define cannot be used in a \c block -Appends \e{text} to \e{file}. +\c [/CHARSET=ACP|OEM|CP#|UTF8[SIG]|UTF16[BOM]] file text + +Appends \e{text} to \e{file}. The text is written as ANSI (ACP) unless the file already has a BOM. Using /CHARSET will force a specific character encoding. \c !tempfile FILE \c !appendfile "${FILE}" "XPStyle on$\n" diff --git a/Docs/src/history.but b/Docs/src/history.but index 875f8fc6..58d01464 100644 --- a/Docs/src/history.but +++ b/Docs/src/history.but @@ -28,6 +28,8 @@ Released on ?, 2013 \S2{} Minor Changes +\b Added !appendfile /CHARSET parameter + \b %temp%\\Low will be used if the installer cannot write to %temp% nor %windir%\\Temp (\W{http://sourceforge.net/support/tracker.php?aid=2909242}{bug #2909242}, \W{http://sourceforge.net/support/tracker.php?aid=2912824}{patch #2912824}) \b Added $\{NSIS_PACKEDVERSION\}, the NSIS version packed in a hex number (\W{http://sourceforge.net/support/tracker.php?aid=2680832}{patch #2680832}) diff --git a/Source/makenssi.cpp b/Source/makenssi.cpp index 0d7981b7..17064e55 100644 --- a/Source/makenssi.cpp +++ b/Source/makenssi.cpp @@ -467,7 +467,7 @@ int _tmain(int argc, TCHAR **argv) build.warning(OPT_STR _T("INPUTCHARSET: Ignoring invalid charset %s"), argv[argpos]); cp = NStreamEncoding::AUTO; } - inputenc.SetCodepage(cp); + inputenc.SafeSetCodepage(cp); } else break; diff --git a/Source/script.cpp b/Source/script.cpp index 0a648597..6a34a37b 100644 --- a/Source/script.cpp +++ b/Source/script.cpp @@ -893,7 +893,7 @@ int CEXEBuild::includeScript(const TCHAR *f, NStreamEncoding&enc) const int errlinecnt=linecnt; linecnt=last_linecnt; curfilename=last_filename; - curlinereader = last_linereader; + curlinereader=last_linereader; build_include_depth--; if (r != PS_EOF && r != PS_OK) @@ -962,7 +962,6 @@ int CEXEBuild::LoadLicenseFile(const TCHAR *file, TCHAR** pdata, const TCHAR *cm TCHAR*data=(TCHAR*)malloc(cbTotalData); if (!data) { -l_OOM: ERROR_MSG(_T("Internal compiler error #12345: %s malloc(%d) failed.\n"),cmdname,cbTotalData); return PS_ERROR; } @@ -980,6 +979,7 @@ l_OOM: const UINT cbcu=NStreamEncoding::GetCodeUnitSize(srccp); if (sizeof(TCHAR) < cbcu) { +l_errwcconv: ERROR_MSG(_T("%s: wchar_t conversion failed!\n"),cmdname); return PS_ERROR; } @@ -989,15 +989,10 @@ l_OOM: if (cbcu > 1) *((WORD*)lichdr)='X'; //BUGBUG: No room: if (cbcu > 2) *((UINT32*)lichdr)='X'; wchar_t*wcdata=DupWCFromBytes(lichdr,cbcu+cbFileData,srccp); - if ((wchar_t*)-1==wcdata) - { - ERROR_MSG(_T("%s: wchar_t conversion failed!\n"),cmdname); - return PS_ERROR; - } + if (!wcdata) goto l_errwcconv; free(data); *pdata=data=wcdata; ldata=data+1; - if (!data) goto l_OOM; const bool isRTF=!memcmp(ldata,_T("{\\rtf"),5*sizeof(TCHAR)); if (isRTF) @@ -1407,26 +1402,35 @@ int CEXEBuild::doCommand(int which_token, LineParser &line) case TOK_P_APPENDFILE: { - TCHAR *file = line.gettoken_str(1); - TCHAR *text = line.gettoken_str(2); - - FILE *fp = FOPENTEXT(file, "a"); - if (!fp) + WORD tok = 0, cp; + bool bom = false, forceEnc = false; + TCHAR *param = line.gettoken_str(++tok), buf[9+1]; + my_strncpy(buf,param,COUNTOF(buf)); + if(!_tcsicmp(buf,_T("/charset="))) { - ERROR_MSG(_T("!appendfile: \"%s\" couldn't be opened.\n"), file); + ++tok, ++forceEnc, cp = GetEncodingFromString(param+9, bom); + if (NStreamEncoding::UNKNOWN == cp) + { + ERROR_MSG(_T("!appendfile: Invalid parameter \"%s\"!\n"), param); + return PS_ERROR; + } + } + param = line.gettoken_str(tok); + NOStream ostrm; + if (!ostrm.CreateFileForAppending(param, NStreamEncoding::ACP)) + { + ERROR_MSG(_T("!appendfile: \"%s\" couldn't be opened.\n"), param); return PS_ERROR; } - - if (_fputts(text, fp) < 0) + if (ostrm.IsUnicode()) bom = false; + if (forceEnc) ostrm.StreamEncoding().SetCodepage(cp); + const TCHAR *const text = line.gettoken_str(++tok); + if ((bom ? !ostrm.WriteBOM(ostrm.StreamEncoding()) : 0) || !ostrm.WriteString(text)) { - fclose(fp); - ERROR_MSG(_T("!appendfile: error writing to \"%s\".\n"), file); + ERROR_MSG(_T("!appendfile: error writing to \"%s\".\n"), param); return PS_ERROR; } - - fclose(fp); - - SCRIPT_MSG(_T("!appendfile: \"%s\" \"%s\"\n"), file, text); + SCRIPT_MSG(_T("!appendfile: \"%s\" \"%s\"\n"), param, text); } return PS_OK; diff --git a/Source/strlist.cpp b/Source/strlist.cpp index 50570d20..c6af2227 100644 --- a/Source/strlist.cpp +++ b/Source/strlist.cpp @@ -41,7 +41,7 @@ unsigned int ExeHeadStringList::getnum() const for(;;) { if (pos+=cb >= cbList) break; - cb = StrLenUTF16LE(p+=cb) + 1, ++num; + cb = StrLenUTF16(p+=cb) + 1, ++num; } } else @@ -87,7 +87,7 @@ unsigned int ExeHeadStringList::find(const TCHAR *str, WORD codepage, bool proce { WCToUTF16LEHlpr cnv; if (!cnv.Create(str)) return -1; - unsigned int pos = find(cnv.Get(),StrLenUTF16LE(cnv.Get()),codepage,processed,ppBufMB); + unsigned int pos = find(cnv.Get(),StrLenUTF16(cnv.Get()),codepage,processed,ppBufMB); cnv.Destroy(); return pos; } @@ -130,7 +130,7 @@ unsigned int ExeHeadStringList::find(const void *ptr, unsigned int cchF, WORD co for(;;) { if (pos+=cb >= cbList) break; - cb = (StrLenUTF16LE(p+=cb) + 1) * 2; + cb = (StrLenUTF16(p+=cb) + 1) * 2; if (cb < cbF) continue; if (byte_rev_match(p,find,cbF)) { retval = pos / WIDEDIV; break; } } diff --git a/Source/tokens.cpp b/Source/tokens.cpp index fdc25515..535707db 100644 --- a/Source/tokens.cpp +++ b/Source/tokens.cpp @@ -271,7 +271,7 @@ static tokenType tokenlist[TOK__LAST] = {TOK_P_TEMPFILE,_T("!tempfile"),1,0,_T("symbol"),TP_ALL}, {TOK_P_DELFILE,_T("!delfile"),1,1,_T("[/nonfatal] file"),TP_ALL}, -{TOK_P_APPENDFILE,_T("!appendfile"),2,0,_T("file appended_line"),TP_ALL}, +{TOK_P_APPENDFILE,_T("!appendfile"),2,1,_T("[/CHARSET=<") TSTR_OUTPUTCHARSET _T(">] file appended_line"),TP_ALL}, {TOK_P_GETDLLVERSION,_T("!getdllversion"),2,0,_T("localfilename define_basename"),TP_ALL}, {TOK_P_SEARCHPARSESTRING,_T("!searchparse"),3,-1,_T("[/ignorecase] [/noerrors] [/file] source_string substring OUTPUTSYM1 [substring [OUTPUTSYM2 [substring ...]]]"),TP_ALL}, diff --git a/Source/utf.cpp b/Source/utf.cpp index bcf85278..b36734a4 100644 --- a/Source/utf.cpp +++ b/Source/utf.cpp @@ -26,7 +26,7 @@ void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch) if (!empty) *out = 0; } -UINT StrLenUTF16LE(const void*str) +UINT StrLenUTF16(const void*str) { unsigned short *p = (unsigned short *) str; UINT cch = 0; @@ -44,6 +44,12 @@ bool StrSetUTF16LE(tstring&dest, const void*src) return true; } +void UTF16InplaceEndianSwap(void*Buffer, UINT cch) +{ + unsigned short *p = (unsigned short *) Buffer; + while(cch--) p[cch] = SWAP_ENDIAN_INT16(p[cch]); +} + inline UINT UTF8ToWC_Convert(LPCSTR StrU8,UINT cbU8,wchar_t*Buffer,UINT cchBuf) { #ifndef MB_ERR_INVALID_CHARS @@ -87,34 +93,170 @@ wchar_t* DupWCFromBytes(void*Buffer,UINT cbBuffer,WORD SrcCP) { /*\ Converts a buffer encoded with SrcCP to a \0 terminated wchar_t malloc'ed buffer. - Returns 0 if malloc failed or -1 if conversion to wchar_t failed. + Returns 0 on failure. \*/ - NStreamEncoding srcenc(SrcCP); - wchar_t*pwc = 0; -#ifdef _WIN32 - if (srcenc.IsUTF16LE()) - { - // Assuming wchar_t==UTF16LE - pwc = (wchar_t*) malloc(cbBuffer + 2); - if (!pwc) return pwc; - memcpy(pwc, Buffer, cbBuffer); - *((wchar_t*)(((char*)pwc)+cbBuffer)) = L'\0'; - return pwc; - } - // TODO: MBTWC on Windows is lame, we are going to fail if SrcCP is UTF16BE or UTF32 -#endif - UINT cchW = MultiByteToWideChar(SrcCP,0,(char*)Buffer,cbBuffer,0,0); - if (!cchW && NStreamEncoding::GetCodeUnitSize(SrcCP) <= cbBuffer) - { - return (wchar_t*)-1; - } - pwc = (wchar_t*) malloc((cchW+1)*sizeof(wchar_t)); - if (!pwc) return pwc; - MultiByteToWideChar(SrcCP,0,(char*)Buffer,cbBuffer,pwc,cchW); - pwc[cchW] = L'\0'; - return pwc; + CharEncConv cec; + if (!cec.Initialize(-1, SrcCP)) return 0; + wchar_t *pWC = (wchar_t*) cec.Convert(Buffer, cbBuffer); + return pWC ? (wchar_t*) cec.Detach() : 0; } +BOOL CharEncConv::IsValidCodePage(UINT cp) +{ +#ifdef _WIN32 + if (cp <= 1 || NStreamEncoding::IsUnicodeCodepage(cp)) return true; // Allow ACP/OEM/UTF* +#endif + return cp < (WORD)-1 && ::IsValidCodePage(cp); +} +bool CharEncConv::Initialize(UINT32 ToEnc, UINT32 FromEnc) +{ + /* + ** Initialize() with a Windows codepage or -1 for wchar_t + */ + const WORD UTF32LE = NStreamEncoding::UTF32LE; +#ifdef _WIN32 + if (NStreamEncoding::UTF16LE == ToEnc) ToEnc = -1; + if (NStreamEncoding::UTF16LE == FromEnc) FromEnc = -1; +#endif + m_TE = (WORD) ToEnc, m_FE = (WORD) FromEnc; + if ((UTF32LE|1) == (m_FE|1) || (UTF32LE|1) == (m_TE|1)) return false; // UTF32 is a pain to deal with on Windows +#ifdef _WIN32 + return (IsWE(m_FE) || IsValidCodePage(FromEnc)) && (IsWE(m_TE) || IsValidCodePage(ToEnc)); +#else + char f[50], t[COUNTOF(f)]; + if (IsWE(m_FE)) strcpy(f, "wchar_t"); else create_code_page_string(f, COUNTOF(f), m_FE); + if (IsWE(m_TE)) strcpy(t, "wchar_t"); else create_code_page_string(t, COUNTOF(t), m_TE); + return m_TE == m_FE || m_iconvd.Open(t, f); +#endif +} +size_t CharEncConv::GuessOutputSize(size_t cbConverted) +{ + UINT cus = IsWE(m_TE) ? sizeof(wchar_t) : NStreamEncoding::GetCodeUnitSize(m_TE); + size_t zt = 1, cch = cbConverted / cus; + if (!cch) return 0; + switch(cus) + { + case 1: zt = !!((char*)m_Result)[--cch]; break; + case 2: zt = !!((WORD*)m_Result)[--cch]; break; + case 4: zt = !!((UINT32*)m_Result)[--cch]; break; + } + return (cch + (zt ? 0 : 1)) * cus; +} +void* CharEncConv::Convert(const void*Src, size_t cbSrc, size_t*cbOut) +{ + /* + ** Convert() mallocs a buffer and converts Src (as m_FE) to m_TE. + ** If cbSrc is -1 the size is calculated. cbOut can be NULL. + ** Returns a pointer to the buffer on success or 0 on error. + ** The buffer is valid until you call Close() or Convert(). + */ + if ((size_t)-1 == cbSrc) + { + UINT cus = IsWE(m_FE) ? sizeof(wchar_t) : NStreamEncoding::GetCodeUnitSize(m_FE); + switch(cus) + { + case 1: cbSrc = strlen((char*)Src); break; + case 2: cbSrc = StrLenUTF16(Src); break; + //case 4: // No UTF32 support... + default: + if (sizeof(wchar_t) > 2 && sizeof(wchar_t) == cus) + { + cbSrc = wcslen((wchar_t*)Src); + break; + } + assert(0); + return 0; + } + cbSrc = (cbSrc + 1) * cus; + } + if (m_FE == m_TE) + { +#ifdef _WIN32 + if (m_AllowOptimizedReturn && IsWE(m_FE)) + { + if (cbOut) + { + cbSrc /= sizeof(wchar_t); + if (cbSrc && ((WORD*)Src)[--cbSrc]) ++cbSrc; + *cbOut = cbSrc * sizeof(wchar_t); + } + return (void*) Src; + } +#endif + char *p = (char*) realloc(m_Result, cbSrc + sizeof(UINT32)); + if (p) m_Result = p; else return 0; + memcpy(p, Src, cbSrc); + *((UINT32*)(p+cbSrc)) = 0; + if (cbOut) *cbOut = GuessOutputSize(cbSrc); + return m_Result; + } +#ifdef _WIN32 + if (!IsWE(m_FE) && !IsWE(m_TE) && NStreamEncoding::UTF16BE != m_TE) + { + // We need a middle step: Src -> wchar_t -> Target + CharEncConv cec; + if (!cec.Initialize(-1, m_FE)) return 0; + size_t cbConv; + char *pWC = (char*) cec.Convert(Src, cbSrc, &cbConv); + if (!pWC) return 0; + this->m_FE = -1; + return this->Convert(pWC, cbConv, cbOut); + } + if (IsWE(m_FE)) + { + if (NStreamEncoding::UTF16BE == m_TE) goto l_swapUTF16; + cbSrc /= sizeof(wchar_t); + UINT cbDest = WideCharToMultiByte(m_TE, 0, (LPWSTR)Src, cbSrc, 0, 0, 0, 0); + char *p = (char*) realloc(m_Result, (cbDest + 1) * sizeof(char)); + if (p) m_Result = p; else return 0; + if (!(cbDest = WideCharToMultiByte(m_TE, 0, (LPWSTR)Src, cbSrc, p, cbDest, 0, 0))) return 0; + if (p[--cbDest]) p[++cbDest] = '\0'; // Always \0 terminate + if (cbOut) *cbOut = cbDest; // cbOut never includes the \0 terminator + } + else + { + UINT cchDest; + if (NStreamEncoding::UTF16BE == m_FE) // UTF16BE -> UTF16LE/wchar_t + { +l_swapUTF16: + char *p = (char*) realloc(m_Result, cbSrc + sizeof(wchar_t)); + if (p) m_Result = p; else return 0; + memcpy(p, Src, cbSrc); + cchDest = cbSrc / sizeof(wchar_t); + UTF16InplaceEndianSwap(p, cchDest); + if (!cchDest) *((WORD*)p) = 0, ++cchDest; // For "--cchDest" during \0 termination + } + else + { + cchDest = MultiByteToWideChar(m_FE, 0, (char*)Src, cbSrc, 0, 0); + char *p = (char*) realloc(m_Result, (cchDest + 1) * sizeof(wchar_t)); + if (p) m_Result = p; else return 0; + if (!(cchDest = MultiByteToWideChar(m_FE, 0, (char*)Src, cbSrc, (LPWSTR)p, cchDest))) return 0; + if (NStreamEncoding::UTF16BE == m_TE) UTF16InplaceEndianSwap(p, cchDest); + } + if (((WORD*)m_Result)[--cchDest]) ((WORD*)m_Result)[++cchDest] = '\0'; + if (cbOut) *cbOut = cchDest * sizeof(wchar_t); + } +#else + char *in = (char*) Src; + size_t cbConv; + if (!nsis_iconv_reallociconv(m_iconvd, &in, &cbSrc, &m_Result, cbConv)) return 0; + if (cbOut) *cbOut = GuessOutputSize(cbConv); +#endif + return m_Result; +} + +#if !defined(_WIN32) || !defined(_UNICODE) +bool WCToUTF16LEHlpr::Create(const TCHAR*in) +{ + CharEncConv cec; + if (!cec.Initialize(NStreamEncoding::UTF16LE, -1)) return false; + if (!cec.Convert(in)) return false; + m_s = cec.Detach(); + return true; +} +#endif + UINT DetectUTFBOM(FILE*strm) { /*\ @@ -165,13 +307,18 @@ UINT DetectUTFBOM(FILE*strm) return 0; } -WORD GetEncodingFromString(const TCHAR*s) +WORD GetEncodingFromString(const TCHAR*s, bool&BOM) { + BOM = false; if (!_tcsicmp(s,_T("ACP"))) return NStreamEncoding::ACP; if (!_tcsicmp(s,_T("OEM"))) return NStreamEncoding::OEMCP; if (!_tcsicmp(s,_T("UTF8"))) return NStreamEncoding::UTF8; - if (!_tcsicmp(s,_T("UTF16LE"))) return NStreamEncoding::UTF16LE; - if (!_tcsicmp(s,_T("UTF16BE"))) return NStreamEncoding::UTF16BE; + if ((!_tcsicmp(s,_T("UTF8SIG")) || !_tcsicmp(s,_T("UTF8BOM"))) && ++BOM) + return NStreamEncoding::UTF8; + if (!_tcsicmp(s,_T("UTF16LE")) || (!_tcsicmp(s,_T("UTF16LEBOM")) && ++BOM)) + return NStreamEncoding::UTF16LE; + if (!_tcsicmp(s,_T("UTF16BE")) || (!_tcsicmp(s,_T("UTF16BEBOM")) && ++BOM)) + return NStreamEncoding::UTF16BE; if (S7IsChEqualI('C',*s++) && S7IsChEqualI('P',*s++)) { int cp = _tstoi(s); @@ -179,6 +326,11 @@ WORD GetEncodingFromString(const TCHAR*s) } return NStreamEncoding::UNKNOWN; } +WORD GetEncodingFromString(const TCHAR*s) +{ + bool bom; + return GetEncodingFromString(s, bom); +} void NStreamEncoding::GetCPDisplayName(WORD CP, TCHAR*Buf) { @@ -200,6 +352,34 @@ void NStreamEncoding::GetCPDisplayName(WORD CP, TCHAR*Buf) _tcscpy(Buf,p); } +bool NBaseStream::Attach(FILE*hFile, WORD enc, bool Seek /*= true*/) +{ + Close(); + m_hFile = hFile; + if (!m_hFile || !NStream::SetBinaryMode(m_hFile)) return false; + fpos_t pos; + if (Seek && !fgetpos(m_hFile, &pos)) rewind(m_hFile); else Seek = false; + WORD cp = DetectUTFBOM(m_hFile); + if (Seek) + { + fsetpos(m_hFile, &pos); + if (cp) DetectUTFBOM(m_hFile); // parseScript() etc does not like the BOM, make sure we skip past it + } + if (!cp) cp = enc; + m_Enc.SafeSetCodepage(cp); + return true; +} + +bool NOStream::WriteString(const wchar_t*Str, size_t cch /*= -1*/) +{ + CharEncConv cec; + if (!cec.Initialize(m_Enc.GetCodepage(), -1)) return false; + cec.SetAllowOptimizedReturn(true); + size_t cbConv; + char *p = (char*) cec.Convert(Str, cch, &cbConv); + return p && WriteOctets(p, cbConv); +} + tstring NStreamLineReader::GetErrorMessage(UINT Error, const TCHAR*Filename, UINT Line) { tstring msg; @@ -347,8 +527,7 @@ l_restart: else { const UINT cp = StreamEncoding().GetCodepage(); - UINT mbtowcflags = 0; - if (cp < 50220 && cp != 42) mbtowcflags = MB_ERR_INVALID_CHARS; + UINT mbtowcflags = (cp < 50220 && cp != 42) ? MB_ERR_INVALID_CHARS : 0; for(;;) { BYTE bufMB[2]; diff --git a/Source/utf.h b/Source/utf.h index 3a54a10e..a2cad855 100644 --- a/Source/utf.h +++ b/Source/utf.h @@ -22,10 +22,15 @@ #include #include #include "util.h" // For my_fopen +#ifdef _WIN32 +#include // For _setmode +#include // For _O_BINARY +#endif -const UINT16 UNICODE_REPLACEMENT_CHARACTER = 0xfffd; +const WORD UNICODE_REPLACEMENT_CHARACTER = 0xfffd; #define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8|UTF16LE") +#define TSTR_OUTPUTCHARSET _T("ACP|OEM|CP#|UTF8[SIG]|UTF16[BOM]") void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch); @@ -53,36 +58,72 @@ inline UINT32 CodePointFromUTF16SurrogatePair(unsigned short lea,unsigned short return ((UINT32)lea << 10) + tra + surrogate_offset; } -UINT StrLenUTF16LE(const void*str); +void UTF16InplaceEndianSwap(void*Buffer, UINT cch); +UINT StrLenUTF16(const void*str); bool StrSetUTF16LE(tstring&dest, const void*src); UINT WCFromCodePoint(wchar_t*Dest,UINT cchDest,UINT32 CodPt); wchar_t* DupWCFromBytes(void*Buffer,UINT cbBuffer,WORD SrcCP); UINT DetectUTFBOM(FILE*strm); +WORD GetEncodingFromString(const TCHAR*s, bool&BOM); WORD GetEncodingFromString(const TCHAR*s); +class CharEncConv { + char *m_Result; + WORD m_TE, m_FE; +#ifdef _WIN32 + bool m_AllowOptimizedReturn; // Can Convert() return Src buffer? +#else + iconvdescriptor m_iconvd; +#endif +protected: + size_t GuessOutputSize(size_t cbConverted); + static bool IsWE(WORD Encoding) { return (WORD)-1 == Encoding; } + static bool IsWE(UINT32 Encoding) { return (UINT32)-1 == Encoding; } +public: + CharEncConv() : m_Result(0) {} + ~CharEncConv() { Close(); } + void Close() + { + free(m_Result); + m_Result = 0; +#ifndef _WIN32 + m_iconvd.Close(); +#endif + } + void* Detach() { void *p = m_Result; m_Result = 0; return p; } + bool Initialize(UINT32 ToEnc, UINT32 FromEnc); + void* Convert(const void*Src, size_t cbSrc = -1, size_t*cbOut = 0); +#ifdef _WIN32 + void SetAllowOptimizedReturn(bool val = true) { m_AllowOptimizedReturn = val; } +#else + void SetAllowOptimizedReturn(bool val = false) {} +#endif + static BOOL IsValidCodePage(UINT cp); +}; + class WCToUTF16LEHlpr { unsigned short* m_s; public: WCToUTF16LEHlpr() : m_s(0) {} bool Create(const TCHAR*in) - { -#if defined(_WIN32) && defined(_UNICODE) - m_s = (unsigned short*) in; +#if !defined(_WIN32) || !defined(_UNICODE) + ; #else -#error TODO: wchar_t to UTF16LE -#endif + { + m_s = (unsigned short*) in; return true; } +#endif void Destroy() { -#if !defined(_WIN32) && !defined(_UNICODE) - delete[] m_s; +#if !defined(_WIN32) || !defined(_UNICODE) + free(m_s); #endif } const unsigned short* Get() const { return m_s; } - UINT GetLen() const { return StrLenUTF16LE(m_s); } + UINT GetLen() const { return StrLenUTF16(m_s); } UINT GetSize() const { return (GetLen()+1) * 2; } }; @@ -110,7 +151,6 @@ public: void SafeSetCodepage(WORD cp) { if (NStreamEncoding::AUTO==cp) cp = GetPlatformDefaultCodepage(); - if (NStreamEncoding::UNKNOWN==cp) cp = GetPlatformDefaultCodepage(); SetCodepage(cp); } void Reset() { SetCodepage(GetPlatformDefaultCodepage()); } @@ -163,16 +203,23 @@ public: } return false; } +#ifdef _WIN32 + static bool SetBinaryMode(int fd) { return -1 != _setmode(fd, _O_BINARY); } + static bool SetBinaryMode(FILE*f) { return SetBinaryMode(_fileno(f)); } +#else + static bool SetBinaryMode(int fd) { return true; } + static bool SetBinaryMode(FILE*f) { return true; } +#endif }; -class NIStream { +class NBaseStream { protected: FILE* m_hFile; NStreamEncoding m_Enc; public: - NIStream() : m_hFile(0) {} - ~NIStream() { Close(); } + NBaseStream() : m_hFile(0) {} + ~NBaseStream() { Close(); } FILE* GetHandle() const { return m_hFile; } NStreamEncoding& StreamEncoding() { return m_Enc; } bool IsEOF() const { return feof(m_hFile) != 0; } @@ -187,21 +234,12 @@ public: bool OpenFileForReading(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO) { - FILE *hFile = my_fopen(Path, "rb"); - return Attach(hFile, enc); + return Attach(my_fopen(Path, "rb"), enc); } bool OpenFileForReading(const TCHAR* Path, NStreamEncoding&Enc) { return OpenFileForReading(Path, Enc.GetCodepage()); } - bool OpenStdIn(WORD enc = NStreamEncoding::AUTO) - { - return Attach(stdin, enc); - } - bool OpenStdIn(NStreamEncoding&Enc) - { - return OpenStdIn(Enc.GetCodepage()); - } FILE* Detach() { @@ -209,18 +247,7 @@ public: m_hFile = 0; return hFile; } - bool Attach(FILE*hFile, WORD enc) - { - Close(); - m_hFile = hFile; - if (m_hFile) - { - WORD cp = DetectUTFBOM(m_hFile); - if (!cp) cp = enc; - m_Enc.SafeSetCodepage(cp); - } - return 0 != m_hFile; - } + bool Attach(FILE*hFile, WORD enc, bool Seek = true); UINT ReadOctets(void*Buffer, UINT cbBuf) { @@ -237,6 +264,56 @@ public: bool ReadInt16(void*Buffer) { return 2 == ReadOctets(Buffer, 2); } }; +class NIStream : public NBaseStream { +public: + bool OpenStdIn(WORD enc = NStreamEncoding::AUTO) + { + return Attach(stdin, enc, false); + } + bool OpenStdIn(NStreamEncoding&Enc) + { + return OpenStdIn(Enc.GetCodepage()); + } +}; + +class NOStream : public NBaseStream { +public: + bool CreateFileForWriting(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO) + { + return Attach(my_fopen(Path, "w+b"), enc); + } + bool CreateFileForWriting(const TCHAR* Path, NStreamEncoding&Enc) + { + return CreateFileForWriting(Path, Enc.GetCodepage()); + } + bool CreateFileForAppending(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO) + { + return Attach(my_fopen(Path, "a+b"), enc); + } + bool CreateFileForAppending(const TCHAR* Path, NStreamEncoding&Enc) + { + return CreateFileForAppending(Path, Enc.GetCodepage()); + } + + bool WriteOctets(void*Buffer, size_t cbBuf) + { + return cbBuf == fwrite(Buffer, 1, cbBuf, m_hFile); + } + bool WriteBOM(NStreamEncoding&Enc) + { + static const unsigned char u8b[] = {0xEF,0xBB,0xBF}; + static const unsigned char u16lb[] = {0xFF,0xFE}, u16bb[] = {0xFE,0xFF}; + switch(Enc.GetCodepage()) + { + case NStreamEncoding::UTF8: return WriteOctets((void*) u8b, sizeof(u8b)); + case NStreamEncoding::UTF16LE: return WriteOctets((void*) u16lb, sizeof(u16lb)); + case NStreamEncoding::UTF16BE: return WriteOctets((void*) u16bb, sizeof(u16bb)); + } + return false; + } + bool WriteString(const wchar_t*Str, size_t cch = -1); +}; + class NStreamLineReader { protected: NIStream &m_Strm; diff --git a/Source/util.cpp b/Source/util.cpp index b9c2b716..973f62af 100644 --- a/Source/util.cpp +++ b/Source/util.cpp @@ -211,6 +211,30 @@ int wsprintf(TCHAR *s, const TCHAR *format, ...) { return res; } +bool nsis_iconv_reallociconv(iconv_t CD, char**In, size_t*cbInLeft, char**Mem, size_t&cbConverted) +{ + char *in, *heap = *Mem; + UINT cbMem = 512; + size_t inleft, outleft, icvret = (size_t) -1; + for(;;) + { + in = *In, inleft = *cbInLeft, outleft = cbMem - sizeof(UINT32); // Leave room for \0 + char *p = (char*) realloc(heap, cbMem), *out = p; + if (!p) break; + heap = p, icvret = nsis_iconv_adaptor(iconv, CD, &in, &inleft, &out, &outleft); + if ((size_t) -1 != icvret || E2BIG != errno) break; + cbMem *= 4; + } + *In = in, *Mem = heap; + cbConverted = cbMem - (outleft + sizeof(UINT32)), *cbInLeft = inleft; + if ((size_t) -1 != icvret) + { + *((UINT32*)(&heap[cbConverted])) = 0; + return true; + } + return false; +} + void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) { switch(code_page) { @@ -221,8 +245,11 @@ void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) { case CP_UTF8: _sntprintf(buf, len, _T("UTF-8")); return; + case 1200: // UTF16LE + case 1201: // UTF16BE + _sntprintf(buf, len, _T("UTF-16%cE"), 1200==code_page ? _T('L') : _T('B')); + return; } - _sntprintf(buf, len, _T("CP%d"), code_page); } diff --git a/Source/util.h b/Source/util.h index a9be61d6..0df8227f 100644 --- a/Source/util.h +++ b/Source/util.h @@ -123,6 +123,8 @@ inline size_t nsis_iconv_adaptor return iconv_func (cd, (T)inbuf, inbytesleft, outbuf, outbytesleft); } +bool nsis_iconv_reallociconv(iconv_t CD, char**In, size_t*cbInLeft, char**Mem, size_t&cbConverted); + class iconvdescriptor { iconv_t m_cd; public: