diff --git a/Docs/src/compiler.but b/Docs/src/compiler.but
index 076dc98a..4ca90b3d 100644
--- a/Docs/src/compiler.but
+++ b/Docs/src/compiler.but
@@ -8,7 +8,7 @@ These commands are similar to the C preprocessor in terms of purpose and functio
\# NOTE: \NsisInputCharset define cannot be used in a \c block
-\c [/NONFATAL] [/CHARSET=ACP|OEM|CP#|UTF8] file
+\c [/NONFATAL] [/CHARSET=ACP|OEM|CP#|UTF8|UTF16LE] file
This command will include 'file' as if it was part of the original script. Note that if a file is included in another directory, the current directory is still where the script was compiled from (not where the included file resides). If the compiler can't find the file it will look for it in every include directory. See \R{addincludedir}{!addincludedir} for more information. If the /nonfatal switch is used and no files are found, a warning will be issued instead of an error. /charset can be used to specify a codepage for plain text files without a BOM.
diff --git a/Docs/src/config.but b/Docs/src/config.but
index 979a8773..8895e10a 100644
--- a/Docs/src/config.but
+++ b/Docs/src/config.but
@@ -12,7 +12,7 @@
\define{NsisACPcp} system default ANSI codepage (ACP)
-\define{NsisInputCharset} ACP|OEM|CP#|UTF8
+\define{NsisInputCharset} ACP|OEM|CP#|UTF8|UTF16LE
\define{NsisWarnBlockContainerBegin} \\
diff --git a/Source/utf.cpp b/Source/utf.cpp
index b83ea6dd..ac0f499d 100644
--- a/Source/utf.cpp
+++ b/Source/utf.cpp
@@ -29,9 +29,8 @@ void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch)
UINT StrLenUTF16LE(const void*str)
{
unsigned short *p = (unsigned short *) str;
- for(;*p;) ++p;
UINT cch = 0;
- if ((size_t)p > (size_t)str) cch = ((size_t)p - (size_t)str) - 1;
+ for(;p[cch];) ++cch; // Count the unsigned short units that precede the terminating 0 unit
return cch;
}
@@ -215,6 +214,9 @@ UINT NStreamLineReader::ReadLine(wchar_t*Buffer, UINT cchBuf)
#endif
const UINT cchFullBuf = cchBuf;
NIStream&strm = GetStream();
+#ifndef _WIN32
+ iconvdescriptor iconvd;
+#endif
l_restart:
// Only supports MBCS and UTF-8 for now...
@@ -273,9 +275,11 @@ l_restart:
if (CompleteLine(Buffer,cchWC,cchBuf,true)) goto l_success;
}
}
-#ifdef _WIN32
else if (StreamEncoding().IsUTF16LE())
{
+#ifndef _WIN32
+    if (!iconvd.Open("wchar_t", iconvdescriptor::GetHostEndianUCS4Code())) goto l_unsupportedencoding; // Cannot convert UCS-4 to wchar_t on this host
+#endif
unsigned short lead, trail, cchWC;
for(;;)
{
@@ -283,20 +287,31 @@ l_restart:
FIX_ENDIAN_INT16LETOHOST_INPLACE(lead);
if (IsTrailSurrogateUTF16(lead)) goto l_badutf;
UINT32 codpt = lead;
- Buffer[0] = lead, cchWC = 0;
+ if (cchBuf <= 1) goto l_lineoverflow;
+ Buffer[0] = lead, cchWC = 1;
if (IsLeadSurrogateUTF16(lead))
{
if (!strm.ReadInt16(&trail)) goto l_ioerror;
FIX_ENDIAN_INT16LETOHOST_INPLACE(trail);
if (!IsTrailSurrogateUTF16(trail)) goto l_badutf;
codpt = CodePointFromUTF16SurrogatePair(lead,trail);
+#ifdef _WIN32
+ if (cchBuf <= 2) goto l_lineoverflow;
Buffer[1] = trail, ++cchWC;
+#endif
}
if (!IsValidUnicodeCodePoint(codpt)) goto l_badutf;
- if (CompleteLine(Buffer,++cchWC,cchBuf,true)) goto l_success;
+#ifndef _WIN32
+ char tmpdest[8]; // Should be plenty of space to store one UCS4 character as wchar_t(s)
+ size_t inleft = 4;
+ cchWC = iconvd.Convert(&codpt,&inleft,tmpdest,sizeof(tmpdest)) / sizeof(wchar_t);
+ if (!cchWC) goto l_badutf;
+ if (cchBuf <= cchWC) goto l_lineoverflow;
+ for (UINT i = cchWC; i;) --i, Buffer[i] = ((wchar_t*)tmpdest)[i];
+#endif
+ if (CompleteLine(Buffer,cchWC,cchBuf,true)) goto l_success;
}
}
-#endif
else if (StreamEncoding().IsUnicode())
{
goto l_unsupportedencoding;
diff --git a/Source/utf.h b/Source/utf.h
index 4fd4bdd0..9d4c5e3f 100644
--- a/Source/utf.h
+++ b/Source/utf.h
@@ -23,7 +23,7 @@
#include
#include "util.h" // For my_fopen
-#define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8")
+#define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8|UTF16LE")
void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch);
diff --git a/Source/util.cpp b/Source/util.cpp
index 0ab1a67b..b4b1364e 100644
--- a/Source/util.cpp
+++ b/Source/util.cpp
@@ -211,16 +211,6 @@ int wsprintf(TCHAR *s, const TCHAR *format, ...) {
return res;
}
-// iconv const inconsistency workaround by Alexandre Oliva
-template
-inline size_t nsis_iconv_adaptor
- (size_t (*iconv_func)(iconv_t, T, size_t *, TCHAR**,size_t*),
- iconv_t cd, TCHAR **inbuf, size_t *inbytesleft,
- TCHAR **outbuf, size_t *outbytesleft)
-{
- return iconv_func (cd, (T)inbuf, inbytesleft, outbuf, outbytesleft);
-}
-
void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
switch(code_page)
{
diff --git a/Source/util.h b/Source/util.h
index 6085f05c..f93a80e1 100644
--- a/Source/util.h
+++ b/Source/util.h
@@ -87,6 +87,51 @@ inline void PrintColorFmtMsg_ERR(const TCHAR *fmtstr, ...)
#ifndef _WIN32
+// iconv const inconsistency workaround by Alexandre Oliva
+template <typename T> // T is deduced from the type of iconv_func's second (input buffer) parameter
+inline size_t nsis_iconv_adaptor
+  (size_t (*iconv_func)(iconv_t, T, size_t *, TCHAR**,size_t*),
+  iconv_t cd, TCHAR **inbuf, size_t *inbytesleft,
+  TCHAR **outbuf, size_t *outbytesleft)
+{
+  return iconv_func (cd, (T)inbuf, inbytesleft, outbuf, outbytesleft); // Cast inbuf to whatever pointer type this iconv declares
+}
+
+class iconvdescriptor { // Holds an iconv_t and closes it in the destructor (avoid copying: every copy would close the same descriptor)
+  iconv_t m_cd; // (iconv_t)-1 while no descriptor is open
+public:
+  iconvdescriptor(iconv_t cd = (iconv_t)-1) : m_cd(cd) {}
+  ~iconvdescriptor() { Close(); }
+  void Close() // Closes the descriptor if one is open, otherwise does nothing
+  {
+    if ((iconv_t)-1 != m_cd)
+    {
+      iconv_close(m_cd);
+      m_cd = (iconv_t)-1;
+    }
+  }
+  bool Open(const char*tocode, const char*fromcode) // Returns false when iconv_open() fails
+  {
+    Close(); // Release a descriptor left over from an earlier Open() so it is not leaked
+    return (iconv_t)-1 != (m_cd = iconv_open(tocode, fromcode));
+  }
+  UINT Convert(void*inbuf, size_t*pInLeft, void*outbuf, size_t outLeft) // Returns the number of bytes written to outbuf, 0 on failure
+  {
+    char *in = (char*) inbuf, *out = (char*) outbuf;
+    const size_t orgOutLeft = outLeft;
+    size_t ret = nsis_iconv_adaptor(iconv, m_cd, &in, pInLeft, &out, &outLeft); // pInLeft is the input byte count iconv() requires
+    if ((size_t)-1 == ret) // iconv() reports failure as (size_t)-1
+      ret = 0, *pInLeft = 1; // Failure convention of this class: no output, *pInLeft forced to 1
+    else
+      ret = orgOutLeft - outLeft; // Bytes produced = output space consumed
+    return (UINT) ret;
+  }
+  iconv_t GetDescriptor() const { return m_cd; }
+  operator iconv_t() const { return m_cd; }
+
+  static const char* GetHostEndianUCS4Code() { return "UCS-4-INTERNAL"; }
+};
+
TCHAR *CharPrev(const TCHAR *s, const TCHAR *p);
char *CharNextA(const char *s);
WCHAR *CharNextW(const WCHAR *s);