Experimental UTF16 reading support on POSIX

git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6291 212acab6-be3b-0410-9dea-997c60f758d6
This commit is contained in:
anders_k 2013-03-08 19:38:46 +00:00
parent 9b287fc648
commit a0cd389c78
6 changed files with 69 additions and 19 deletions

View file

@ -29,9 +29,8 @@ void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch)
// Returns the length, in 16-bit code units, of the zero-terminated UTF-16LE
// string at str. The terminator is not counted. A surrogate pair counts as
// two units.
UINT StrLenUTF16LE(const void*str)
{
  const unsigned short *p = (const unsigned short *) str;
  UINT cch = 0;
  // A zero code unit is zero in either byte order, so the scan needs no
  // endian conversion. Counting forward from the start avoids the old
  // pointer-difference math, which measured bytes rather than code units.
  for(;p[cch];) ++cch;
  return cch;
}
@ -215,6 +214,9 @@ UINT NStreamLineReader::ReadLine(wchar_t*Buffer, UINT cchBuf)
#endif
const UINT cchFullBuf = cchBuf;
NIStream&strm = GetStream();
#ifndef _WIN32
iconvdescriptor iconvd;
#endif
l_restart:
// Only supports MBCS and UTF-8 for now...
@ -273,9 +275,11 @@ l_restart:
if (CompleteLine(Buffer,cchWC,cchBuf,true)) goto l_success;
}
}
#ifdef _WIN32
else if (StreamEncoding().IsUTF16LE())
{
#ifndef _WIN32
if (!iconvd.Open("wchar_t", iconvd::GetHostEndianUCS4Code())) goto ERR_UNSUPPORTEDENCODING;
#endif
unsigned short lead, trail, cchWC;
for(;;)
{
@ -283,20 +287,31 @@ l_restart:
FIX_ENDIAN_INT16LETOHOST_INPLACE(lead);
if (IsTrailSurrogateUTF16(lead)) goto l_badutf;
UINT32 codpt = lead;
Buffer[0] = lead, cchWC = 0;
if (cchBuf <= 1) goto l_lineoverflow;
Buffer[0] = lead, cchWC = 1;
if (IsLeadSurrogateUTF16(lead))
{
if (!strm.ReadInt16(&trail)) goto l_ioerror;
FIX_ENDIAN_INT16LETOHOST_INPLACE(trail);
if (!IsTrailSurrogateUTF16(trail)) goto l_badutf;
codpt = CodePointFromUTF16SurrogatePair(lead,trail);
#ifdef _WIN32
if (cchBuf <= 2) goto l_lineoverflow;
Buffer[1] = trail, ++cchWC;
#endif
}
if (!IsValidUnicodeCodePoint(codpt)) goto l_badutf;
if (CompleteLine(Buffer,++cchWC,cchBuf,true)) goto l_success;
#ifndef _WIN32
char tmpdest[8]; // Should be plenty of space to store one UCS4 character as wchar_t(s)
size_t inleft = 4;
cchWC = iconvd.Convert(&codpt,&inleft,tmpdest,sizeof(tmpdest)) / sizeof(wchar_t);
if (!cchWC) goto l_badutf;
if (cchBuf <= cchWC) goto l_lineoverflow;
for (UINT i = cchWC; i;) --i, Buffer[i] = ((wchar_t*)tmpdest)[i];
#endif
if (CompleteLine(Buffer,cchWC,cchBuf,true)) goto l_success;
}
}
#endif
else if (StreamEncoding().IsUnicode())
{
goto l_unsupportedencoding;

View file

@ -23,7 +23,7 @@
#include <stdio.h>
#include "util.h" // For my_fopen
#define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8")
#define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8|UTF16LE")
void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch);

View file

@ -211,16 +211,6 @@ int wsprintf(TCHAR *s, const TCHAR *format, ...) {
return res;
}
// iconv const inconsistency workaround by Alexandre Oliva
// Some platforms declare the input buffer parameter of iconv() as char** and
// others as const char**. T is deduced from the function pointer that is
// passed in, and inbuf is cast to that type so the same call compiles on both.
// The buffers are char** rather than TCHAR** because iconv() itself works on
// byte buffers; with TCHAR** the template could not match iconv() when TCHAR
// is not char.
// Returns whatever iconv_func returns.
template <typename T>
inline size_t nsis_iconv_adaptor
  (size_t (*iconv_func)(iconv_t, T, size_t *, char**,size_t*),
  iconv_t cd, char **inbuf, size_t *inbytesleft,
  char **outbuf, size_t *outbytesleft)
{
  return iconv_func (cd, (T)inbuf, inbytesleft, outbuf, outbytesleft);
}
void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
switch(code_page)
{

View file

@ -87,6 +87,51 @@ inline void PrintColorFmtMsg_ERR(const TCHAR *fmtstr, ...)
#ifndef _WIN32
// iconv const inconsistency workaround by Alexandre Oliva
// Some platforms declare the input buffer parameter of iconv() as char** and
// others as const char**. T is deduced from the function pointer that is
// passed in, and inbuf is cast to that type so the same call compiles on both.
// The buffers are char** rather than TCHAR** because iconv() itself works on
// byte buffers; with TCHAR** the template could not match iconv() when TCHAR
// is not char.
// Returns whatever iconv_func returns.
template <typename T>
inline size_t nsis_iconv_adaptor
  (size_t (*iconv_func)(iconv_t, T, size_t *, char**,size_t*),
  iconv_t cd, char **inbuf, size_t *inbytesleft,
  char **outbuf, size_t *outbytesleft)
{
  return iconv_func (cd, (T)inbuf, inbytesleft, outbuf, outbytesleft);
}
// Owns an iconv conversion descriptor and closes it when the object is
// destroyed. (iconv_t)-1 is used as the "no descriptor" value.
class iconvdescriptor {
  iconv_t m_cd;
  // Copying is disabled (declared, never defined): two copies would both
  // call iconv_close() on the same descriptor.
  iconvdescriptor(const iconvdescriptor&);
  iconvdescriptor& operator=(const iconvdescriptor&);
public:
  // Takes ownership of cd; the default leaves the object without a descriptor.
  iconvdescriptor(iconv_t cd = (iconv_t)-1) : m_cd(cd) {}
  ~iconvdescriptor() { Close(); }
  // Closes the descriptor if one is held. Safe to call more than once.
  void Close()
  {
    if ((iconv_t)-1 != m_cd)
    {
      iconv_close(m_cd);
      m_cd = (iconv_t)-1;
    }
  }
  // Opens a descriptor converting from fromcode to tocode.
  // Returns false if iconv_open() fails.
  bool Open(const char*tocode, const char*fromcode)
  {
    Close(); // Open() may be called again on the same object; do not leak the previous descriptor
    m_cd = iconv_open(tocode, fromcode);
    return (iconv_t)-1 != m_cd;
  }
  // Converts *pInLeft bytes from inbuf into outbuf, which has room for
  // outLeft bytes. iconv() updates *pInLeft to the number of input bytes it
  // did not consume.
  // Returns the number of bytes written to outbuf, or 0 on failure.
  UINT Convert(void*inbuf, size_t*pInLeft, void*outbuf, size_t outLeft)
  {
    char *in = (char*) inbuf, *out = (char*) outbuf;
    const size_t orgOutLeft = outLeft;
    const size_t ret = nsis_iconv_adaptor(iconv, m_cd, &in, pInLeft, &out, &outLeft);
    if ((size_t)-1 == ret)
    {
      // Kept from the original code: on failure *pInLeft is forced to 1.
      // NOTE(review): presumably so callers see unconsumed input - confirm.
      *pInLeft = 1;
      return 0;
    }
    return (UINT)(orgOutLeft - outLeft);
  }
  iconv_t GetDescriptor() const { return m_cd; }
  operator iconv_t() const { return m_cd; }
  // Encoding name to pass to Open() for host-endian UCS-4.
  // NOTE(review): "UCS-4-INTERNAL" may not be accepted by every iconv
  // implementation - confirm against the targets being built.
  static const char* GetHostEndianUCS4Code() { return "UCS-4-INTERNAL"; }
};
TCHAR *CharPrev(const TCHAR *s, const TCHAR *p);
char *CharNextA(const char *s);
WCHAR *CharNextW(const WCHAR *s);