FileRead in Unicode installers can handle DBCS, conversion output is limited to UCS-2
git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6399 212acab6-be3b-0410-9dea-997c60f758d6
This commit is contained in:
parent
7ce021a376
commit
d387a32658
4 changed files with 25 additions and 13 deletions
|
@ -26,7 +26,7 @@ Reads a string (ANSI characters) from a file opened with \R{FileOpen}{FileOpen}.
|
||||||
\NsisWarnBlockContainerBegin
|
\NsisWarnBlockContainerBegin
|
||||||
\NsisBlockHeaderExeheadU
|
\NsisBlockHeaderExeheadU
|
||||||
\#{This is a bug in exehead but it is probably a good idea to document it here...}
|
\#{This is a bug in exehead but it is probably a good idea to document it here...}
|
||||||
The \R{intro-unicode}{Unicode} version can only read text encoded with a single byte character set! The \NsisACPcp is used during the conversion.
|
DBCS text is supported, but conversion output is limited to UCS-2/BMP; surrogate pairs are not supported. The \NsisACPcp is used during the conversion.
|
||||||
\NsisWarnBlockContainerEnd
|
\NsisWarnBlockContainerEnd
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,10 @@ Released on ?
|
||||||
|
|
||||||
\S2{} Minor Changes
|
\S2{} Minor Changes
|
||||||
|
|
||||||
|
\b FileRead in Unicode installers can handle DBCS text; conversion output is limited to UCS-2.
|
||||||
|
|
||||||
|
\b FileRead in Unicode installers now uses the Unicode replacement character (U+FFFD) instead of '?' for invalid characters.
|
||||||
|
|
||||||
\b FileReadByte no longer performs a Unicode conversion on non-ASCII characters
|
\b FileReadByte no longer performs a Unicode conversion on non-ASCII characters
|
||||||
|
|
||||||
\H{v3.0a1} 3.0 Alpha 1
|
\H{v3.0a1} 3.0 Alpha 1
|
||||||
|
|
|
@ -1406,7 +1406,7 @@ static int NSISCALL ExecuteEntry(entry *entry_)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
TCHAR *textout=var1;
|
TCHAR *textout=var1;
|
||||||
int rpos=0;
|
int rpos=0, ungetseek=sizeof(TCHAR);
|
||||||
TCHAR *hptr=var0;
|
TCHAR *hptr=var0;
|
||||||
int maxlen=GetIntFromParm(2);
|
int maxlen=GetIntFromParm(2);
|
||||||
if (maxlen<1) break;
|
if (maxlen<1) break;
|
||||||
|
@ -1419,30 +1419,38 @@ static int NSISCALL ExecuteEntry(entry *entry_)
|
||||||
{
|
{
|
||||||
TCHAR c;
|
TCHAR c;
|
||||||
#ifdef _UNICODE
|
#ifdef _UNICODE
|
||||||
|
c=0; // Make sure high byte is 0 for FileReadByte
|
||||||
if (which==EW_FGETS && !parm3)
|
if (which==EW_FGETS && !parm3)
|
||||||
{
|
{
|
||||||
/* BUGBUG:
|
char tmpc[2];
|
||||||
How is MBTWC supposed to be able to determine the correct WCHAR for a multibyte string when it only has 1 byte to look at?
|
DWORD mbtwcflags=MB_ERR_INVALID_CHARS, cbio;
|
||||||
And what if the multibyte character needs two WCHARs?
|
if (!ReadFile(h,tmpc,2,&cbio,NULL) || !cbio) break;
|
||||||
*/
|
ungetseek=cbio;
|
||||||
char tmpc;
|
for(;;) // Try to parse as DBCS first, if that fails try again as a single byte
|
||||||
if (!myReadFile(h,&tmpc,1)) break;
|
{
|
||||||
if (0==MultiByteToWideChar(CP_ACP, 0, &tmpc, 1, &c, 1)) c = _T('?');
|
// BUGBUG: Limited to UCS-2/BMP, surrogate pairs are not supported.
|
||||||
|
if (MultiByteToWideChar(CP_ACP,mbtwcflags,tmpc,cbio,&c,1)) break;
|
||||||
|
c=0xfffd; // Unicode replacement character
|
||||||
|
// If we read 2 bytes and it was not a DBCS character, we need to seek -1
|
||||||
|
if (--cbio) SetFilePointer(h,-(--ungetseek),NULL,FILE_CURRENT); else break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
if (!myReadFile(h,&c,1)) break;
|
// Read 1 TCHAR (FileReadUTF16LE and (Ansi)FileRead) or
|
||||||
|
// parm3 bytes (FileReadByte and (Unicode)FileReadWord)
|
||||||
|
if (!myReadFile(h,&c,!parm3 ? sizeof(TCHAR) : sizeof(TCHAR) > 1 ? parm3 : 1)) break;
|
||||||
}
|
}
|
||||||
if (parm3)
|
if (parm3)
|
||||||
{
|
{
|
||||||
myitoa(textout,(unsigned char)c);
|
myitoa(textout,(UINT)(_TUCHAR)c);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (lc == _T('\r') || lc == _T('\n'))
|
if (lc == _T('\r') || lc == _T('\n'))
|
||||||
{
|
{
|
||||||
if (lc == c || (c != _T('\r') && c != _T('\n')))
|
if (lc == c || (c != _T('\r') && c != _T('\n')))
|
||||||
SetFilePointer(h,-((int)(sizeof(c))),NULL,FILE_CURRENT);
|
SetFilePointer(h,-((int)ungetseek),NULL,FILE_CURRENT);
|
||||||
else
|
else
|
||||||
textout[rpos++]=c;
|
textout[rpos++]=c;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -5872,7 +5872,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
|
||||||
ent.offsets[0]=GetUserVarIndex(line, 1); // file handle
|
ent.offsets[0]=GetUserVarIndex(line, 1); // file handle
|
||||||
ent.offsets[1]=GetUserVarIndex(line, 2); // output string
|
ent.offsets[1]=GetUserVarIndex(line, 2); // output string
|
||||||
ent.offsets[2]=add_asciistring(_T("1"));
|
ent.offsets[2]=add_asciistring(_T("1"));
|
||||||
ent.offsets[3]=1;
|
ent.offsets[3]=2;
|
||||||
if (ent.offsets[0]<0 || ent.offsets[1]<0) PRINTHELP()
|
if (ent.offsets[0]<0 || ent.offsets[1]<0) PRINTHELP()
|
||||||
SCRIPT_MSG(_T("FileReadWord: %s->%s\n"),line.gettoken_str(1),line.gettoken_str(2));
|
SCRIPT_MSG(_T("FileReadWord: %s->%s\n"),line.gettoken_str(1),line.gettoken_str(2));
|
||||||
return add_entry(&ent);
|
return add_entry(&ent);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue