Use a custom file reader with UTF8 support for nsi/nsh/nlf files and store UTF16LE or MBCS (stringblock) strings in ExeHeadStringList

git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6289 212acab6-be3b-0410-9dea-997c60f758d6
2013-03-07 21:25:35 +00:00 · 2013-03-07 21:25:35 +00:00 · dcddf977b2
commit dcddf977b2
parent e6ac4e6d9b
18 changed files with 1208 additions and 623 deletions
--- a/Docs/src/compiler.but
+++ b/Docs/src/compiler.but
@ -6,13 +6,15 @@ These commands are similar to the C preprocessor in terms of purpose and functio

 \S1{include} !include

-\c [/NONFATAL] file
+\# NOTE: \NsisInputCharset define cannot be used in a \c block 

-This command will include 'file' as if it was part of the original script. Note that if a file is included in another directory, the current directory is still where the script was compiled from (not where the included file resides). If the compiler can't find the file it will look for it in every include directory. See \R{addincludedir}{!addincludedir} for more information. If the /nonfatal switch is used and no files are found, a warning will be issued instead of an error.
+\c [/NONFATAL] [/CHARSET=ACP|OEM|CP#|UTF8] file
+
+This command will include 'file' as if it was part of the original script. Note that if a file is included in another directory, the current directory is still where the script was compiled from (not where the included file resides). If the compiler can't find the file it will look for it in every include directory. See \R{addincludedir}{!addincludedir} for more information. If the /nonfatal switch is used and no files are found, a warning will be issued instead of an error. /charset can be used to specify a codepage for plain text files without a BOM.

 \c !include WinMessages.nsh
 \c !include Library.nsh
-\c !include C:\MyConfig.nsi
+\c !include /CHARSET=CP1252 C:\MyConfig.nsi
 \c !include ..\MyConfig.nsh
 \c !include /NONFATAL file_that_may_exist_or_not.nsh

--- a/Docs/src/config.but
+++ b/Docs/src/config.but
@ -12,6 +12,8 @@

 \define{NsisACPcp} system default ANSI codepage (ACP)

+\define{NsisInputCharset} ACP|OEM|CP#|UTF8
+
 \define{NsisWarnBlockContainerBegin} \\<div class=\\"wb\\"\\>

 \define{NsisWarnBlockContainerEnd} \\</div\\>
--- a/Docs/src/usage.but
+++ b/Docs/src/usage.but
@ -29,6 +29,8 @@ If you want to use MakeNSIS on the command line, the syntax of the makensis comm

 \b /NOCD disables the current directory change to that of the .nsi file

+\b /INPUTCHARSET allows you to specify a codepage for files without a BOM. (\NsisInputCharset)
+
 \b Using the /D switch one or more times will add to symbols to the globally defined list (See !define).

 \b Using the /X switch one or more times will execute the code you specify following it. Example: "/XAutoCloseWindow false"
--- a/Source/ShConstants.cpp
+++ b/Source/ShConstants.cpp
@ -88,3 +88,14 @@ int ConstantsStringList::get_internal_idx(int idx)
  }
  return -1;
 }
+
+bool ConstantsStringList::set_values(const TCHAR *name, int val1, int val2)
+{
+  int v = SortedStringListND<struct constantstring>::find(name, -1);
+  if (-1 == v) return false;
+
+  struct constantstring & cs = ((struct constantstring*) m_gr.get())[v];
+  cs.value1 = val1;
+  cs.value2 = val2;
+  return true;
+}
--- a/Source/ShConstants.h
+++ b/Source/ShConstants.h
@ -40,6 +40,7 @@ class ConstantsStringList : public SortedStringListND<struct constantstring>
    int get_value1(int idx);
    int get_value2(int idx);
    TCHAR *idx2name(int idx);
+    bool set_values(const TCHAR *name, int val1, int val2);

  private:
    int m_index;
--- a/Source/build.cpp
+++ b/Source/build.cpp
@ -101,9 +101,8 @@ CEXEBuild::CEXEBuild() :
    m_exehead_size(0)
 {
  linecnt = 0;
-  fp = 0;
+  curlinereader = 0;
  curfilename = 0;
-  curfile_unicode = FALSE;

  display_info=1;
  display_script=1;
@ -116,9 +115,6 @@ CEXEBuild::CEXEBuild() :
  multiple_entries_instruction=0;

  build_include_depth=0;
-#ifndef _UNICODE
-  build_include_isutf8=false;
-#endif

  has_called_write_output=false;

@ -131,6 +127,7 @@ CEXEBuild::CEXEBuild() :
 #endif

  build_unicode=false;
+  build_lockedunicodetarget=false;
  m_target_type=TARGET_X86ANSI;

  // automatically generated header file containing all defines
@ -244,8 +241,8 @@ CEXEBuild::CEXEBuild() :

  uninstaller_writes_used=0;

-  build_strlist.add(_T(""), CP_ACP, false, build_unicode);
-  ubuild_strlist.add(_T(""), CP_ACP, false, build_unicode);
+  build_strlist.addemptystring();
+  ubuild_strlist.addemptystring();

  build_langstring_num=0;
  ubuild_langstring_num=0;
@ -365,77 +362,17 @@ CEXEBuild::CEXEBuild() :
  m_ShellConstants.add(_T("RESOURCES"), CSIDL_RESOURCES, CSIDL_RESOURCES);
  m_ShellConstants.add(_T("RESOURCES_LOCALIZED"), CSIDL_RESOURCES_LOCALIZED, CSIDL_RESOURCES_LOCALIZED);
  m_ShellConstants.add(_T("CDBURN_AREA"), CSIDL_CDBURN_AREA, CSIDL_CDBURN_AREA);
-
-  unsigned int program_files = add_string(_T("ProgramFilesDir"), 0);
-  unsigned int program_files_def = add_string(_T("C:\\Program Files"));
-
-  if ((program_files >= 0x40) || (program_files_def >= 0xFF))
-  {
-    // see Source\exehead\util.c for implementation details
-    // basically, it knows it needs to get folders from the registry when the 0x80 is on
-    ERROR_MSG(_T("Internal compiler error: too many strings added to strings block before adding shell constants!\n"));
-    throw out_of_range("Internal compiler error: too many strings added to strings block before adding shell constants!");
-  }
-
-  m_ShellConstants.add(_T("PROGRAMFILES"),   0x80 | program_files, program_files_def);
-
-  unsigned int program_files64_def = add_string(_T("$PROGRAMFILES"));
-
-  if (program_files64_def > 0xFF)
-  {
-    ERROR_MSG(_T("Internal compiler error: too many strings added to strings block before adding shell constants!\n"));
-    throw out_of_range("Internal compiler error: too many strings added to strings block before adding shell constants!");
-  }
-
-  m_ShellConstants.add(_T("PROGRAMFILES32"), 0x80 | program_files, program_files_def);
-  m_ShellConstants.add(_T("PROGRAMFILES64"), 0xC0 | program_files, program_files64_def);
-
-  unsigned int common_files = add_string(_T("CommonFilesDir"), 0);
-  unsigned int common_files_def = add_string(_T("$PROGRAMFILES\\Common Files"));
-
-  if ((common_files > 0x40) || (common_files_def > 0xFF))
-  {
-    ERROR_MSG(_T("Internal compiler error: too many strings added to strings block before adding shell constants!\n"));
-    throw out_of_range("Internal compiler error: too many strings added to strings block before adding shell constants!");
-  }
-
-  m_ShellConstants.add(_T("COMMONFILES"),    0x80 | common_files,  common_files_def);
-
-  unsigned int common_files64_def = add_string(_T("$COMMONFILES"));
-
-  if (common_files64_def > 0xFF)
-  {
-    ERROR_MSG(_T("Internal compiler error: too many strings added to strings block before adding shell constants!\n"));
-    throw out_of_range("Internal compiler error: too many strings added to strings block before adding shell constants!");
-  }
-
-  m_ShellConstants.add(_T("COMMONFILES32"),  0x80 | common_files,  common_files_def);
-  m_ShellConstants.add(_T("COMMONFILES64"),  0xC0 | common_files,  common_files64_def);
-
-  set_uninstall_mode(1);
-
-  unsigned int uprogram_files = add_string(_T("ProgramFilesDir"), 0);
-  unsigned int uprogram_files_def = add_string(_T("C:\\Program Files"));
-  unsigned int uprogram_files64_def = add_string(_T("$PROGRAMFILES"));
-  unsigned int ucommon_files = add_string(_T("CommonFilesDir"), 0);
-  unsigned int ucommon_files_def = add_string(_T("$PROGRAMFILES\\Common Files"));
-  unsigned int ucommon_files64_def = add_string(_T("$COMMONFILES"));
-
-  if (uprogram_files != program_files
-      || uprogram_files_def != program_files_def
-      || uprogram_files64_def != program_files64_def
-      || ucommon_files != common_files
-      || ucommon_files_def != common_files_def
-      || ucommon_files64_def != common_files64_def)
-  {
-    ERROR_MSG(_T("Internal compiler error: installer's shell constants are different than uninstallers!\n"));
-    throw out_of_range("Internal compiler error: installer's shell constants are different than uninstallers!");
-  }
+  // PROGRAMFILES and COMMONFILES do a registry lookup and the required string offsets are filled in later.
+  // We do this because the unicode mode has to be locked when we call add_string...
+  m_ShellConstants.add(_T("PROGRAMFILES"),   0, 0);
+  m_ShellConstants.add(_T("PROGRAMFILES32"), 0, 0);
+  m_ShellConstants.add(_T("PROGRAMFILES64"), 0, 0);
+  m_ShellConstants.add(_T("COMMONFILES"),   0, 0);
+  m_ShellConstants.add(_T("COMMONFILES32"), 0, 0);
+  m_ShellConstants.add(_T("COMMONFILES64"), 0, 0);

  set_uninstall_mode(0);
-
  set_code_type_predefines();
-
  set_target_architecture_predefines();
 }

@ -472,12 +409,75 @@ void CEXEBuild::initialize(const TCHAR *makensis_path)

 int CEXEBuild::getcurdbsize() { return cur_datablock->getlen(); }

+
+void CEXEBuild::init_shellconstantvalues()
+{
+  static bool done = false;
+  if (done) return ;
+  done = true;
+
+  // Note: The order matters because some of the strings are preprocessed and cf must be <= 0x40
+  unsigned int pf       = add_asciistring(_T("ProgramFilesDir"), 0);
+  unsigned int cf       = add_asciistring(_T("CommonFilesDir"), 0);
+  unsigned int pf_def   = add_asciistring(_T("C:\\Program Files"));
+  m_ShellConstants.set_values(_T("PROGRAMFILES"),   0x80 | pf, pf_def);
+  unsigned int pf64_def = add_asciistring(_T("$PROGRAMFILES"));
+  m_ShellConstants.set_values(_T("PROGRAMFILES32"), 0x80 | pf, pf_def);
+  m_ShellConstants.set_values(_T("PROGRAMFILES64"), 0xC0 | pf, pf64_def);
+  unsigned int cf_def   = add_asciistring(_T("$PROGRAMFILES\\Common Files"));
+  m_ShellConstants.set_values(_T("COMMONFILES"),    0x80 | cf, cf_def);
+  unsigned int cf64_def = add_asciistring(_T("$COMMONFILES"));
+  m_ShellConstants.set_values(_T("COMMONFILES32"),  0x80 | cf, cf_def);
+  m_ShellConstants.set_values(_T("COMMONFILES64"),  0xC0 | cf, cf64_def);
+
+  if ( (pf >= 0x40 || pf_def >= 0xFF || pf64_def > 0xFF) // BUGBUG: pf_def should be ">"?
+    || (cf >  0x40 || cf_def >  0xFF || cf64_def > 0xFF) )
+  {
+    // see Source\exehead\util.c for implementation details
+    // basically, it knows it needs to get folders from the registry when the 0x80 is on
+    ERROR_MSG(_T("Internal compiler error: too many strings added to strings block before adding shell constants!\n"));
+    throw out_of_range("Internal compiler error: too many strings added to strings block before adding shell constants!");
+  }
+
+  const int orgunmode = uninstall_mode;
+  set_uninstall_mode(1);
+  unsigned int unpf = add_asciistring(_T("ProgramFilesDir"), 0);
+  unsigned int uncf = add_asciistring(_T("CommonFilesDir"), 0);
+  unsigned int unpf_def = add_asciistring(_T("C:\\Program Files"));
+  unsigned int unpf64_def = add_asciistring(_T("$PROGRAMFILES"));
+  unsigned int uncf_def = add_asciistring(_T("$PROGRAMFILES\\Common Files"));
+  unsigned int uncf64_def = add_asciistring(_T("$COMMONFILES"));
+  set_uninstall_mode(orgunmode);
+
+  if ( unpf != pf
+    || unpf_def != pf_def
+    || unpf64_def != pf64_def
+    || uncf != cf
+    || uncf_def != cf_def
+    || uncf64_def != cf64_def)
+  {
+    ERROR_MSG(_T("Internal compiler error: installer's shell constants are different than uninstallers!\n"));
+    throw out_of_range("Internal compiler error: installer's shell constants are different than uninstallers!");
+  }
+}
+
 // returns offset in stringblock
-int CEXEBuild::add_string(const TCHAR *string, int process/*=1*/, WORD codepage/*=CP_ACP*/)
+int CEXEBuild::add_string(const TCHAR *string, int process/*=1*/, UINT codepage/*=-2*/)
 {
  if (!string || !*string) return 0;
-
-  if (*string == _T('$') && *(string+1) == _T('(')) {
+  build_lockedunicodetarget = true;
+  init_shellconstantvalues();
+  if (-2 == codepage)
+  {
+    assert(curlinereader);
+    codepage = curlinereader->StreamEncoding().GetCodepage();
+    // If the current source file is Unicode we have to pick a real codepage for ANSI!
+    // It might not be the correct codepage but it's the best we can do.
+    // Not using CP_ACP to avoid heisenbugs when compiled on a different system.
+    if (NStreamEncoding::IsUnicodeCodepage(codepage)) codepage = 1252;
+  }
+  if (*string == _T('$') && *(string+1) == _T('('))
+  {
    int idx = 0;
    TCHAR *cp = _tcsdup(string+2);
    TCHAR *p = _tcschr(cp, _T(')'));
@ -489,18 +489,28 @@ int CEXEBuild::add_string(const TCHAR *string, int process/*=1*/, WORD codepage/
    if (idx < 0) return idx;
  }

-  if (!process) return cur_strlist->add(string, codepage, false, build_unicode);
+  int i;
+  if (process)
+  {
+    TCHAR buf[NSIS_MAX_STRLEN*4];
+    preprocess_string(buf, string, codepage);
+    i = cur_strlist->add(buf, (WORD)codepage, true);
+  }
+  else
+    i = cur_strlist->add(string, (WORD)codepage, false);
+  return i;
+}

-  TCHAR buf[NSIS_MAX_STRLEN*4];
-  preprocess_string(buf,string,codepage);
-  return cur_strlist->add(buf,codepage, true, build_unicode);
+int CEXEBuild::add_asciistring(const TCHAR *string, int process/*=1*/)
+{
+  return add_string(string, process, 1252);
 }

 int CEXEBuild::add_intstring(const int i) // returns offset in stringblock
 {
-  TCHAR i_str[32];
-  wsprintf(i_str, _T("%d"), i);
-  return add_string(i_str);
+  TCHAR buf[32];
+  wsprintf(buf, _T("%d"), i);
+  return add_asciistring(buf, false);
 }

 #ifdef _UNICODE
@ -514,6 +524,7 @@ char* convert_processed_string_to_ansi(char *out, const TCHAR *in, WORD codepage
        {
            // convert all character up to, and including this code
            int cb = WideCharToMultiByte(codepage, 0, in, p-in, out, (p-in)*2, NULL, NULL);
+            if (!cb && i) return 0;
            out += cb;
            if (i == _T('\0'))
                break;
@ -629,6 +640,7 @@ int CEXEBuild::preprocess_string(TCHAR *out, const TCHAR *in, WORD codepage/*=CP
              // If found...
              if (idxConst >= 0)
              {
+                init_shellconstantvalues();
                int CSIDL_Value_current = m_ShellConstants.get_value1(idxConst);
                int CSIDL_Value_all = m_ShellConstants.get_value2(idxConst);
                *out++=(TCHAR)NS_SHELL_CODE; // Constant code identifier
@ -1540,7 +1552,7 @@ int CEXEBuild::resolve_coderefs(const TCHAR *str)
    {
      int x=sec->name_ptr;
      TCHAR fname[1024];
-      const TCHAR *section_name;
+      tstring section_name;
      if (x < 0)
      {
        // lang string
@ -1549,9 +1561,9 @@ int CEXEBuild::resolve_coderefs(const TCHAR *str)
      else
      {
        // normal string
-        section_name = cur_strlist->getTchar() + x;
+        cur_strlist->get(x,section_name);
      }
-      if (x) wsprintf(fname,_T("%s section \"%s\" (%d)"),str,section_name,cnt);
+      if (x) wsprintf(fname,_T("%s section \"%s\" (%d)"),str,section_name.c_str(),cnt);
      else wsprintf(fname,_T("unnamed %s section (%d)"),str,cnt);
      for (x = sec->code; x < sec->code+sec->code_size; x ++)
      {
@ -2253,12 +2265,12 @@ void CEXEBuild::AddStandardStrings()
 #ifdef NSIS_CONFIG_UNINSTALL_SUPPORT
  if (uninstall_mode)
  {
-    cur_header->str_uninstchild = add_string(_T("$TEMP\\$1u_.exe"));
-    cur_header->str_uninstcmd = add_string(_T("\"$TEMP\\$1u_.exe\" $0 _?=$INSTDIR\\"));
+    cur_header->str_uninstchild = add_asciistring(_T("$TEMP\\$1u_.exe"));
+    cur_header->str_uninstcmd = add_asciistring(_T("\"$TEMP\\$1u_.exe\" $0 _?=$INSTDIR\\"));
  }
 #endif//NSIS_CONFIG_UNINSTALL_SUPPORT
 #ifdef NSIS_SUPPORT_MOVEONREBOOT
-  cur_header->str_wininit = add_string(_T("$WINDIR\\wininit.ini"));
+  cur_header->str_wininit = add_asciistring(_T("$WINDIR\\wininit.ini"));
 #endif//NSIS_SUPPORT_MOVEONREBOOT
 }

@ -2279,12 +2291,7 @@ void CEXEBuild::PrepareHeaders(IGrowBuf *hdrbuf)
  entry_writer::write_block(cur_entries, &sink);

  cur_header->blocks[NB_STRINGS].offset = sizeof(header) + blocks_buf.getlen();
-#ifdef _UNICODE
-  if (!build_unicode)
-      blocks_buf.add(cur_strlist->getAnsi(), cur_strlist->getcount());
-  else
-#endif
-    blocks_buf.add(cur_strlist->getTchar(), cur_strlist->getcount()*sizeof(TCHAR));
+  blocks_buf.add(cur_strlist->getstorageptr(), cur_strlist->gettotalsize());

  cur_header->blocks[NB_LANGTABLES].offset = sizeof(header) + blocks_buf.getlen();
  lang_table_writer::write_block(cur_langtables, &sink, cur_header->langtable_size);
@ -2724,7 +2731,7 @@ int CEXEBuild::write_output(void)
  int ne=build_header.blocks[NB_ENTRIES].num;
  INFO_MSG(_T("%d instruction%s (%d bytes), "),ne,ne==1?_T(""):_T("s"),ne*sizeof(entry));
  int ns=build_strlist.getnum();
-  INFO_MSG(_T("%d string%s (%d bytes), "),ns,ns==1?_T(""):_T("s"),build_strlist.getcount()*(build_unicode ? sizeof(CHAR) : sizeof(TCHAR)));
+  INFO_MSG(_T("%d string%s (%d bytes), "),ns,ns==1?_T(""):_T("s"),build_strlist.gettotalsize());
  int nlt=build_header.blocks[NB_LANGTABLES].num;
  INFO_MSG(_T("%d language table%s (%d bytes).\n"),nlt,nlt==1?_T(""):_T("s"),build_langtables.getlen());
  if (ubuild_entries.getlen())
@ -2753,7 +2760,7 @@ int CEXEBuild::write_output(void)
    ne=build_uninst.blocks[NB_ENTRIES].num;
    INFO_MSG(_T("%d instruction%s (%d bytes), "),ne,ne==1?_T(""):_T("s"),ubuild_entries.getlen());
    ns=ubuild_strlist.getnum();
-    INFO_MSG(_T("%d string%s (%d bytes), "),ns,ns==1?_T(""):_T("s"),ubuild_strlist.getcount()*(build_unicode ? sizeof(CHAR) : sizeof(TCHAR)));
+    INFO_MSG(_T("%d string%s (%d bytes), "),ns,ns==1?_T(""):_T("s"),ubuild_strlist.gettotalsize());
    nlt=build_uninst.blocks[NB_LANGTABLES].num;
    INFO_MSG(_T("%d language table%s (%d bytes).\n"),nlt,nlt==1?_T(""):_T("s"),ubuild_langtables.getlen());
  }
@ -3460,14 +3467,14 @@ again:
  if (ret != PS_OK) return ret;

  // don't move this, depends on [un.]
-  zero_offset=add_string(_T("$0"));
+  zero_offset=add_asciistring(_T("$0"));

  // SetDetailsPrint none
  ret=add_entry_direct(EW_SETFLAG, FLAG_OFFSET(status_update), add_intstring(6));
  if (ret != PS_OK) return ret;

  // StrCmp $PLUGINSDIR ""
-  ret=add_entry_direct(EW_STRCMP, add_string(_T("$PLUGINSDIR")), 0, 0, ns_label.add(_T("Initialize_____Plugins_done"),0));
+  ret=add_entry_direct(EW_STRCMP, add_asciistring(_T("$PLUGINSDIR")), 0, 0, ns_label.add(_T("Initialize_____Plugins_done"),0));
  if (ret != PS_OK) return ret;
  // Push $0
  ret=add_entry_direct(EW_PUSHPOP, zero_offset);
@ -3476,7 +3483,7 @@ again:
  ret=add_entry_direct(EW_SETFLAG, FLAG_OFFSET(exec_error));
  if (ret != PS_OK) return ret;
  // GetTempFileName $0
-  ret=add_entry_direct(EW_GETTEMPFILENAME, var_zero, add_string(_T("$TEMP")));
+  ret=add_entry_direct(EW_GETTEMPFILENAME, var_zero, add_asciistring(_T("$TEMP")));
  if (ret != PS_OK) return ret;
  // Delete $0 [simple, nothing that could clash with special temp permissions]
  ret=add_entry_direct(EW_DELETEFILE, zero_offset, DEL_SIMPLE);
@ -3503,7 +3510,7 @@ again:
  // error
  if (add_label(_T("Initialize_____Plugins_error"))) return PS_ERROR;
  // error message box
-  ret=add_entry_direct(EW_MESSAGEBOX, MB_OK|MB_ICONSTOP|(IDOK<<21), add_string(_T("Error! Can't initialize plug-ins directory. Please try again later.")));
+  ret=add_entry_direct(EW_MESSAGEBOX, MB_OK|MB_ICONSTOP|(IDOK<<21), add_asciistring(_T("Error! Can't initialize plug-ins directory. Please try again later.")));
  if (ret != PS_OK) return ret;
  // Quit
  ret=add_entry_direct(EW_QUIT);
@ -3673,7 +3680,10 @@ int CEXEBuild::change_target_architecture()
 #ifdef _UNICODE
 int CEXEBuild::set_target_charset(bool unicode)
 {
+  if (build_lockedunicodetarget) return PS_ERROR;
  build_unicode = unicode;
+  build_strlist.setunicode(unicode);
+  ubuild_strlist.setunicode(unicode);
  return change_target_architecture();
 }
 #endif
--- a/Source/build.h
+++ b/Source/build.h
@ -30,6 +30,7 @@
 #include "manifest.h"
 #include "icon.h"
 #include <memory.h>
+#include "utf.h"

 #include "exehead/fileform.h"
 #include "exehead/config.h"
@ -114,13 +115,13 @@ class CEXEBuild {

    // process a script (you can process as many scripts as you want,
    // it is as if they are concatenated)
-    int process_script(FILE *filepointer, const TCHAR *filename, BOOL unicode);
+    int process_script(NIStream&Strm, const TCHAR *filename);
    int process_oneline(TCHAR *line, const TCHAR *curfilename, int lineptr);
    
    // you only get to call write_output once, so use it wisely.
    int write_output(void);

-    void print_help(TCHAR *commandname=NULL);
+    void print_help(const TCHAR *commandname=NULL);

    DefineList definedlist; // List of identifiers marked as "defined" like
                            // C++ macro definitions such as _UNICODE.
@ -133,8 +134,7 @@ class CEXEBuild {

    int linecnt;
    const TCHAR *curfilename;
-    BOOL curfile_unicode;
-    FILE *fp;
+    NStreamLineReader* curlinereader;

    HWND notify_hwnd;
    void notify(notify_e code, const TCHAR *data) const;
@ -176,9 +176,9 @@ class CEXEBuild {
    void del_date_time_predefines();
 #endif
    int parseScript();
-    int includeScript(TCHAR *f);
+    int includeScript(const TCHAR *f, NStreamEncoding&enc);
    int MacroExists(const TCHAR *macroname);
-    int LoadLicenseFile(TCHAR *file, TCHAR** pdata, LineParser &line, BOOL* unicode);
+    int LoadLicenseFile(const TCHAR *file, TCHAR** pdata, const TCHAR *cmdname, WORD AnsiCP);
 #ifdef NSIS_FIX_DEFINES_IN_STRINGS
    void ps_addtoline(const TCHAR *str, GrowBuf &linedata, StringList &hist, bool bIgnoreDefines = false);
 #else
@ -238,10 +238,12 @@ class CEXEBuild {
    int add_db_data(IMMap *map); // returns offset
    int add_db_data(const char *data, int length); // returns offset
    int add_data(const char *data, int length, IGrowBuf *dblock); // returns offset
-    int add_string(const TCHAR *string, int process=1, WORD codepage=CP_ACP); // returns offset (in string table)
+    int add_string(const TCHAR *string, int process=1, UINT codepage=-2); // returns offset (in string table)
+    int add_asciistring(const TCHAR *string, int process=1); // For hardcoded 7bit/ASCII strings
    int add_intstring(const int i); // returns offset in stringblock

    int preprocess_string(TCHAR *out, const TCHAR *in, WORD codepage=CP_ACP);
+    void init_shellconstantvalues();

 #ifdef NSIS_CONFIG_PLUGIN_SUPPORT
    int add_plugins_dir_initializer(void);
@ -328,10 +330,8 @@ class CEXEBuild {
     * return a PS_ERROR.  If this function call is overwriting a set user string,
     * this will return a PS_WARNING.
     */
-    int SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode);
-#ifndef _UNICODE
-    int SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8);
-#endif
+    int SetLangString(const TCHAR *name, LANGID lang, const TCHAR *str, BOOL LicenseData);
+    int SetLangString(const TCHAR *name, LANGID lang, const TCHAR *str);

    /**
     * Sets the user string to the specific NLF_STRINGS id.
@ -405,6 +405,7 @@ class CEXEBuild {

    bool no_space_texts;
    bool build_unicode;// generate installer with unicode exehead?
+    bool build_lockedunicodetarget;

    bool has_called_write_output;

@ -427,9 +428,6 @@ class CEXEBuild {
    TCHAR build_output_filename[1024];

    int build_include_depth;
-#ifndef _UNICODE
-    bool build_include_isutf8; // UTF-8 LangString in .nsh hack for ANSI builds
-#endif

    // Added by ramon 6 jun 2003
 #ifdef NSIS_SUPPORT_VERSION_INFO
@ -465,7 +463,7 @@ class CEXEBuild {
    GrowBuf build_instruction_entry_map,ubuild_instruction_entry_map, *cur_instruction_entry_map;
    TinyGrowBuf build_functions, ubuild_functions, *cur_functions;
    TinyGrowBuf build_labels, ubuild_labels, *cur_labels;
-    MLStringList build_strlist, ubuild_strlist, *cur_strlist;
+    ExeHeadStringList build_strlist, ubuild_strlist, *cur_strlist;
    GrowBuf build_langtables, ubuild_langtables, *cur_langtables;
    TinyGrowBuf build_pages, ubuild_pages, *cur_pages;
    TinyGrowBuf build_ctlcolors, ubuild_ctlcolors, *cur_ctlcolors;
--- a/Source/exehead/Ui.c
+++ b/Source/exehead/Ui.c
@ -713,23 +713,23 @@ skipPage:
 #define _RICHEDIT_VER 0x0200
 #include <richedit.h>
 #undef _RICHEDIT_VER
-static DWORD dwRead;
+static DWORD g_cbLicRead;
 DWORD CALLBACK StreamLicense(DWORD_PTR dwCookie, LPBYTE pbBuff, LONG cb, LONG *pcb)
 {
-  lstrcpyn((LPTSTR)pbBuff,(LPTSTR)(dwCookie+dwRead),cb/sizeof(TCHAR));
+  lstrcpyn((LPTSTR)pbBuff,(LPTSTR)(dwCookie+g_cbLicRead),cb/sizeof(TCHAR));
  *pcb=lstrlen((LPTSTR)pbBuff)*sizeof(TCHAR);
-  dwRead+=*pcb;
+  g_cbLicRead+=*pcb;
  return 0;
 }
 #ifdef _UNICODE
 // on-the-fly conversion of Unicode to ANSI (because Windows doesn't recognize Unicode RTF data)
 DWORD CALLBACK StreamLicenseRTF(DWORD_PTR dwCookie, LPBYTE pbBuff, LONG cb, LONG *pcb)
 {
-  size_t len = lstrlen(((LPWSTR) dwCookie)+dwRead);
+  size_t len = lstrlen(((LPWSTR) dwCookie)+g_cbLicRead);
  len = min(len, cb/sizeof(WCHAR));
-  *pcb=WideCharToMultiByte(CP_ACP,0,((LPWSTR) dwCookie)+dwRead,len,(char*)pbBuff,cb,NULL,NULL);
+  *pcb=WideCharToMultiByte(CP_ACP,0,((LPWSTR) dwCookie)+g_cbLicRead,len,(char*)pbBuff,cb,NULL,NULL);
  // RTF uses only ASCII characters, so we can assume "number of output bytes" = "number of source WChar consumed"
-  dwRead+=*pcb;
+  g_cbLicRead+=*pcb;
  return 0;
 }
 #endif
@ -738,7 +738,7 @@ static BOOL CALLBACK LicenseProc(HWND hwndDlg, UINT uMsg, WPARAM wParam, LPARAM
 {
  page *m_this_page=g_this_page;
  HWND hwLicense;
-  static int ignoreWMCommand;
+#define LicIgnoreWMCommand g_cbLicRead // g_cbLicRead is only used in WM_INITDIALOG during EM_STREAMIN

  if (uMsg == WM_INITDIALOG)
  {
@ -768,13 +768,13 @@ static BOOL CALLBACK LicenseProc(HWND hwndDlg, UINT uMsg, WPARAM wParam, LPARAM
    SendMessage(hwLicense,EM_SETBKGNDCOLOR,0,lbg>=0?lbg:GetSysColor(-lbg));
 #undef lbg
    SendMessage(hwLicense,EM_SETEVENTMASK,0,ENM_LINK|ENM_KEYEVENTS); //XGE 8th September 2002 Or'd in ENM_KEYEVENTS
-    dwRead=0;
    SendMessage(hwLicense,EM_EXLIMITTEXT,0,mystrlen(l));
+    g_cbLicRead = 0;
    SendMessage(hwLicense,EM_STREAMIN,lt,(LPARAM)&es);
-    ignoreWMCommand = 0;
+    LicIgnoreWMCommand = 0;
    return FALSE;
  }
-  if (uMsg == WM_COMMAND && HIWORD(wParam) == BN_CLICKED && !ignoreWMCommand) {
+  if (uMsg == WM_COMMAND && HIWORD(wParam) == BN_CLICKED && !LicIgnoreWMCommand) {
    if (m_this_page->flags & PF_LICENSE_FORCE_SELECTION) {
      int is = SendMessage(GetUIItem(IDC_LICENSEAGREE), BM_GETCHECK, 0, 0) & BST_CHECKED;
      m_this_page->flags &= ~PF_LICENSE_SELECTED;
@ -836,7 +836,7 @@ static BOOL CALLBACK LicenseProc(HWND hwndDlg, UINT uMsg, WPARAM wParam, LPARAM
  }
  if (uMsg == WM_NOTIFY_INIGO_MONTOYA)
  {
-    ignoreWMCommand++;
+    LicIgnoreWMCommand++;
  }
  return HandleStaticBkColor();
 }
--- a/Source/lang.cpp
+++ b/Source/lang.cpp
@ -485,7 +485,7 @@ int CEXEBuild::DefineInnerLangString(int id, int process/*=-1*/) {
 // @return If the language id, the variable name or string is invalid, it will
 // return a PS_ERROR.  If this function call is overwriting a set user string,
 // this will return a PS_WARNING.
-int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL unicode) {
+int CEXEBuild::SetLangString(const TCHAR *name, LANGID lang, const TCHAR *str, BOOL LicenseData) {
  if (!str || !name) return PS_ERROR;

  LanguageTable *table = GetLangTable(lang);
@ -493,33 +493,24 @@ int CEXEBuild::SetLangString(TCHAR *name, LANGID lang, const TCHAR *str, BOOL un

  int sn;

-  if (_tcsclen(str) > NSIS_MAX_STRLEN-1)
+  if (!LicenseData && _tcsclen(str) > NSIS_MAX_STRLEN-1)
    warning_fl(_T("LangString \"%s\" longer than NSIS_MAX_STRLEN!"), name);

  int pos = build_langstrings.get(name, &sn);
  if (pos < 0)
    pos = build_langstrings.add(name, &sn);

-  if (table->lang_strings->set(sn, unicode ? str : CtoTString2(TtoCString(str),table->nlf.m_uCodePage)))
+  if (table->lang_strings->set(sn, str))
    return PS_WARNING;

  return PS_OK;
 }

-#ifndef _UNICODE
-int CEXEBuild::SetUTF8LangString(TCHAR *name, LANGID lang, const char* stru8)
+int CEXEBuild::SetLangString(const TCHAR *name, LANGID lang, const TCHAR *str)
 {
-  LanguageTable *table = GetLangTable(lang);
-  if (!table) return PS_ERROR;
-  if (!Platform_SupportsUTF8Conversion()) return PS_ERROR;
-
-  EXEHEADTCHAR_T *bufEHTStr = UTF8ToExeHeadTStrDup(stru8, table->nlf.m_uCodePage);
-  if (!bufEHTStr) return PS_ERROR;
-  const int ret = SetLangString(name, lang, bufEHTStr, sizeof(EXEHEADTCHAR_T) > 1);
-  free(bufEHTStr);
-  return ret;
+  return SetLangString(name, lang, str, false);
 }
-#endif
+

 // Sets the user string to the specific NLF_STRINGS id.
 //
@ -912,41 +903,36 @@ void CEXEBuild::FillLanguageTable(LanguageTable *table) {
  }
 }

-TCHAR SkipComments(FILE *f) {
-  int c;
-  while ((c = _fgettc(f))) {
-    while (c == _T('\n') || c == _T('\r')) {
-      c = _fgettc(f); // Skip empty lines
+static UINT GetNextNLFLine(NStreamLineReader&lr, TCHAR*Buf, UINT cchBuf) {
+  for (;;) {
+    UINT err = lr.ReadLine(Buf, cchBuf);
+    if (NStream::OK != err) {
+      if (lr.IsEOF()) err = NStream::OK;
+      return err;
    }
-    if (c == _T('#') || c == _T(';')) {
-      while ((c = _fgettc(f))) {
-       if (c == _T('\n')) break;
-      }
-    }
-    else break;
+    if (NStream::IsNewline(*Buf, false)) continue;
+    if (_T('#') != *Buf && _T(';') != *Buf) return err;
  }
-  return (TCHAR) c;
+}
+static inline bool GetNextNLFLine(NStreamLineReader&lr, TCHAR*Buf, UINT cchBuf, UINT&errlr) {
+  errlr = GetNextNLFLine(lr, Buf, cchBuf);
+  return NStream::OK == errlr;
 }

 // NSIS Language File parser
 LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
-  BOOL unicode;
-  FILE *f = FOPENTEXT2(filename, "r", &unicode);
-  if (!f) {
+  NIStream strm;
+  strm.StreamEncoding().SetCodepage(NStreamEncoding::ACP);
+  if (!strm.OpenFileForReading(filename)) {
    ERROR_MSG(_T("Error: Can't open language file - \"%s\"!\n"),filename);
    return 0;
  }
-
-#ifndef _UNICODE
-  char fencoding = 0; // 0 = ansi, 8 = utf-8 (16/17 for uft-16le/be not supported)
-  if (IsUTF8BOM(f)) fencoding = 8;
-#endif
+  NStreamLineReader lr(strm);
+  UINT errlr;

  // Check header
  TCHAR buf[NSIS_MAX_STRLEN];
-  buf[0] = SkipComments(f);
-  _fgetts(buf+1, NSIS_MAX_STRLEN, f);
-
+  if (!GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN, errlr)) goto l_readerr;
  if (_tcsncmp(buf, _T("NLF v"), 5)) {
    ERROR_MSG(_T("Error: Invalid language file.\n"));
    return 0;
@ -960,31 +946,29 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
  }

  // Get language ID
-  buf[0] = SkipComments(f);
-  _fgetts(buf+1, NSIS_MAX_STRLEN, f);
+  if (!GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN, errlr)) goto l_readerr;
  LANGID lang_id = _ttoi(buf);

  // Get appropriate table
  LanguageTable *table = GetLangTable(lang_id);
-  if (!table)
-    return 0;
+  if (!table) return 0;

  NLF *nlf = &table->nlf;
-
  if (nlf->m_bLoaded) {
    ERROR_MSG(_T("Error: can't load same language file twice.\n"));
    return 0;
  }

  // Generate language name
-  TCHAR *p, *p2, t = 0;
+  TCHAR *p, *p2, *p3, t = 0;

  p = _tcsrchr(filename, _T('.'));
  if (p) {
    t = *p;
    *p = 0;
  }
-  p2 = _tcsrchr(filename, _T('\\'));
+  p2 = _tcsrchr(filename, _T('\\')), p3 = _tcsrchr(filename, _T('/'));
+  if (p3 > p2) p2 = p3;
  if (p2) {
    p2++;
    nlf->m_szName = (TCHAR*)malloc((_tcslen(p2)+1)*sizeof(TCHAR));
@ -1006,8 +990,7 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
  int temp;

  // Get font
-  buf[0] = SkipComments(f);
-  _fgetts(buf+1, NSIS_MAX_STRLEN, f);
+  if (!GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN, errlr)) goto l_readerr;
  if (!nlf->m_szFont) {
    temp=_tcslen(buf);
    while (buf[temp-1] == _T('\n') || buf[temp-1] == _T('\r')) {
@ -1020,8 +1003,7 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
    }
  }

-  buf[0] = SkipComments(f);
-  _fgetts(buf+1, NSIS_MAX_STRLEN, f);
+  if (!GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN, errlr)) goto l_readerr;
  if (!nlf->m_iFontSize) {
    if (buf[0] != _T('-') || buf[1] != 0) {
      nlf->m_iFontSize = _ttoi(buf);
@ -1030,48 +1012,41 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {

  // Get code page
  nlf->m_uCodePage = CP_ACP;
-  buf[0] = SkipComments(f);
-  _fgetts(buf+1, NSIS_MAX_STRLEN, f);
+  if (!GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN, errlr)) goto l_readerr;
  if (buf[0] != _T('-') || buf[1] != 0) {
    nlf->m_uCodePage = _ttoi(buf);
    if (!IsValidCodePage(nlf->m_uCodePage))
+    {
+      warning_fl(_T("%s language file uses a codepage that is not supported on this system, using ACP!"), nlf->m_szName);
      nlf->m_uCodePage = CP_ACP;
+    }
  }

-#ifdef _UNICODE
-  if (!unicode && nlf->m_szFont) // convert font name from ANSI to Unicode now that we know the language codepage
+  if (!lr.IsUnicode())
  {
-    TCHAR* str = nlf->m_szFont;
-    nlf->m_szFont = _tcsdup(CtoTString2(TtoCString(str), table->nlf.m_uCodePage));
-    free(str);
-  }
-#else
-  if (8 == fencoding && nlf->m_szFont)
-  {
-    TCHAR* str = nlf->m_szFont;
-    EXEHEADTCHAR_T *bufConv = UTF8ToExeHeadTStrDup(str, table->nlf.m_uCodePage);
-    nlf->m_szFont = bufConv;
-    if (!nlf->m_szFont)
+    if (NStreamEncoding::IsUnicodeCodepage(nlf->m_uCodePage))
    {
-      ERROR_MSG(_T("Error: Unable to convert font name\n"));
-      return 0;
+      warning_fl(_T("%s unicode language file does not have a BOM!"), nlf->m_szName);
+    }
+
+    if (nlf->m_szFont)
+    {
+      // Convert font name now that we know the codepage: ACP > NLF CP > TCHAR.
+      TCHAR* oldfont = nlf->m_szFont;
+      nlf->m_szFont = _tcsdup(CtoTString2(TtoCString(oldfont), nlf->m_uCodePage));
+      free(oldfont);
    }
-    free(str);
  }
-#endif
+

  // Get RTL setting
-  nlf->m_szStrings[NLF_RTL] = (TCHAR *)malloc(2*sizeof(TCHAR));
+  if (!GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN, errlr)) goto l_readerr;
+  nlf->m_szStrings[NLF_RTL] = (TCHAR*) malloc(2*sizeof(TCHAR));
  nlf->m_bRTL = false;
-  buf[0] = SkipComments(f);
-  _fgetts(buf+1, NSIS_MAX_STRLEN, f);
  if (buf[0] == _T('R') && buf[1] == _T('T') && buf[2] == _T('L') && (!buf[3] || buf[3] == _T('\r') || buf[3] == _T('\n'))) {
    nlf->m_bRTL = true;
-    _tcscpy(nlf->m_szStrings[NLF_RTL], _T("1"));
-  }
-  else {
-    _tcscpy(nlf->m_szStrings[NLF_RTL], _T("0"));
  }
+  _tcscpy(nlf->m_szStrings[NLF_RTL], nlf->m_bRTL ? _T("1") : _T("0"));

  // Read strings
  for (int i = 0; i < NLF_STRINGS_NO_SPECIAL; i++) {
@ -1123,38 +1098,14 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
        continue;
    }

-    buf[0] = SkipComments(f);
-
-    _fgetts(buf+1, NSIS_MAX_STRLEN, f);
-#ifndef _UNICODE
-    if (8 == fencoding)
-    {
-      if (!Platform_SupportsUTF8Conversion()) {
-        ERROR_MSG(_T("Error: UTF-8 language files not supported on this OS!\n"));
-        return 0;
-      }
-      EXEHEADTCHAR_T *bufConv = UTF8ToExeHeadTStrDup(buf, nlf->m_uCodePage);
-      if (!bufConv) {
-        ERROR_MSG(_T("Error: Invalid UTF-8? (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
-        return 0;
-      }
-      else {
-        UINT cch = _tcslen(bufConv);
-        _tcsnccpy(buf, bufConv, NSIS_MAX_STRLEN);
-        if (cch >= NSIS_MAX_STRLEN-1) {
-          buf[NSIS_MAX_STRLEN-1] = _T('\0'); // Make sure we fail the "String too long" check
-        }
-      }
-      free(bufConv);
-    }
-#endif
-
+    errlr = GetNextNLFLine(lr, buf, NSIS_MAX_STRLEN);
    if (_tcslen(buf) == NSIS_MAX_STRLEN-1) {
      ERROR_MSG(_T("Error: String too long (string #%d - \"%s\")\n"), i, NLFStrings[i].szLangStringName);
      return 0;
    }
+    if (NStream::OK != errlr) goto l_readerr;
+    
    temp=_tcslen(buf);
-
    while (buf[temp-1] == _T('\n') || buf[temp-1] == _T('\r')) {
      buf[--temp] = 0;
    }
@ -1190,20 +1141,20 @@ LanguageTable * CEXEBuild::LoadLangFile(TCHAR *filename) {
      else *out = *in;
    }
    *out = 0;
-#ifdef _UNICODE
-    if (!unicode)
+    if (!lr.IsUnicode())
    {
-        TCHAR* str = nlf->m_szStrings[i];
-        nlf->m_szStrings[i] = _tcsdup(CtoTString2(TtoCString(str),table->nlf.m_uCodePage));
-        free(str);
+        TCHAR* oldstr = nlf->m_szStrings[i];
+        nlf->m_szStrings[i] = _tcsdup(CtoTString2(TtoCString(oldstr),table->nlf.m_uCodePage));
+        free(oldstr);
    }
-#endif
  }
-  fclose(f);

  nlf->m_bLoaded = true;
-
  return table;
+
+l_readerr:
+  ERROR_MSG(lr.GetErrorMessage(errlr).c_str());
+  return 0;
 }

 void CEXEBuild::DeleteLangTable(LanguageTable *table) {
--- a/Source/makenssi.cpp
+++ b/Source/makenssi.cpp
@ -150,6 +150,7 @@ static void print_usage()
         _T("  ") OPT_STR _T("PAUSE pauses after execution\n")
         _T("  ") OPT_STR _T("NOCONFIG disables inclusion of <path to makensis.exe>") PLATFORM_PATH_SEPARATOR_STR _T("nsisconf.nsh\n")
         _T("  ") OPT_STR _T("NOCD disabled the current directory change to that of the .nsi file\n")
+         _T("  ") OPT_STR _T("INPUTCHARSET <") TSTR_INPUTCHARSET _T(">\n")
         _T("  ") OPT_STR _T("Ddefine[=value] defines the symbol \"define\" for the script [to value]\n")
         _T("  ") OPT_STR _T("Xscriptcmd executes scriptcmd in script (i.e. \"") OPT_STR _T("XOutFile poop.exe\")\n")
         _T("  ")         _T("  parameters are processed by order (") OPT_STR _T("Ddef ins.nsi != ins.nsi ") OPT_STR _T("Ddef)\n")
@ -201,13 +202,11 @@ static tstring get_home()

 static int process_config(CEXEBuild& build, tstring& conf)
 {
-  BOOL unicode;
-  FILE *cfg=FOPENTEXT2(conf.c_str(),"rt",&unicode);
-  if (cfg)
+  NIStream strm;
+  if (strm.OpenFileForReading(conf.c_str()))
  {
-    build.INFO_MSG(_T("Processing config: %s\n"), conf.c_str());
-    int ret=build.process_script(cfg,(TCHAR*)conf.c_str(),unicode);
-    fclose(cfg);
+    build.INFO_MSG(_T("Processing config: %s\n"),conf.c_str());
+    int ret=build.process_script(strm,conf.c_str());
    if (ret != PS_OK && ret != PS_EOF)
    {
      build.ERROR_MSG(_T("Error in config on line %d -- aborting creation process\n"),build.linecnt); 
@ -234,6 +233,16 @@ static int change_to_script_dir(CEXEBuild& build, tstring& script)
  return 0;
 }

+// Returns true if argv[argi] exists and is non-empty; otherwise prints an error and returns false.
+static inline bool HasReqParam(TCHAR**argv,int argi,int argc)
+{
+  const bool present = argi<argc && *argv[argi];
+  if (!present) {
+    PrintColorFmtMsg_ERR(_T("Error: Missing required parameter!\n"));
+  }
+  return present;
+}
+
 #ifdef NSIS_HPUX_ALLOW_UNALIGNED_DATA_ACCESS
 extern "C" void allow_unaligned_data_access();
 #endif
@ -247,16 +256,16 @@ int _tmain(int argc, TCHAR **argv)
 #endif

  CEXEBuild build;
-  int do_cd=1;
-  int outputtried=0;
-  int argpos=1;
+  NStreamEncoding inputenc;
+  bool outputtried=0;
+  bool in_files=0;
+  bool do_cd=1;
+  bool no_logo=0;
  int nousage=0;
+  int argpos=1;
+  int tmpargpos=1;
  int files_processed=0;
  int cmds_processed=0;
-  FILE *fp;
-  int tmpargpos=1;
-  int no_logo=0;
-  int in_files=0;

 #ifdef _UNICODE
 #ifndef _O_U8TEXT
@ -329,7 +338,7 @@ int _tmain(int argc, TCHAR **argv)
      }
      else if (S7IsChEqualI('x',argv[argpos][1]) && argv[argpos][2])
      {
-        if (build.process_oneline(argv[argpos]+2,_T("command line"),argpos+1) != PS_OK)
+        if (build.process_oneline(argv[argpos]+2,_T("<command line>"),argpos+1) != PS_OK)
        {
          return 1;
        }
@ -379,12 +388,12 @@ int _tmain(int argc, TCHAR **argv)
      }
      else if (!_tcsicmp(&argv[argpos][1],_T("NOTIFYHWND")))
      {
+        if (!HasReqParam(argv, ++argpos, argc)) break;
 #ifdef _WIN32
-        build.notify_hwnd=(HWND)_ttol(argv[++argpos]);
+        build.notify_hwnd=(HWND)_ttol(argv[argpos]);
        if (!IsWindow(build.notify_hwnd))
          build.notify_hwnd=0;
 #else
-        argpos++;
        build.warning(OPT_STR _T("NOTIFYHWND is disabled for non Win32 platforms."));
 #endif
      }
@ -426,7 +435,20 @@ int _tmain(int argc, TCHAR **argv)
        build.warning(OPT_STR _T("Px is disabled for non Win32 platforms."));
 #endif
      }
-      else break;
+      else if (!_tcsicmp(&argv[argpos][1],_T("INPUTCHARSET")) || !_tcsicmp(&argv[argpos][1],_T("ICS")))
+      {
+        if (!HasReqParam(argv, ++argpos, argc)) break;
+        WORD cp = GetEncodingFromString(argv[argpos]);
+        if (NStreamEncoding::UNKNOWN == cp)
+        {
+          if (_tcsicmp(argv[argpos], _T("AUTO")))
+            build.warning(OPT_STR _T("INPUTCHARSET: Ignoring invalid charset %s"), argv[argpos]);
+          cp = NStreamEncoding::AUTO;
+        }
+        inputenc.SetCodepage(cp);
+      }
+      else
+        break;
    }
    else
    {
@ -465,45 +487,43 @@ int _tmain(int argc, TCHAR **argv)
      }

      {
-        TCHAR sfile[1024];
-        BOOL unicode=FALSE;
+        tstring nsifile;
+        NIStream strm;
        if (!_tcscmp(argv[argpos],_T("-")) && !in_files)
        {
-          fp=stdin;
-          _tcscpy(sfile,_T("stdin"));
+          strm.OpenStdIn(inputenc);
+          nsifile = _T("<stdin>");
        }
        else
        {
-          _tcscpy(sfile,argv[argpos]);
-          fp=FOPENTEXT2(sfile,"rt",&unicode);
-          if (!fp)
+          nsifile = argv[argpos];
+          if (!strm.OpenFileForReading(nsifile.c_str(),inputenc))
          {
-            _stprintf(sfile,_T("%s.nsi"),argv[argpos]);
-            fp=FOPENTEXT2(sfile,"rt",&unicode);
-            if (!fp)
+            nsifile += _T(".nsi");
+            if (!strm.OpenFileForReading(nsifile.c_str(),inputenc))
            {
-              sfile[_tcslen(sfile)-4]=0;
-              build.ERROR_MSG(_T("Can't open script \"%s\"\n"),sfile);
+              nsifile = argv[argpos];
+              build.ERROR_MSG(_T("Can't open script \"%s\"\n"),nsifile.c_str());
              return 1;
            }
          }
          if (do_cd)
          {
-            tstring script_file = tstring(sfile);
-            if (change_to_script_dir(build, script_file))
+            if (change_to_script_dir(build, nsifile))
              return 1;
          }
-          build.set_default_output_filename(remove_file_extension(sfile)+_T(".exe"));
+          build.set_default_output_filename(remove_file_extension(nsifile)+_T(".exe"));
        }

-        build.notify(MAKENSIS_NOTIFY_SCRIPT,sfile);
-        build.INFO_MSG(_T("Processing script file: \"%s\"\n"),sfile);
-        int ret=build.process_script(fp,sfile,unicode);
-        if (fp != stdin) fclose(fp);
+        build.notify(MAKENSIS_NOTIFY_SCRIPT,nsifile.c_str());
+        TCHAR bufcpdisp[20];
+        strm.StreamEncoding().GetCPDisplayName(bufcpdisp);
+        build.INFO_MSG(_T("Processing script file: \"%s\" (%s)\n"),nsifile.c_str(),bufcpdisp);
+        int ret=build.process_script(strm,nsifile.c_str());

        if (ret != PS_EOF && ret != PS_OK)
        {
-          build.ERROR_MSG(_T("Error in script \"%s\" on line %d -- aborting creation process\n"),sfile,build.linecnt);
+          build.ERROR_MSG(_T("Error in script \"%s\" on line %d -- aborting creation process\n"),nsifile.c_str(),build.linecnt);
          return 1;
        }
      }
@ -534,5 +554,5 @@ int _tmain(int argc, TCHAR **argv)
    build.ERROR_MSG(_T("Error - aborting creation process\n"));
    return 1;
  }
-  return 0; 
+  return 0;
 }
--- a/Source/script.cpp
+++ b/Source/script.cpp
@ -225,12 +225,12 @@ void CEXEBuild::del_date_time_predefines()
 }
 #endif

-int CEXEBuild::process_script(FILE *filepointer, const TCHAR *filename, BOOL unicode)
+int CEXEBuild::process_script(NIStream&Strm, const TCHAR *filename)
 {
-  linecnt = 0;
-  fp = filepointer;
+  NStreamLineReader linereader(Strm);
+  curlinereader = &linereader;
  curfilename = filename;
-  curfile_unicode = unicode;
+  linecnt = 0;

  if (has_called_write_output)
  {
@ -254,9 +254,8 @@ int CEXEBuild::process_script(FILE *filepointer, const TCHAR *filename, BOOL uni
  del_date_time_predefines();
 #endif

-  fp = 0;
+  curlinereader = 0;
  curfilename = 0;
-  curfile_unicode = FALSE;

  if (m_linebuild.getlen())
  {
@ -770,17 +769,29 @@ void CEXEBuild::ps_addtoline(const TCHAR *str, GrowBuf &linedata, StringList &hi

 int CEXEBuild::parseScript()
 {
+  assert(curlinereader);
  TCHAR str[MAX_LINELENGTH];
+  NStreamLineReader &linereader = *curlinereader;

  for (;;)
  {
-    TCHAR *p=str;
-    *p=0;
-    _fgetts(str,MAX_LINELENGTH,fp);
+    UINT lrres = linereader.ReadLine(str,MAX_LINELENGTH);
    linecnt++;
-    if (feof(fp)&&!str[0]) break;
+    if (NStream::OK != lrres)
+    {
+      if (linereader.IsEOF())
+      {
+        if(!str[0]) break;
+      }
+      else
+      {
+        ERROR_MSG(linereader.GetErrorMessage(lrres,curfilename,linecnt).c_str());
+        return PS_ERROR;
+      }
+    }

    // remove trailing whitespace
+    TCHAR *p = str;
    while (*p) p++;
    if (p > str) p--;
    while (p >= str && (*p == _T('\r') || *p == _T('\n') || *p == _T(' ') || *p == _T('\t'))) p--;
@ -809,39 +820,33 @@ int CEXEBuild::parseScript()
  return PS_EOF;
 }

-int CEXEBuild::includeScript(TCHAR *f)
+int CEXEBuild::includeScript(const TCHAR *f, NStreamEncoding&enc)
 {
-  SCRIPT_MSG(_T("!include: \"%s\"\n"),f);
-  BOOL unicode;
-  FILE *incfp=FOPENTEXT2(f,"rt",&unicode);
-  if (!incfp)
+  NIStream incstrm;
+  const bool openok = incstrm.OpenFileForReading(f,enc);
+  TCHAR bufcpdisp[20];
+  incstrm.StreamEncoding().GetCPDisplayName(bufcpdisp);
+  SCRIPT_MSG(_T("!include: \"%s\" (%s)\n"),f,bufcpdisp);
+  if (!openok)
  {
    ERROR_MSG(_T("!include: could not open file: \"%s\"\n"),f);
    return PS_ERROR;
  }

-  // auto-fclose(3) incfp
-  MANAGE_WITH(incfp, fclose);
-
  if (build_include_depth >= MAX_INCLUDEDEPTH)
  {
-    ERROR_MSG(_T("parseScript: too many levels of includes (%d max).\n"),MAX_INCLUDEDEPTH);
+    ERROR_MSG(_T("!include: too many levels of includes (%d max).\n"),MAX_INCLUDEDEPTH);
    return PS_ERROR;
  }
  build_include_depth++;
-#ifndef _UNICODE
-  const bool org_build_include_isutf8 = build_include_isutf8;
-  build_include_isutf8 = IsUTF8BOM(incfp);
-#endif

-  int last_linecnt=linecnt;
+  const int last_linecnt=linecnt;
  linecnt=0;
  const TCHAR *last_filename=curfilename;
-  BOOL last_unicode=curfile_unicode;
  curfilename=f;
-  curfile_unicode=unicode;
-  FILE *last_fp=fp;
-  fp=incfp;
+  NStreamLineReader linereader(incstrm);
+  NStreamLineReader*last_linereader=curlinereader;
+  curlinereader=&linereader;

 #ifdef NSIS_SUPPORT_STANDARD_PREDEFINES
  // Added by Sunil Kamath 11 June 2003
@ -857,16 +862,10 @@ int CEXEBuild::includeScript(TCHAR *f)
  restore_timestamp_predefine(oldtimestamp);
 #endif

-#ifndef _UNICODE
-  build_include_isutf8 = org_build_include_isutf8;
-#endif
-
-  int errlinecnt=linecnt;
-
+  const int errlinecnt=linecnt;
  linecnt=last_linecnt;
  curfilename=last_filename;
-  curfile_unicode=last_unicode;
-  fp=last_fp;
+  curlinereader = last_linereader;

  build_include_depth--;
  if (r != PS_EOF && r != PS_OK)
@ -904,52 +903,75 @@ int CEXEBuild::MacroExists(const TCHAR *macroname)
  return 0;
 }

-int CEXEBuild::LoadLicenseFile(TCHAR *file, TCHAR** pdata, LineParser &line, BOOL* unicode) // caller must free *pdata, even on error result
+int CEXEBuild::LoadLicenseFile(const TCHAR *file, TCHAR** pdata, const TCHAR *cmdname, WORD AnsiCP) // caller must free *pdata, even on error result
 {
-  FILE *fp=FOPENTEXT2(file,"rt",unicode);
-  if (!fp)
+  NIStream strm;
+  if (!strm.OpenFileForReading(file)) 
  {
-    ERROR_MSG(_T("%s: open failed \"%s\"\n"),line.gettoken_str(0),file);
-    PRINTHELP()
-  }
-  MANAGE_WITH(fp, fclose);
-  unsigned int beginning=ftell(fp); // (we might be positionned after a BOM)
-  fseek(fp,0,SEEK_END);
-  unsigned int datalen=ftell(fp)-beginning; // size of file in bytes! not a number of characters
-  if (!datalen)
-  {
-    ERROR_MSG(_T("%s: empty license file \"%s\"\n"),line.gettoken_str(0),file);
+    ERROR_MSG(_T("%s: open failed \"%s\"\n"),cmdname,file);
+    print_help(cmdname);
    return PS_ERROR;
  }
-  fseek(fp,beginning,SEEK_SET);
-  TCHAR *data=(TCHAR*)malloc((datalen+2)*sizeof(TCHAR)); // alloc enough for worst-case scenario (ANSI/UTF8 characters read in WCHARs)
+  FILE *f=strm.GetHandle();
+  UINT cbBOMOffset=ftell(f); // We might be positioned after a BOM
+  fseek(f,0,SEEK_END);
+  UINT cbFileData=ftell(f)-cbBOMOffset; // Size of file in bytes!
+
+  if (!cbFileData)
+  {
+    warning_fl(_T("%s: empty license file \"%s\"\n"),cmdname,file);
+  }
+  else
+    build_lockedunicodetarget=true;
+
+  fseek(f,cbBOMOffset,SEEK_SET);
+  UINT cbTotalData=sizeof(TCHAR)+cbFileData+sizeof(TCHAR); // SF_*+file+\0
+  TCHAR*data=(TCHAR*)malloc(cbTotalData);
  if (!data)
  {
-    ERROR_MSG(_T("Internal compiler error #12345: %s malloc(%d) failed.\n"),line.gettoken_str(0),(datalen+2)*sizeof(TCHAR));
+l_OOM:
+    ERROR_MSG(_T("Internal compiler error #12345: %s malloc(%d) failed.\n"),cmdname,cbTotalData);
    return PS_ERROR;
  }
-  *pdata = data; // memory will be released by caller
-  TCHAR *ldata=data+1;
-  while (_fgetts(ldata, data+datalen+2-ldata, fp)) // _fgetts translates ANSI/UTF8 characters to TCHAR //BUGBUG: There is no reason to store ASCII files as TCHAR
-      ldata += _tcslen(ldata);
-  if (ferror(fp))
+  *pdata=data; // memory will be released by caller
+  *((TCHAR*)((char*)data+cbTotalData-sizeof(TCHAR)))=_T('\0');
+
+  TCHAR*ldata=data+1;
+  if (!strm.ReadOctets(ldata,&cbFileData))
  {
-    ERROR_MSG(_T("%s: can't read file.\n"),line.gettoken_str(0));
+    ERROR_MSG(_T("%s: can't read file.\n"),cmdname);
    return PS_ERROR;
  }
-  bool disallowrichedunicode = false;
-#ifdef _UNICODE
-  if (!build_unicode)
-    disallowrichedunicode = true; //RichEdit 1.0 does not support unicode
+  // We have to convert the content of the license file to wchar_t
+  const WORD srccp=strm.StreamEncoding().IsUnicode() ? strm.StreamEncoding().GetCodepage() : AnsiCP;
+  const UINT cbcu=NStreamEncoding::GetCodeUnitSize(srccp);
+  if (sizeof(TCHAR) < cbcu)
+  {
+    ERROR_MSG(_T("%s: wchar_t conversion failed!\n"),cmdname);
+    return PS_ERROR;
+  }
+  // Create a fake character in the "header" part of the buffer
+  char*lichdr=((char*)ldata) - cbcu;
+  *((char*)lichdr)='X';
+  if (cbcu > 1) *((WORD*)lichdr)='X';
+  //BUGBUG: No room: if (cbcu > 2) *((UINT32*)lichdr)='X';
+  wchar_t*wcdata=DupWCFromBytes(lichdr,cbcu+cbFileData,srccp);
+  if ((wchar_t*)-1==wcdata)
+  {
+    ERROR_MSG(_T("%s: wchar_t conversion failed!\n"),cmdname);
+    return PS_ERROR;
+  }
+  free(data);
+  *pdata=data=wcdata;
+  ldata=data+1;
+  if (!data) goto l_OOM;
+
+  const bool isRTF=!memcmp(ldata,_T("{\\rtf"),5*sizeof(TCHAR));
+  if (isRTF)
+    *data=SF_RTF;
  else
-    *unicode = true; // _fgetts converted to TCHAR
-#endif
-  if (!memcmp(data+1,_T("{\\rtf"),5*sizeof(TCHAR)))
-    *data = SF_RTF;
-  else if (*unicode && !disallowrichedunicode)
-    *data = SF_TEXT|SF_UNICODE;
-  else
-    *data = SF_TEXT;
+    *data=build_unicode ? (SF_TEXT|SF_UNICODE) : (SF_TEXT);
+
  return PS_OK;
 }

@ -968,7 +990,7 @@ int CEXEBuild::process_oneline(TCHAR *line, const TCHAR *filename, int linenum)
  TCHAR *oldfilename = NULL;
  TCHAR *oldtimestamp = NULL;
  TCHAR *oldline = NULL;
-  BOOL is_commandline = !_tcscmp(filename,_T("command line"));
+  BOOL is_commandline = !_tcscmp(filename,_T("<command line>"));
  BOOL is_macro = !_tcsncmp(filename,_T("macro:"),_tcslen(_T("macro:")));

  if(!is_commandline) { // Don't set the predefines for command line /X option
@ -1047,7 +1069,6 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
 #endif

  multiple_entries_instruction=0;
-
  entry ent={0,};
  switch (which_token)
  {
@ -1105,14 +1126,24 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
        {
          TCHAR str[MAX_LINELENGTH];
          TCHAR *p=str;
-          str[0]=0;
-          _fgetts(str,MAX_LINELENGTH,fp);
-          //SCRIPT_MSG(_T("%s%s"), str, str[_tcslen(str)-1]==_T('\n')?_T(""):_T("\n"));
-          if (feof(fp) && !str[0])
+          UINT lrres = curlinereader->ReadLine(str,MAX_LINELENGTH);
+          if (NStream::OK != lrres)
          {
-            ERROR_MSG(_T("!macro \"%s\": unterminated (no !macroend found in file)!\n"),line.gettoken_str(1));
-            return PS_ERROR;
+            if (curlinereader->IsEOF())
+            {
+              if (!str[0])
+              {
+                ERROR_MSG(_T("!macro \"%s\": unterminated (no !macroend found in file)!\n"),line.gettoken_str(1));
+                return PS_ERROR;
+              }
+            }
+            else
+            {
+              ERROR_MSG(curlinereader->GetErrorMessage(lrres).c_str());
+              return PS_ERROR;
+            }
          }
+          //SCRIPT_MSG(_T("%s%s"), str, str[_tcslen(str)-1]==_T('\n')?_T(""):_T("\n"));
          // remove trailing whitespace
          while (*p) p++;
          if (p > str) p--;
@ -1742,21 +1773,13 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      TCHAR *name = line.gettoken_str(1);
      LANGID lang = line.gettoken_int(2);
      TCHAR *str = line.gettoken_str(3);
-      int ret;
-#ifndef _UNICODE
-        if (build_include_isutf8)
-          ret = SetUTF8LangString(name, lang, str);
-        else
-#endif
-          ret = SetLangString(name, lang, str, curfile_unicode);
-
+      const int ret = SetLangString(name, lang, str);
      if (ret == PS_WARNING)
        warning_fl(_T("LangString \"%s\" set multiple times for %d, wasting space"), name, lang);
      else if (ret == PS_ERROR) {
        ERROR_MSG(_T("Error: can't set LangString \"%s\"!\n"), name);
        return PS_ERROR;
      }
-      // BUGBUG: Does not display UTF-8 properly.
      SCRIPT_MSG(_T("LangString: \"%s\" %d \"%s\"\n"), name, lang, str);
    }
    return PS_OK;
@ -1765,10 +1788,11 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
    return PS_ERROR;
    case TOK_LICENSELANGSTRING:
    {
+      const TCHAR *cmdnam = get_commandtoken_name(which_token);
 #ifdef NSIS_CONFIG_SILENT_SUPPORT
      if (build_header.flags&(CH_FLAGS_SILENT|CH_FLAGS_SILENT_LOG))
      {
-        warning_fl(_T("LicenseLangString: SilentInstall enabled, wasting space"));
+        warning_fl(_T("%s: SilentInstall enabled, wasting space"), cmdnam);
      }
 #endif
      TCHAR *name = line.gettoken_str(1);
@ -1777,22 +1801,26 @ int CEXEBuild::doCommand(int which_token, LineParser &line)

      TCHAR *data = NULL;
      MANAGE_WITH(data, free);
-      BOOL unicode;

-      int ret = LoadLicenseFile(file, &data, line, &unicode);
+      // Non-Unicode license files are decoded with the language's NLF codepage (CP_ACP when no table is available)
+      WORD AnsiCP = CP_ACP;
+      LanguageTable *pLT = GetLangTable(lang);
+      if (pLT) AnsiCP = pLT->nlf.m_uCodePage;
+  
+      int ret = LoadLicenseFile(file, &data, cmdnam, AnsiCP);
      if (ret != PS_OK)
          return ret;

-      ret = SetLangString(name, lang, data, unicode);
+      ret = SetLangString(name, lang, data, true);
      if (ret == PS_WARNING)
-        warning_fl(_T("LicenseLangString \"%s\" set multiple times for %d, wasting space"), name, lang);
+        warning_fl(_T("%s \"%s\" set multiple times for %d, wasting space"), cmdnam, name, lang);
      else if (ret == PS_ERROR)
      {
-        ERROR_MSG(_T("Error: can't set LicenseLangString \"%s\"!\n"), name);
+        ERROR_MSG(_T("Error: can't set %s \"%s\"!\n"), cmdnam, name);
        return PS_ERROR;
      }

-      SCRIPT_MSG(_T("LicenseLangString: \"%s\" %d \"%s\"\n"), name, lang, file);
+      SCRIPT_MSG(_T("%s: \"%s\" %d \"%s\"\n"), cmdnam, name, lang, file);
    }
    return PS_OK;
    case TOK_NAME:
@ -2033,11 +2061,13 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
    return PS_OK;
    case TOK_LICENSEDATA:
      {
+        const TCHAR *cmdnam = get_commandtoken_name(which_token);
        int idx = 0;
        TCHAR *file = line.gettoken_str(1);
        TCHAR *data = NULL;
        TCHAR *filedata = NULL;
        MANAGE_WITH(filedata, free);
+        WORD cp = CP_ACP;

        if (file[0] == _T('$') && file[1] == _T('('))
        {
@ -2053,24 +2083,22 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)

        if (!idx)
        {
-          BOOL unicode;
-          int ret = LoadLicenseFile(file, &filedata, line, &unicode);
+          int ret = LoadLicenseFile(file, &filedata, cmdnam, cp);
          if (ret != PS_OK)
-              return ret;
+            return ret;
          data = filedata;
        }

        if (!cur_page) {
          if (SetInnerString(NLF_LICENSE_DATA,data) == PS_WARNING)
-            warning_fl(_T("%s: specified multiple times, wasting space"),line.gettoken_str(0));
+            warning_fl(_T("%s: specified multiple times, wasting space"),cmdnam);
        }
        else {
          if (cur_page_type != PAGE_LICENSE) {
            ERROR_MSG(_T("Error: LicenseData can only be used inside PageEx license.\n"));
            return PS_ERROR;
          }
-
-          cur_page->parms[1] = add_string(data, 0);
+          cur_page->parms[1] = add_string(data, false, cp);
        }

        SCRIPT_MSG(_T("LicenseData: \"%s\"\n"),file);
@ -2791,17 +2819,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
        LANGID lang_id = _ttoi(line.gettoken_str(1) + 6);
        LanguageTable *table = GetLangTable(lang_id);
        const TCHAR*facename = line.gettoken_str(2);
-#ifndef _UNICODE
-        if (build_include_isutf8)
-        {
-          EXEHEADTCHAR_T *bufEHTStr = UTF8ToExeHeadTStrDup(facename, table->nlf.m_uCodePage);
-          table->nlf.m_szFont = bufEHTStr;
-        }
-        else
-#endif
-        {
-          table->nlf.m_szFont = _tcsdup(facename);
-        }
+        table->nlf.m_szFont = _tcsdup(facename);
        table->nlf.m_iFontSize = line.gettoken_int(3);
        
        if (table->nlf.m_szFont)
@ -2812,19 +2830,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      else
      {
        const TCHAR*facename = line.gettoken_str(1);
-#ifndef _UNICODE
-        if (build_include_isutf8)
-        {
-          EXEHEADTCHAR_T *bufEHTStr = UTF8ToExeHeadTStrDup(facename, CP_ACP);
-          if (!bufEHTStr) ++failed;
-          _tcsnccpy(build_font, bufEHTStr, COUNTOF(build_font));
-          free(bufEHTStr);
-        }
-        else
-#endif
-        {
-          _tcsnccpy(build_font, facename, COUNTOF(build_font));
-        }
+        _tcsnccpy(build_font, facename, COUNTOF(build_font));
        build_font_size = line.gettoken_int(2);

        if (!failed) SCRIPT_MSG(_T("SetFont: \"%s\" %s\n"), facename, line.gettoken_str(2));
@ -2893,15 +2899,19 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
 #ifdef _UNICODE
    case TOK_TARGETUNICODE:
    {
-      if (build_compressor_set)
-      {
-        ERROR_MSG(_T("Error: Can't change target charset after data already got compressed or header already changed!\n"));
-        return PS_ERROR;
-      }
      int k = line.gettoken_enum(1,_T("false\0true\0"));
      if (-1==k) PRINTHELP();
      SCRIPT_MSG(_T("Unicode: %s\n"),k?_T("true"):_T("false"));
-      if (set_target_charset(!!k) != PS_OK)
+      const bool newtargetcs = !!k;
+      if (newtargetcs != build_unicode)
+      {
+        if (build_compressor_set || build_lockedunicodetarget)
+        {
+          ERROR_MSG(_T("Error: Can't change target charset after data already got compressed or header already changed!\n"));
+          return PS_ERROR;
+        }
+      }
+      if (set_target_charset(newtargetcs) != PS_OK)
      {
        ERROR_MSG(_T("Error: Unable to set target charset (adequate stub not found?)\n"));
        return PS_ERROR;
@ -3035,7 +3045,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      {
        line.eattoken();
        define=line.gettoken_str(1);
-        if (dupemode==1 && definedlist.find(define))return PS_OK;
+        if (dupemode==1 && definedlist.find(define)) return PS_OK;
      }


@ -3242,19 +3252,26 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
    case TOK_P_INCLUDE:
      {
        bool required = true;
-
-        TCHAR *f = line.gettoken_str(1);
-        
-        if(!_tcsicmp(f,_T("/nonfatal"))) {
-          if (line.getnumtokens()!=3)
-            PRINTHELP();
-        
-          f = line.gettoken_str(2);
+        NStreamEncoding enc(NStreamEncoding::AUTO);
+        TCHAR *f;
+        unsigned int toks = line.getnumtokens() - 1;
+        for(unsigned int tok = 0; toks;)
+        {
+          f = line.gettoken_str(++tok);
+          if (tok >= toks) break;
+          if(!_tcsicmp(f,_T("/nonfatal"))) {
            required = false;
-        } else if (line.getnumtokens()!=2) {
-          PRINTHELP();
+          }
+          TCHAR buf[9+1];
+          my_strncpy(buf,f,COUNTOF(buf));
+          if(!_tcsicmp(buf,_T("/charset="))) {
+            WORD cp = GetEncodingFromString(f+9);
+            if (NStreamEncoding::UNKNOWN == cp) toks = 0;
+            enc.SafeSetCodepage(cp);
+          }
        }
-        
+        if (!toks || !*f) PRINTHELP();
+
        TCHAR *fc = my_convert(f);
        int included = 0;

@ -3282,7 +3299,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)

          tstring incfile = basedir + *files_itr;

-          if (includeScript((TCHAR *) incfile.c_str()) != PS_OK) {
+          if (includeScript(incfile.c_str(), enc) != PS_OK) {
            return PS_ERROR;
          }

@ -3311,7 +3328,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)

            tstring incfile = tstring(incdir) + PLATFORM_PATH_SEPARATOR_STR + basedir + *incdir_itr;

-            if (includeScript((TCHAR *) incfile.c_str()) != PS_OK) {
+            if (includeScript(incfile.c_str(), enc) != PS_OK) {
              return PS_ERROR;
            }

@ -3327,13 +3344,13 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
        if (!included)
        {
          if(required) {
-          ERROR_MSG(_T("!include: could not find: \"%s\"\n"),f);
-          return PS_ERROR;
-        } else {
-          warning_fl(_T("!include: could not find: \"%s\""),f);
+            ERROR_MSG(_T("!include: could not find: \"%s\"\n"),f);
+            return PS_ERROR;
+          } else {
+            warning_fl(_T("!include: could not find: \"%s\""),f);
+          }
        }
      }
-      }
    return PS_OK;
    case TOK_P_CD:
      if (!line.gettoken_str(1)[0] || _tchdir(line.gettoken_str(1)))
@ -4214,7 +4231,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      ent.offsets[0]=add_string(line.gettoken_str(1));
      if (which_token == TOK_UNREGDLL)
      {
-        ent.offsets[1]=add_string(_T("DllUnregisterServer"));
+        ent.offsets[1]=add_asciistring(_T("DllUnregisterServer"));
        ent.offsets[2]=DefineInnerLangString(NLF_UNREGISTERING);
      }
      else if (which_token == TOK_CALLINSTDLL)
@ -4232,7 +4249,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      else // register
      {
        ent.offsets[1] = add_string(line.gettoken_str(2));
-        if (!ent.offsets[1]) ent.offsets[1]=add_string(_T("DllRegisterServer"));
+        if (!ent.offsets[1]) ent.offsets[1]=add_asciistring(_T("DllRegisterServer"));
        ent.offsets[2]=DefineInnerLangString(NLF_REGISTERING);
      }

@ -4677,8 +4694,8 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
    return add_entry(&ent);
    case TOK_HIDEWINDOW:
      ent.which=EW_SHOWWINDOW;
-      ent.offsets[0]=add_string(_T("$HWNDPARENT"));
-      ent.offsets[1]=add_string(_T("0")/*SW_HIDE*/);
+      ent.offsets[0]=add_asciistring(_T("$HWNDPARENT"));
+      ent.offsets[1]=add_asciistring(_T("0")/*SW_HIDE*/);
      ent.offsets[2]=1;
      SCRIPT_MSG(_T("HideWindow\n"));
    return add_entry(&ent);
@ -4686,8 +4703,8 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
    {
      int ret;
      ent.which=EW_SHOWWINDOW;
-      ent.offsets[0]=add_string(_T("$HWNDPARENT"));
-      ent.offsets[1]=add_string(_T("5")/*SW_SHOW*/);
+      ent.offsets[0]=add_asciistring(_T("$HWNDPARENT"));
+      ent.offsets[1]=add_asciistring(_T("5")/*SW_SHOW*/);
      ret = add_entry(&ent);
      if (ret != PS_OK) return ret;
      ent.which=EW_BRINGTOFRONT;
@ -5343,7 +5360,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      if (line.getnumtokens() == 3)
        ent.offsets[1]=add_string(line.gettoken_str(2));
      else
-        ent.offsets[1]=add_string(_T("$TEMP"));
+        ent.offsets[1]=add_asciistring(_T("$TEMP"));
      if (ent.offsets[0]<0) PRINTHELP()
      SCRIPT_MSG(_T("GetTempFileName -> %s\n"),line.gettoken_str(1));
    return add_entry(&ent);
@ -5415,7 +5432,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      if (ent.offsets[3] != 7 && ent.offsets[3] != 13) ent.offsets[2]=add_string(line.gettoken_str(4));
      if (ent.offsets[3] == 13) {
        ent.offsets[3]=6;
-        ent.offsets[2]=add_string(_T("0xFFFFFFFF"));
+        ent.offsets[2]=add_asciistring(_T("0xFFFFFFFF"));
      }
      SCRIPT_MSG(_T("IntOp: %s=%s%s%s\n"),line.gettoken_str(1),line.gettoken_str(2),line.gettoken_str(3),line.gettoken_str(4));
    return add_entry(&ent);
@ -5774,7 +5791,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      ent.which=EW_FGETS;
      ent.offsets[0]=GetUserVarIndex(line, 1); // file handle
      ent.offsets[1]=GetUserVarIndex(line, 2); // output string
-      ent.offsets[2]=add_string(_T("1"));
+      ent.offsets[2]=add_asciistring(_T("1"));
      ent.offsets[3]=1;
      if (ent.offsets[0]<0 || ent.offsets[1]<0) PRINTHELP()
      SCRIPT_MSG(_T("FileReadByte: %s->%s\n"),line.gettoken_str(1),line.gettoken_str(2));
@ -5825,7 +5842,7 @@ int CEXEBuild::doCommand(int which_token, LineParser &line)
      ent.which=EW_FGETWS;
      ent.offsets[0]=GetUserVarIndex(line, 1); // file handle
      ent.offsets[1]=GetUserVarIndex(line, 2); // output string
-      ent.offsets[2]=add_string(_T("1"));
+      ent.offsets[2]=add_asciistring(_T("1"));
      ent.offsets[3]=1;
      if (ent.offsets[0]<0 || ent.offsets[1]<0) PRINTHELP()
      SCRIPT_MSG(_T("FileReadWord: %s->%s\n"),line.gettoken_str(1),line.gettoken_str(2));
@ -6459,7 +6476,7 @@ int CEXEBuild::do_add_file(const TCHAR *lgss, int attrib, int recurse, int *tota

    if (recurse) {
      // save $OUTDIR into $_OUTDIR [StrCpy $_OUTDIR $OUTDIR]
-      if (add_entry_direct(EW_ASSIGNVAR, m_UserVarNames.get(_T("_OUTDIR")), add_string(_T("$OUTDIR"))) != PS_OK) {
+      if (add_entry_direct(EW_ASSIGNVAR, m_UserVarNames.get(_T("_OUTDIR")), add_asciistring(_T("$OUTDIR"))) != PS_OK) {
        return PS_ERROR;
      }
    }
@ -6540,7 +6557,7 @@ int CEXEBuild::do_add_file(const TCHAR *lgss, int attrib, int recurse, int *tota
    SCRIPT_MSG(_T("%sFile: Returning to: \"%s\"\n"), generatecode ? _T("") : _T("Reserve"), dir.c_str());

    // restore $OUTDIR from $_OUTDIR [SetOutPath $_OUTDIR]
-    if (add_entry_direct(EW_CREATEDIR, add_string(_T("$_OUTDIR")), 1) != PS_OK) {
+    if (add_entry_direct(EW_CREATEDIR, add_asciistring(_T("$_OUTDIR")), 1) != PS_OK) {
      return PS_ERROR;
    }
  }
@ -6778,7 +6795,7 @@ int CEXEBuild::do_add_file_create_dir(const tstring& local_dir, const tstring& d

 #ifdef _WIN32
  if (attrib) {
-    int ndc = add_string(_T("."));
+    int ndc = add_asciistring(_T("."));

    DWORD attr = GetFileAttributes(local_dir.c_str());

--- a/Source/strlist.cpp
+++ b/Source/strlist.cpp
@ -17,98 +17,177 @@
 */

 #include "strlist.h"
-
-MLStringList::MLStringList()
-{
-    m_gr.set_zeroing(1);
-#ifdef _UNICODE
-    m_grAnsi.set_zeroing(1);
-#endif
-}
+#include "utf.h"

 #ifdef _UNICODE
 char* convert_processed_string_to_ansi(char *out, const TCHAR *in, WORD codepage); // defined in build.cpp
-
-// use 2 for case sensitive end-of-string matches too
-int MLStringList::findAnsi(const char *str, int case_sensitive) const // returns -1 if not found
-{
-  const char *s=(const char*) m_grAnsi.get();
-  int ml=getcount();
-  int offs=0;
-
-  size_t str_slen = strlen(str);
-  size_t offs_slen;
-
-  while (offs < ml)
-  {
-    // Check if the whole string matches str.
-    if ((case_sensitive && !strcmp(s+offs,str)) ||
-        (!case_sensitive && !stricmp(s+offs,str)))
-    {
-      return offs;
-    }
-
-    offs_slen = strlen(s+offs);
-
-    // Check if just the end of the string matches str.
-    if (case_sensitive==2 &&
-        str_slen < offs_slen &&  // check for end of string
-        !strcmp(s + offs + offs_slen - str_slen,str))
-    {
-      return offs + offs_slen - str_slen;
-    }
-    offs += offs_slen + 1;
-  }
-  return -1;
-}
 #endif

-int MLStringList::add(const TCHAR *str, WORD codepage /*= CP_ACP*/, bool processed, bool build_unicode)
+// Compares cb bytes of ptr1 and ptr2, starting at the last byte and walking
+// towards the first. Returns true if all cb bytes are equal (or cb is 0).
+static inline bool byte_rev_match(const void*ptr1, const void*ptr2, size_t cb)
 {
-#ifndef _UNICODE
-  int a=find(str,2);
-  if (a >= 0)
-      return a;
-  int len = _tcslen(str)+1;
-  return m_gr.add(str,len*sizeof(TCHAR))/sizeof(TCHAR);
-#else
-  if (build_unicode)
+  const char *p1 = (const char*) ptr1, *p2 = (const char*) ptr2;
+  // Post-decrement so index 0 takes part in the comparison; a pre-decrement
+  // loop ends before it ever looks at the first byte.
+  while (cb--) if (p1[cb] != p2[cb]) return false;
+  return true;
+}
+
+// Returns the number of strings stored in the list. The empty string at
+// offset 0 is always counted, even when no storage has been allocated yet.
+unsigned int ExeHeadStringList::getnum() const
+{
+  char *p = (char*) m_gr.get();
+  if (!p) return 1; // The empty string always exists
+  unsigned int cbList = gettotalsize(), cb = 0, num = 1, pos;
+  pos = 1 + !!m_wide, p += pos; // Skip empty string
+  if (m_wide)
   {
-    int a=find(str,2);
-    if (a >= 0)
-      return a;
+    for(;;)
+    {
+      // The parentheses are required: += binds weaker than >= so without them
+      // the result of the comparison would be added to pos instead of cb.
+      if ((pos += cb) >= cbList) break;
+      // p and pos count bytes, a UTF16LE entry uses 2 bytes per code unit
+      cb = (StrLenUTF16LE(p += cb) + 1) * 2, ++num;
+    }
   }
-  // convert to ANSI
-  int len = _tcslen(str)+1;
-  char* ansiBuf = new char[len*2];
-  int cbMultiByte;
-  if (processed)
-    cbMultiByte = convert_processed_string_to_ansi(ansiBuf, str, codepage)-ansiBuf;
   else
-    cbMultiByte = WideCharToMultiByte(codepage, 0, str, len, ansiBuf, len*2, NULL, NULL);
-  if (!build_unicode)
   {
-    int a=findAnsi(ansiBuf,2);
-    if (a >= 0)
+    for(;;)
     {
-      delete[] ansiBuf;
-      return a;
+      if ((pos += cb) >= cbList) break; // See the note in the wide loop
+      cb = (unsigned int) strlen(p += cb) + 1, ++num;
     }
   }
-  // string not found, add it
-  int a=m_gr.add(str,len*sizeof(TCHAR))/sizeof(TCHAR);
-  m_grAnsi.add(ansiBuf,cbMultiByte);
-  delete[] ansiBuf;
-  if (len != cbMultiByte)
-  { // resize buffers to align future strings on same offsets
-    len = a + STD_MAX(len,cbMultiByte);
-    m_gr.resize(len*sizeof(TCHAR));
-    m_grAnsi.resize(len);
-  }
-  return a;
-#endif
+  return num;
 }

+// Copies the string stored at offset into outstr.
+// offset uses the same unit add() and find() return: bytes for multibyte
+// lists and byte offset / WIDEDIV for wide (UTF16LE) lists.
+// Returns false when nothing is stored yet or offset is outside the list.
+bool ExeHeadStringList::get(unsigned int offset, tstring&outstr) const
+{
+  if (0 == offset)
+  {
+    outstr.clear();
+    return true;
+  }
+  char *p = (char*) m_gr.get();
+  const unsigned int cbList = gettotalsize();
+  const unsigned int cbUnit = m_wide ? (unsigned int) WIDEDIV : 1; // Bytes per offset unit
+  // Only offsets that point inside the stored data are valid. Dividing the
+  // list size (instead of multiplying offset) avoids integer overflow.
+  if (p && offset < cbList / cbUnit)
+  {
+    if (m_wide)
+      StrSetUTF16LE(outstr,&p[offset*cbUnit]);
+    else
+      // BUGBUG: There is no way for us to know the correct codepage
+      outstr = CtoTString(&p[offset]);
+    return true;
+  }
+  return false;
+}
+
+/*
+ * Looks up str and returns the offset it is stored at, or (unsigned int) -1
+ * when the list does not contain it.
+ * Matching is done on raw byte values only, no Unicode normalization is applied.
+ * If ppBufMB is non-null you must delete[] it (Only valid when m_wide is false)!
+*/
+unsigned int ExeHeadStringList::find(const TCHAR *str, WORD codepage, bool processed, char**ppBufMB) const
+{
+  // Multibyte lists and the empty string are passed on as plain TCHAR data
+  if (!m_wide || !*str)
+    return find(str,_tcslen(str),codepage,processed,ppBufMB);
+
+  // Wide lists are searched with the UTF16LE form of the string
+  WCToUTF16LEHlpr utf16;
+  if (!utf16.Create(str)) return -1;
+  const unsigned short *strLE = utf16.Get();
+  const unsigned int result = find(strLE,StrLenUTF16LE(strLE),codepage,processed,ppBufMB);
+  utf16.Destroy();
+  return result;
+}
+/*
+ * Worker for the public find(). ptr is \0 terminated UTF16LE data when m_wide
+ * is set and a wchar_t string otherwise; cchF is its length without the \0.
+ * Returns the offset of the matching entry (byte offset for multibyte lists,
+ * byte offset / WIDEDIV for wide lists), 0 for the empty string or
+ * (unsigned int) -1 when there is no match or nothing is stored yet.
+ * For multibyte lists that have storage, the converted search string is handed
+ * over through ppBufMB when it is non-null; the caller must delete[] it.
+ */
+unsigned int ExeHeadStringList::find(const void *ptr, unsigned int cchF, WORD codepage, bool processed, char**ppBufMB) const
+{
+  const wchar_t *find = (const wchar_t*) ptr; // Data is: m_wide ? UTF16LE : wchar_t
+  if (!*find) return 0; // The empty string is always first.
+
+  char *p = (char*) m_gr.get();
+  if (!p) return -1;
+
+  unsigned int cbF = ++cchF * 2; // Include \0 as part of cchF, * 2 for UTF16 & DBCS.
+  char *bufMB = 0;
+  if (!m_wide)
+  {
+    // Multibyte lists are searched with the converted form of the string
+    unsigned int cbMB;
+    bufMB = new char[cbF];
+    if (processed)
+    {
+      char *pTmp = convert_processed_string_to_ansi(bufMB,find,codepage);
+      cbMB = pTmp ? pTmp - bufMB : 0;
+    }
+    else
+    {
+      cbMB = WideCharToMultiByte(codepage,0,find,cchF,bufMB,cbF,0,0);
+    }
+    assert(cbMB);
+    cbF = cbMB, find = (const wchar_t*) bufMB;
+  }
+
+  unsigned int cbList = gettotalsize(), cb = 0, retval = -1, pos;
+  pos = 1 + !!m_wide, p += pos; // Skip empty string
+  if (m_wide)
+  {
+    for(;;)
+    {
+      // The parentheses are required: += binds weaker than >= so without them
+      // pos would be incremented by the result of the comparison (0 or 1).
+      if ((pos += cb) >= cbList) break;
+      cb = (StrLenUTF16LE(p+=cb) + 1) * 2;
+      if (cb < cbF) continue;
+      if (byte_rev_match(p,find,cbF)) { retval = pos / WIDEDIV; break; }
+    }
+  }
+  else
+  {
+    for(;;)
+    {
+      if ((pos += cb) >= cbList) break; // See the note in the wide loop
+      cb = (unsigned int) strlen(p+=cb) + 1;
+      if (cb < cbF) continue;
+      if (byte_rev_match(p,find,cbF)) { retval = pos; break; }
+    }
+    if (ppBufMB)
+      *ppBufMB = bufMB;
+    else
+      delete[] bufMB;
+  }
+  return retval;
+}
+
+// Adds str to the list unless an identical entry is already stored and
+// returns the offset of the entry (bytes for multibyte lists, byte offset /
+// WIDEDIV for wide lists). The empty string always has offset 0.
+// Throws std::bad_alloc if the UTF16LE conversion helper fails.
+int ExeHeadStringList::add(const TCHAR *str, WORD codepage, bool processed)
+{
+  if (!m_gr.get())
+  {
+    if (!*str) return 0; // Delay allocating the empty string
+    // The list always starts with the empty string, which is nothing but a
+    // terminator. Use a real zero buffer instead of borrowing the bytes of a
+    // null pointer variable.
+    char emptystr[2] = { 0, 0 };
+    const unsigned int posEmpty = m_gr.add(emptystr,1 + !!m_wide);
+    assert(0 == posEmpty);
+    (void) posEmpty; // Only looked at by the assert
+  }
+
+  // find() hands back the converted multibyte string so it can be stored
+  // without converting it a second time.
+  char *bufMB = 0;
+  unsigned int pos = find(str,codepage,processed,m_wide ? 0 : &bufMB);
+  if ((unsigned int) -1 != pos)
+  {
+    delete[] bufMB;
+    return pos;
+  }
+
+  if (m_wide)
+  {
+    WCToUTF16LEHlpr cnv;
+    if (!cnv.Create(str)) throw std::bad_alloc();
+    pos = m_gr.add(cnv.Get(),cnv.GetSize()) / WIDEDIV;
+    cnv.Destroy();
+  }
+  else
+  {
+    unsigned int cbMB = (unsigned int) strlen(bufMB) + 1;
+    pos = m_gr.add(bufMB,cbMB);
+    delete[] bufMB;
+  }
+  return pos;
+}
+
+
 int StringList::add(const TCHAR *str, int case_sensitive)
 {
  int a=find(str,case_sensitive);
@ -232,7 +311,7 @@ int DefineList::add(const TCHAR *name, const TCHAR *value/*=_T("")*/)
    extern void quit();
    if (g_display_errors)
    {
-      PrintColorFmtMsg_ERR(_T("\nInternal compiler error #12345: GrowBuf realloc/malloc(%lu) failed.\n"), (unsigned long) size_in_bytes);
+      PrintColorFmtMsg_ERR(_T("\nInternal compiler error #12345: DefineList malloc(%lu) failed.\n"), (unsigned long) size_in_bytes);
    }
    quit();
  }
--- a/Source/strlist.h
+++ b/Source/strlist.h
@ -118,31 +118,42 @@ protected:
  GrowBuf m_gr;
 };

+
+#include "tstring.h"
 /**
- * Similar to StringList with case_sensitive=2, but stores strings as both Unicode AND ANSI (codepaged)
+ * Stores a list of strings as UTF16LE or raw bytes (multibyte char*).
+ * The storage format is selected with setunicode(); the empty string is
+ * always the first entry and strings are identified by their offset.
  */
-class MLStringList : private StringList
+class ExeHeadStringList
 {
 private: // don't copy instances
-  MLStringList(const MLStringList&);
-  void operator=(const MLStringList&);
+  ExeHeadStringList(const ExeHeadStringList&);
+  void operator=(const ExeHeadStringList&);

 public:
-  MLStringList();
-  ~MLStringList() {}
+  ExeHeadStringList()
+  {
+    m_wide = false; // Multibyte storage until setunicode(true) is called
+    m_gr.set_zeroing(true);
+  }

-  int add(const TCHAR *str, WORD codepage, bool processed, bool build_unicode);
-  int getnum() const                { return StringList::getnum(); }
-  int getcount() const              { return StringList::getcount(); }
-  const TCHAR *getTchar() const     { return (const TCHAR*) m_gr.get(); }
-#ifdef _UNICODE
-  const char *getAnsi() const       { return (const char*) m_grAnsi.get(); }
-  int findAnsi(const char *str, int case_sensitive) const;
-private:
-  GrowBuf m_grAnsi;
-#endif
+  void setunicode(bool unicode) { m_wide = unicode; } // true: UTF16LE storage, false: multibyte storage
+  bool addemptystring() { return true; } // Added by add() when the first real string is added
+  int add(const TCHAR *str, WORD codepage, bool processed);
+  unsigned int find(const TCHAR *str, WORD codepage, bool processed, char**ppBufMB) const;
+  bool get(unsigned int offset, tstring&str) const;
+  unsigned int getnum() const;
+  unsigned int gettotalsize() const { return m_gr.get() ? m_gr.getlen() : (m_wide ? 2 : 1); } // In bytes; with no storage only the empty string is counted
+  void* getstorageptr() const { return m_gr.get() ? m_gr.get() : L""; } // Falls back to an empty string literal while nothing is allocated
+
+protected:
+  unsigned int find(const void *str, unsigned int cchF, WORD codepage, bool processed, char**ppBufMB) const;
+
+  GrowBuf m_gr; // Raw string storage
+  bool m_wide; // Storage format, see setunicode()
+  enum {WIDEDIV=2}; // ExeHead expects us to provide offsets this way, also helps UTF16 offsets for shell constants to fit in < 0xFF
 };

+
 /**
 * This class maintains a list of T types in a GrowBuf sorted by T.name which
 * is assumed to be a string (TCHAR*).  So it's really sort of a 
@ -182,14 +193,15 @@ class SortedStringList
      T newstruct={0,};
      int pos=find(name,case_sensitive,1);
      if (pos==-1) return -1;
-      newstruct.name=(TCHAR*)malloc((_tcslen(name)+1)*sizeof(TCHAR));
+      const UINT cbName=(_tcslen(name)+1)*sizeof(TCHAR);
+      newstruct.name=(TCHAR*)malloc(cbName);
      if (!newstruct.name)
      {
        extern int g_display_errors;
        extern void quit();
        if (g_display_errors)
        {
-          PrintColorFmtMsg_ERR(_T("\nInternal compiler error #12345: GrowBuf realloc/malloc(%lu) failed.\n"),(unsigned long)((_tcslen(name)+1)*sizeof(TCHAR)));
+          PrintColorFmtMsg_ERR(_T("\nInternal compiler error #12345: SortedStringList malloc(%lu) failed.\n"),(unsigned long)cbName);
        }
        quit();
      }
--- a/Source/tokens.cpp
+++ b/Source/tokens.cpp
@ -247,7 +247,7 @@ static tokenType tokenlist[TOK__LAST] =
 {TOK_P_SYSTEMEXEC,_T("!system"),1,2,_T("command [<|>|<>|=) retval]"),TP_ALL},
 {TOK_P_EXECUTE,_T("!execute"),1,0,_T("command"),TP_ALL},
 {TOK_P_ADDINCLUDEDIR,_T("!AddIncludeDir"),1,0,_T("dir"),TP_ALL},
-{TOK_P_INCLUDE,_T("!include"),1,1,_T("[/NONFATAL] filename.nsh"),TP_ALL},
+{TOK_P_INCLUDE,_T("!include"),1,2,_T("[/NONFATAL] [/CHARSET=<") TSTR_INPUTCHARSET _T(">] filename.nsh"),TP_ALL},
 {TOK_P_CD,_T("!cd"),1,0,_T("absolute_or_relative_new_directory"),TP_ALL},
 {TOK_P_IF,_T("!if"),1,3,_T("[!] (value [(==,!=,S==,S!=,=,<>,<=,<,>,>=,&,&&,||) value2] | /FILEEXISTS path)"),TP_ALL},
 {TOK_P_IFDEF,_T("!ifdef"),1,-1,_T("symbol [| symbol2 [& symbol3 [...]]]"),TP_ALL},
@ -308,7 +308,7 @@ const TCHAR* CEXEBuild::get_commandtoken_name(int tok)
  return 0;
 }

-void CEXEBuild::print_help(TCHAR *commandname)
+void CEXEBuild::print_help(const TCHAR *commandname)
 {
  int x;
  for (x = 0; x < TOK__LAST; x ++)
--- a/Source/utf.cpp
+++ b/Source/utf.cpp
@ -17,24 +17,7 @@

 #include "utf.h"

-// BUGBUG: We might want to use MB_ERR_INVALID_CHARS but it is not supported
-// on < WinXP or in our current POSIX implementation.
-static const int UTF8MBTWCFLAGS  = 0;
-
-
-#define ExeHeadWStrFree free
-static EXEHEADWCHAR_T* ExeHeadWStrAlloc(UINT cch) 
-{
-  EXEHEADWCHAR_T* s = (EXEHEADWCHAR_T*) malloc(cch*sizeof(EXEHEADWCHAR_T));
-#if 0
-  // TODO: We should add POSIX versions of  G/SetLastError
-  // if we want to tell _why_ UTF8ToExeHeadTStr failed...
-  if (!s) SetLastError(ERROR_OUTOFMEMORY);
-#endif
-  return s;
-}
-
-#ifdef _UNICODE
+#define FIX_ENDIAN_INT16LETOHOST_INPLACE FIX_ENDIAN_INT16_INPLACE

 void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch)
 {
@ -43,51 +26,326 @@ void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch)
  if (!empty) *out = 0;
 }

-#else // !_UNICODE
-
-EXEHEADTCHAR_T* UTF8ToExeHeadTStrDup(LPCSTR StrU8,UINT Codepage) 
+// Returns the number of 16-bit code units in the \0 terminated UTF16LE
+// string str, not counting the terminator.
+UINT StrLenUTF16LE(const void*str)
 {
-  int cchW = MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,NULL,0);
-  if (!cchW) return NULL;
-  WCHAR *bufWStr = (WCHAR*) ExeHeadWStrAlloc(cchW);
-  if (!bufWStr) return NULL;
-  EXEHEADTCHAR_T *outstr = NULL;
-  if (MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,bufWStr,cchW))
+  const unsigned short *start = (const unsigned short *) str, *p = start;
+  for(;*p;) ++p;
+  // The pointer difference is the count in code units. A byte distance minus
+  // one is only right for strings of at most one code unit.
+  return (UINT) (p - start);
+}
+
+bool StrSetUTF16LE(tstring&dest, const void*src)
+{
+#ifdef _WIN32
+  dest = (unsigned short *) src; // src is assigned as-is, no conversion is performed
+#else
+#error TODO: UTF16LE to wchar_t
+#endif
+  return true; // This implementation has no failure path
+}
+
+inline UINT UTF8ToWC_Convert(LPCSTR StrU8,UINT cbU8,wchar_t*Buffer,UINT cchBuf)
+{
+#ifndef MB_ERR_INVALID_CHARS
+  const UINT MB_ERR_INVALID_CHARS = 8; // MSDN says this flag is OK for CP_UTF8
+#endif
+  return (UINT) MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,StrU8,cbU8,Buffer,cchBuf); // Result of MultiByteToWideChar, callers treat 0 as failure
+}
+inline UINT UTF8ToWC_Prepare(LPCSTR StrU8,UINT cbU8)
+{
+  return UTF8ToWC_Convert(StrU8,cbU8,0,0); // No output buffer: only asks for the required size
+}
+
+wchar_t* DupWCFromBytes(void*Buffer,UINT cbBuffer,WORD SrcCP)
+{
+  /*\
+  Converts a buffer encoded with SrcCP to a \0 terminated wchar_t malloc'ed buffer.
+  Returns 0 if malloc failed or -1 if conversion to wchar_t failed.
+  Any other return value is owned by the caller and must be released with free().
+  \*/
+  NStreamEncoding srcenc(SrcCP);
+  wchar_t*pwc = 0;
+#ifdef _WIN32 
+  if (srcenc.IsUTF16LE())
   {
-    int cbA = WideCharToMultiByte(Codepage,0,bufWStr,cchW,NULL,0,NULL,NULL);
-    if (cbA && (outstr = ExeHeadTStrAlloc(cbA)))
+    // Assuming wchar_t==UTF16LE
+    pwc = (wchar_t*) malloc(cbBuffer + 2); // Input bytes plus a 2 byte terminator
+    if (!pwc) return pwc;
+    memcpy(pwc, Buffer, cbBuffer);
+    *((wchar_t*)(((char*)pwc)+cbBuffer)) = L'\0'; // NOTE(review): unaligned write if cbBuffer is odd - confirm callers only pass whole code units
+    return pwc;
+  }
+  // TODO: MBTWC on Windows is lame, we are going to fail if SrcCP is UTF16BE or UTF32
+#endif
+  UINT cchW = MultiByteToWideChar(SrcCP,0,(char*)Buffer,cbBuffer,0,0); // First call only asks for the required size
+  if (!cchW && NStreamEncoding::GetCodeUnitSize(SrcCP) <= cbBuffer) // Nothing converted although the input holds at least one code unit
+  {
+    return (wchar_t*)-1;
+  }
+  pwc = (wchar_t*) malloc((cchW+1)*sizeof(wchar_t));
+  if (!pwc) return pwc;
+  MultiByteToWideChar(SrcCP,0,(char*)Buffer,cbBuffer,pwc,cchW); // Result of the second call is not checked
+  pwc[cchW] = L'\0';
+  return pwc;
+}
+
+UINT DetectUTFBOM(FILE*strm)
+{
+  /*\
+  Tries to detect a BOM at the start of a stream. If a BOM is found it is eaten.
+  Returns the matching NStreamEncoding value, or 0 when no BOM was found
+  (the bytes that were read are then handed back with ungetc).
+  NOTE: ungetc is only guaranteed to support 1 pushback, 
+  lets hope no MBCS file starts with parts of a BOM.
+  \*/
+  const int b1 = fgetc(strm);
+  if (EOF == b1) return 0;
+  if (0xef == b1)
+  {
+    const int b2 = fgetc(strm);
+    if (0xbb == b2)
     {
-      if (!WideCharToMultiByte(Codepage,0,bufWStr,cchW,outstr,cbA,NULL,NULL))
+      const int b3 = fgetc(strm);
+      if (0xbf == b3) return NStreamEncoding::UTF8; // EF BB BF
+      ungetc(b3,strm);
+    }
+    ungetc(b2,strm);
+  }
+  if (0xfe == b1 || 0xff == b1 || 0x00 == b1)
+  {
+    const int b2 = fgetc(strm), b3 = fgetc(strm);
+    if (b1 && (b1^b2) == (0xfe^0xff)) // FF FE or FE FF
+    {
+      if (0xff == b1 && 0 == b3)
       {
-        free(outstr);
-        outstr = NULL;
+        const int b4 = fgetc(strm);
+        if (0 == b4) return NStreamEncoding::UTF32LE; // FF FE 00 00
+        ungetc(b4,strm);
+      }
+      ungetc(b3,strm); // b3 is not part of a UTF16 BOM
+      return 0xff == b1 ? NStreamEncoding::UTF16LE : NStreamEncoding::UTF16BE;
+    }
+    if (0 == b1 && 0 == b2)
+    {
+      if (0xfe == b3)
+      {
+        const int b4 = fgetc(strm);
+        if (0xff == b4) return NStreamEncoding::UTF32BE; // 00 00 FE FF
+        ungetc(b4,strm);
       }
     }
+    ungetc(b3,strm);
+    ungetc(b2,strm);
   }
-  ExeHeadWStrFree(bufWStr);
-  return outstr;
+  ungetc(b1,strm);
+  return 0; // No BOM
 }

-#endif // ?_UNICODE
-
-
-bool IsUTF8BOM(FILE*fstrm) 
+WORD GetEncodingFromString(const TCHAR*s)
 {
-  // ungetc is only guaranteed to support 1 pushback, 
-  // lets hope no ASCII file starts with 0xEF and is not a BOM!
-  const int c = fgetc(fstrm);
-  if (EOF == c) return false;
-  if (0xef == c)
+  if (!_tcsicmp(s,_T("ACP"))) return NStreamEncoding::ACP; // The names are matched without regard to case
+  if (!_tcsicmp(s,_T("OEM"))) return NStreamEncoding::OEMCP;
+  if (!_tcsicmp(s,_T("UTF8"))) return NStreamEncoding::UTF8;
+  if (!_tcsicmp(s,_T("UTF16LE"))) return NStreamEncoding::UTF16LE;
+  if (!_tcsicmp(s,_T("UTF16BE"))) return NStreamEncoding::UTF16BE;
+  if (S7IsChEqualI('C',*s++) && S7IsChEqualI('P',*s++)) // "CP" prefix, s now points at the number
   {
-    const int c2 = fgetc(fstrm);
-    if (0xbb == c2)
-    {
-      const int c3 = fgetc(fstrm);
-      if (0xbf == c3) return true;
-      ungetc(c3,fstrm);
-    }
-    ungetc(c2,fstrm);
+    int cp = _tstoi(s);
+    if (cp > 0 && cp < NStreamEncoding::CPCOUNT) return (WORD) cp; // Plain codepage number
   }
-  ungetc(c,fstrm);
-  return false;
+  return NStreamEncoding::UNKNOWN; // Not a name or codepage number we understand
+}
+
+// Writes a display name for the codepage CP to Buf: a symbolic name for the
+// encodings with their own enum value, "CP<number>" for other codepage
+// numbers and "?" for values that are CPCOUNT or above.
+// Buf must have room for at least 8 TCHARs (7 characters plus the \0).
+void NStreamEncoding::GetCPDisplayName(WORD CP, TCHAR*Buf)
+{
+  TCHAR mybuf[10];
+  // Pointer to const: most of the names are string literals and those must
+  // not be reachable through a pointer that allows writing.
+  const TCHAR *p = mybuf;
+  switch(CP)
+  {
+  case ACP: p = _T("ACP"); break;
+  case OEMCP: p = _T("OEM"); break;
+  case UTF16LE: p = _T("UTF16LE"); break;
+  case UTF16BE: p = _T("UTF16BE"); break;
+  case UTF32LE: p = _T("UTF32LE"); break;
+  case UTF32BE: p = _T("UTF32BE"); break;
+  case UTF8: p = _T("UTF8"); break;
+  default:
+    _stprintf(mybuf,_T("CP%u"),CP);
+    if (CP >= NStreamEncoding::CPCOUNT) p = _T("?"); // One of our magic values, not a real codepage
+  }
+  _tcscpy(Buf,p);
+}
+
+// Builds a \n terminated description for an error code returned by ReadLine().
+// "Filename:Line" is appended for the errors that refer to a position in the
+// input, unless Filename is 0.
+tstring NStreamLineReader::GetErrorMessage(UINT Error, const TCHAR*Filename, UINT Line)
+{
+  TCHAR scratch[40];
+  tstring text;
+  bool wantLocation = true;
+  if (NStream::ERR_BUFFEROVERFLOW == Error)
+  {
+    text = _T("Line too long: ");
+  }
+  else if (NStream::ERR_IOERROR == Error)
+  {
+    text = _T("I/O  error");
+    wantLocation = false;
+  }
+  else if (NStream::ERR_UNSUPPORTEDENCODING == Error)
+  {
+    StreamEncoding().GetCPDisplayName(scratch);
+    text = tstring(scratch) + _T(" is not supported");
+    wantLocation = false;
+  }
+  else
+  {
+    text = _T("Bad text encoding: ");
+  }
+  if (wantLocation && Filename)
+  {
+    _stprintf(scratch,_T("%u"),Line);
+    text = text + Filename + _T(":") + scratch;
+  }
+  return text + _T("\n");
+}
+
+UINT NStreamLineReader::ReadLine(wchar_t*Buffer, UINT cchBuf)
+{
+  /*\
+  Reads from the associated stream until it finds a new-line or
+  the read fails (I/O error or EOF). It fails with ERR_BUFFEROVERFLOW if
+  cchBuf-1 wchar_t's are read without finding the end of the line.
+  Buffer MUST be a valid pointer, it will be \0 terminated as long as cchBuf > 0.
+  The new-line character that ended the line is stored in Buffer as well.
+  Returns NStream::OK or one of the NStream::ERR_* codes. A failed read is
+  always reported as ERR_IOERROR, this includes reaching the end of the file.
+  \*/
+  if (!cchBuf) return NStream::ERR_BUFFEROVERFLOW;
+#ifndef MB_ERR_INVALID_CHARS
+  const UINT MB_ERR_INVALID_CHARS = 8;
+#endif
+  const UINT cchFullBuf = cchBuf;
+  NIStream&strm = GetStream();
+
+l_restart:
+  // Only supports MBCS and UTF-8 for now...
+  if (StreamEncoding().IsUTF8())
+  {
+    for(;;)
+    {
+      BYTE cb = 0; // bytes in chU8 -1
+      BYTE chU8[6];
+      if (!strm.ReadOctet(&chU8[0])) goto l_ioerror;
+      UINT cchWC;
+#if defined(WIN32) // TODO: Is wchar_t==UTF16LE under cygwin?
+      // Fast path if wchar_t == UTF16 and in ASCII range
+      if (chU8[0] <= 127 && sizeof(wchar_t) == 2)
+      {
+        cchWC = ++cb;
+        if (cchBuf <= cchWC) goto l_lineoverflow;
+        *Buffer = (wchar_t) chU8[0];
+      }
+      else
+#endif
+      {
+        // The number of leading 1 bits in the first byte tells us how many
+        // continuation bytes follow.
+        if (0xC0 == (0xC0 & chU8[0]))
+        {
+          ++cb;
+          if (0xE0 == (0xE0 & chU8[0]))
+          {
+            ++cb;
+            if (0xF0 == (0xF0 & chU8[0]))
+            {
+              ++cb;
+              if (0xF8 == (0xF8 & chU8[0]))
+              {
+                ++cb;
+                if (0xFC == (0xFE & chU8[0]))
+                  ++cb;
+                else
+                  goto l_badutf;
+              }
+            }
+          }
+        }
+        for(BYTE moreU8 = 0; moreU8 < cb;)
+        {
+          BYTE b;
+          if (!strm.ReadOctet(&b)) goto l_ioerror;
+          if (0x80 != (0xC0 & b)) goto l_badutf; // chU8[1..n] must be 0b10xxxxxx
+          chU8[++moreU8] = b;
+        }
+        ++cb;
+        cchWC = UTF8ToWC_Prepare((LPCSTR)chU8,cb);
+        if (!cchWC) goto l_badutf;
+        if (cchBuf <= cchWC) goto l_lineoverflow;
+        cchWC = UTF8ToWC_Convert((LPCSTR)chU8,cb,Buffer,cchWC);
+      }
+      if (CompleteLine(Buffer,cchWC,cchBuf,true)) goto l_success;
+    }
+  }
+#ifdef _WIN32
+  else if (StreamEncoding().IsUTF16LE())
+  {
+    unsigned short lead, trail, cchWC;
+    for(;;)
+    {
+      if (!strm.ReadInt16(&lead)) goto l_ioerror;
+      FIX_ENDIAN_INT16LETOHOST_INPLACE(lead);
+      if (IsTrailSurrogateUTF16(lead)) goto l_badutf;
+      UINT32 codpt = lead;
+      // Just like the other encodings: make sure the code unit(s) and the
+      // terminator fit before anything is written to Buffer.
+      cchWC = IsLeadSurrogateUTF16(lead) ? 2 : 1;
+      if (cchBuf <= cchWC) goto l_lineoverflow;
+      Buffer[0] = lead;
+      if (2 == cchWC)
+      {
+        if (!strm.ReadInt16(&trail)) goto l_ioerror;
+        FIX_ENDIAN_INT16LETOHOST_INPLACE(trail);
+        if (!IsTrailSurrogateUTF16(trail)) goto l_badutf;
+        codpt = CodePointFromUTF16SurrogatePair(lead,trail);
+        Buffer[1] = trail;
+      }
+      if (!IsValidUnicodeCodePoint(codpt)) goto l_badutf;
+      if (CompleteLine(Buffer,cchWC,cchBuf,true)) goto l_success;
+    }
+  }
+#endif
+  else if (StreamEncoding().IsUnicode())
+  {
+    goto l_unsupportedencoding;
+  }
+  else
+  {
+    const UINT cp = StreamEncoding().GetCodepage();
+    UINT mbtowcflags = 0;
+    if (cp < 50220 && cp != 42) mbtowcflags = MB_ERR_INVALID_CHARS;
+    for(;;)
+    {
+      BYTE bufMB[2];
+      BYTE mb = 0;
+      if (!strm.ReadOctet(&bufMB[0])) goto l_ioerror;
+      if (IsDBCSLeadByteEx(cp,bufMB[0]))
+      {
+        if (!strm.ReadOctet(&bufMB[++mb])) goto l_ioerror;
+      }
+      ++mb;
+      UINT cchWC = MultiByteToWideChar(cp,mbtowcflags,(LPCSTR)bufMB,mb,0,0);
+      if (!cchWC) goto l_badencoding;
+      if (cchBuf <= cchWC) goto l_lineoverflow;
+      cchWC = MultiByteToWideChar(cp,mbtowcflags,(LPCSTR)bufMB,mb,Buffer,cchWC);
+      if (CompleteLine(Buffer,cchWC,cchBuf,false)) goto l_success;
+    }
+  }
+l_ioerror:
+  *Buffer = 0;
+  return NStream::ERR_IOERROR;
+l_lineoverflow:
+  *Buffer = 0;
+  return NStream::ERR_BUFFEROVERFLOW;
+l_badutf:
+l_badencoding:
+  *Buffer = 0;
+  return NStream::ERR_INVALIDENCODING;
+l_unsupportedencoding:
+  *Buffer = 0;
+  return NStream::ERR_UNSUPPORTEDENCODING;
+l_success:
+  *Buffer = 0;
+  // "Foo\r\nBar" is 2 and not 3 lines
+  const wchar_t chThisNL = *--Buffer, chPrevNL = m_PrevNL;
+  const bool onlyNL = ++cchBuf == cchFullBuf;
+  m_PrevNL = chThisNL;
+  if (onlyNL && (chPrevNL^chThisNL) == ('\r'^'\n'))
+  {
+    m_PrevNL = 0;
+    goto l_restart; // Previous line was "Foo\r". This line was "\n", ignore it.
+  }
+  return NStream::OK;
 }
--- a/Source/utf.h
+++ b/Source/utf.h
@ -21,41 +21,244 @@
 #include "Platform.h"
 #include <stdlib.h>
 #include <stdio.h>
+#include "util.h" // For my_fopen

-typedef unsigned short EXEHEADWCHAR_T;
+#define TSTR_INPUTCHARSET _T("ACP|OEM|CP#|UTF8")


-#ifdef _UNICODE
-typedef EXEHEADWCHAR_T EXEHEADTCHAR_T;
-
 void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch);
-#else // !_UNICODE
-typedef char EXEHEADTCHAR_T;
-
-
-inline EXEHEADTCHAR_T* ExeHeadTStrAlloc(UINT cb) 
-{
-  // NOTE: UTF8ToExeHeadTStrDup calls this so we are required to use malloc
-  return (EXEHEADTCHAR_T*) malloc(cb);
-}
-extern EXEHEADTCHAR_T* UTF8ToExeHeadTStrDup(LPCSTR StrU8,UINT Codepage);
-
-inline void RawTStrToASCII(const TCHAR*in,char*out,UINT maxcch) { lstrcpyn(out,in,maxcch); }
-#endif // ?_UNICODE

 template<typename T> T S7ChLwr(T c) { return c>='A' && c<='Z' ? (T)(c|32) : c; } // Only A-Z is changed to lower case
 template<typename T> T S7ChUpr(T c) { return c>='a' && c<='z' ? (T)(c-'a'+'A') : c; } // Only a-z is changed to upper case
 template<typename T> bool S7IsChEqualI(char ch,T cmp)
 {
-  return cmp==(T)S7ChLwr(ch) || cmp==(T)S7ChUpr(ch);
+  return S7ChLwr((T)ch) == S7ChLwr(cmp); // Equal once both sides are folded with S7ChLwr
 }

+inline bool IsValidUnicodeCodePoint(UINT32 c,bool StrictUTF32=false)
+{
+  // Rejects the noncharacters listed in Unicode 6.1, section 16.7 and,
+  // when StrictUTF32 is set, everything above U+10FFFF.
+  const bool planeEnd = 0xfffe == (c & 0xfffe); // ..FFFE & ..FFFF is reserved in each plane
+  const bool bmpReserved = 0xfdd0 <= c && c <= 0xfdef; // Reserved in BMP
+  const bool outOfRange = StrictUTF32 && c > 0x10ffff;
+  return !(planeEnd || bmpReserved || outOfRange);
+}
+inline bool IsLeadSurrogateUTF16(unsigned short c)
+{
+  // D800..DBFF all share the same top 6 bits (110110)
+  return 0xd800 == (c & 0xfc00);
+}
+inline bool IsTrailSurrogateUTF16(unsigned short c)
+{
+  // DC00..DFFF all share the same top 6 bits (110111)
+  return 0xdc00 == (c & 0xfc00);
+}
+inline UINT32 CodePointFromUTF16SurrogatePair(unsigned short lea,unsigned short tra)
+{
+  // Each surrogate carries 10 bits of the code point above U+FFFF.
+  // The arithmetic is done in UINT32 so it wraps the same way for any input.
+  const UINT32 high = ((UINT32)lea - 0xD800u) << 10;
+  const UINT32 low = (UINT32)tra - 0xDC00u;
+  return high + low + 0x10000u;
+}

-/**
- * Tries to peek at the first few bytes in the stream to determine if it is a UTF-8 BOM.
- * If it is a UTF-8 BOM it will eat the BOM, 
- * if it is not it tries its best to restore the data.
- */
-extern bool IsUTF8BOM(FILE*fstrm);
+UINT StrLenUTF16LE(const void*str);
+bool StrSetUTF16LE(tstring&dest, const void*src);
+
+wchar_t* DupWCFromBytes(void*Buffer,UINT cbBuffer,WORD SrcCP);
+UINT DetectUTFBOM(FILE*strm);
+WORD GetEncodingFromString(const TCHAR*s);
+
+// Presents a TCHAR string as UTF16LE code units.
+// Only implemented for Win32 Unicode builds, where the input is used
+// directly: Create() borrows the pointer and Destroy() has nothing to free.
+class WCToUTF16LEHlpr {
+  unsigned short* m_s;
+public:
+  WCToUTF16LEHlpr() : m_s(0) {}
+
+  // Prepares the UTF16LE view of in. The string must stay valid until
+  // Destroy() is called because it may be referenced and not copied.
+  bool Create(const TCHAR*in)
+  {
+#if defined(_WIN32) && defined(_UNICODE)
+    m_s = (unsigned short*) in;
+#else
+#error TODO: wchar_t to UTF16LE
+#endif
+    return true;
+  }
+  // Releases whatever Create() allocated.
+  void Destroy()
+  {
+    // This has to be the exact opposite of the test in Create(): every
+    // configuration that does not borrow the input owns m_s.
+#if !(defined(_WIN32) && defined(_UNICODE))
+    delete[] m_s;
+#endif
+  }
+  const unsigned short* Get() const { return m_s; }
+  UINT GetLen() const { return StrLenUTF16LE(m_s); } // In code units, without the \0
+  UINT GetSize() const { return (GetLen()+1) * 2; } // In bytes, including the \0
+};
+
+class NStreamEncoding {
+protected:
+  WORD m_cp; // Codepage number or one of the enum values below
+public:
+  enum {
+    ACP = CP_ACP,
+    OEMCP = 1,
+    UTF16LE = 1200,
+    UTF16BE = 1201,
+    UTF32LE = 12000,
+    UTF32BE = 12001,
+    UTF8 = CP_UTF8,
+    UNKNOWN = (0xffff-0),
+    AUTO = (0xffff-1),
+    CPCOUNT = (0xffff-2) // Must be less than our other magic numbers
+  };
+
+  NStreamEncoding() { Reset(); }
+  NStreamEncoding(WORD cp) { Reset();SetCodepage(cp); }
+  WORD GetCodepage() const { return m_cp; }
+  void SetCodepage(WORD cp) { m_cp = cp; } // Stores cp as-is, AUTO and UNKNOWN included
+  void SafeSetCodepage(WORD cp) // Like SetCodepage but AUTO and UNKNOWN become the platform default
+  {
+    if (NStreamEncoding::AUTO==cp) cp = GetPlatformDefaultCodepage();
+    if (NStreamEncoding::UNKNOWN==cp) cp = GetPlatformDefaultCodepage();
+    SetCodepage(cp);
+  }
+  void Reset() { SetCodepage(GetPlatformDefaultCodepage()); }
+  WORD GetPlatformDefaultCodepage() const // ACP on Windows, UTF8 everywhere else
+  {
+#ifdef _WIN32
+    return ACP;
+#else
+    return UTF8;
+#endif
+  }
+  bool IsUTF8() const { return UTF8==GetCodepage(); }
+  bool IsUTF16LE() const { return UTF16LE==GetCodepage(); }
+  bool IsUnicode() const { return IsUnicodeCodepage(GetCodepage()); }
+  void GetCPDisplayName(TCHAR*Buf) { GetCPDisplayName(m_cp, Buf); }
+
+  static UINT GetCodeUnitSize(WORD cp) // Size in bytes of one code unit
+  {
+    if ((UTF16LE|1)==(cp|1)) return 2; // UTF16LE or UTF16BE, the values only differ in bit 0
+    if ((UTF32LE|1)==(cp|1)) return 4; // UTF32LE or UTF32BE
+    return 1;
+  }
+  static bool IsUnicodeCodepage(WORD cp) // true for UTF8 and both byte orders of UTF16 and UTF32
+  {
+    return UTF8==cp || (UTF16LE|1)==(cp|1) || (UTF32LE|1)==(cp|1); 
+  }
+  static void GetCPDisplayName(WORD CP, TCHAR*Buf);
+};
+
+class NStream {
+public:
+  enum { // Status codes, returned by NStreamLineReader::ReadLine
+    OK = 0,
+    ERR_BUFFEROVERFLOW,
+    ERR_IOERROR,
+    ERR_INVALIDENCODING,
+    ERR_UNSUPPORTEDENCODING,
+  };
+  static bool IsNewline(wchar_t chW, bool HandleUnicodeNL) // HandleUnicodeNL additionally accepts FF and VT
+  {
+    if (L'\n'==chW || L'\r'==chW) return true;
+    if (HandleUnicodeNL)
+    {
+      // www.unicode.org/standard/reports/tr13/tr13-5.html#UNICODE NEWLINE GUIDELINES
+      if (L'\f'==chW) return true; // FF/Form Feed
+      if (L'\v'==chW) return true; // VT/Vertical Tab
+      // NOTIMPLEMENTED: NEL/Next Line/U+0085
+      // NOTIMPLEMENTED: LS/Line Separator/U+2028
+      // NOTIMPLEMENTED: PS/Paragraph Separator/U+2029
+    }
+    return false;
+  }
+};
+
+class NIStream {
+protected:
+  FILE* m_hFile; // Attached stream, 0 when nothing is open
+  NStreamEncoding m_Enc; // Encoding chosen by Attach()
+
+public:
+  NIStream() : m_hFile(0) {}
+  ~NIStream() { Close(); }
+  FILE* GetHandle() const { return m_hFile; }
+  NStreamEncoding& StreamEncoding() { return m_Enc; }
+  bool IsEOF() const { return feof(m_hFile) != 0; } // m_hFile is used without a null check
+  bool IsError() const { return ferror(m_hFile) != 0; } // m_hFile is used without a null check
+  bool IsUnicode() const { return m_Enc.IsUnicode(); }
+
+  void Close() // fclose()s the attached stream, if any
+  {
+    FILE*hF = Detach();
+    if (hF) fclose(hF);
+  }
+  
+  bool OpenFileForReading(const TCHAR* Path, WORD enc = NStreamEncoding::AUTO)
+  {
+    FILE *hFile = my_fopen(Path, "rb");
+    return Attach(hFile, enc); // Attach() returns false for a null handle
+  }
+  bool OpenFileForReading(const TCHAR* Path, NStreamEncoding&Enc)
+  {
+    return OpenFileForReading(Path, Enc.GetCodepage());
+  }
+  bool OpenStdIn(WORD enc = NStreamEncoding::AUTO)
+  {
+    return Attach(stdin, enc); // NOTE(review): Close() and the destructor will fclose(stdin) - confirm this is intended
+  }
+  bool OpenStdIn(NStreamEncoding&Enc)
+  {
+    return OpenStdIn(Enc.GetCodepage());
+  }
+
+  FILE* Detach() 
+  {
+    FILE *hFile = m_hFile;
+    m_hFile = 0; // The caller is now responsible for the returned handle
+    return hFile;
+  }
+  bool Attach(FILE*hFile, WORD enc)
+  {
+    Close(); // Let go of the previous stream first
+    m_hFile = hFile;
+    if (m_hFile)
+    {
+      WORD cp = DetectUTFBOM(m_hFile); // A BOM takes precedence over enc
+      if (!cp) cp = enc;
+      m_Enc.SafeSetCodepage(cp);
+    }
+    return 0 != m_hFile;
+  }
+
+  UINT ReadOctets(void*Buffer, UINT cbBuf) // Returns the number of bytes fread() delivered
+  {
+    size_t cb = fread(Buffer, 1, cbBuf, m_hFile);
+    return (UINT) cb;
+  }
+  bool ReadOctets(void*Buffer, UINT*pcbBuf) // In: bytes wanted, out: bytes read. true only if everything was read
+  {
+    UINT cbReq = *pcbBuf, cb = ReadOctets(Buffer, cbReq);
+    *pcbBuf = cb;
+    return cbReq == cb;
+  }
+  bool ReadOctet(void*Buffer) { return 1 == ReadOctets(Buffer, 1); }
+  bool ReadInt16(void*Buffer) { return 2 == ReadOctets(Buffer, 2); } // Raw bytes, no byte order conversion
+};
+
+class NStreamLineReader {
+protected:
+  NIStream &m_Strm; // Stream the lines are read from, not owned
+  wchar_t m_PrevNL; // New-line character that ended the previous line, maintained by ReadLine
+
+public:
+  NStreamLineReader(NIStream &Strm) : m_Strm(Strm), m_PrevNL(0) {}
+
+  NIStream& GetStream() { return m_Strm; }
+  NStreamEncoding& StreamEncoding() { return m_Strm.StreamEncoding(); }
+  bool IsEOF() const { return m_Strm.IsEOF(); }
+  bool IsUnicode() const { return m_Strm.IsUnicode(); }
+
+  UINT ReadLine(wchar_t*Buffer, UINT cchBuf);
+  tstring GetErrorMessage(UINT Error, const TCHAR*Filename=0, UINT Line=0);
+
+protected:
+  bool CompleteLine(wchar_t*&BufWC, UINT cchWC, UINT&cchRemain, bool HandleUnicodeNL) // Consumes cchWC units at BufWC, true if they were a new-line
+  {
+    const wchar_t chW = *BufWC;
+    BufWC += cchWC, cchRemain -= cchWC; // Both are references, the caller's buffer position and space left are updated
+    if (0 == --cchWC) // We only care about code points that fit in a single wchar_t
+      return NStream::IsNewline(chW, HandleUnicodeNL);
+    return false;
+  }
+};

 #endif // NSIS_UTF_H
--- a/Source/util.cpp
+++ b/Source/util.cpp
@ -67,11 +67,28 @@ void dopause(void)

 double my_wtof(const wchar_t *str) 
 {
-	char buf[100];
-	WideCharToMultiByte(0,0,str,-1,buf,100,0,0);
-	return atof(buf);
+  // Parses a wide string as a double by narrowing it into a 100 byte
+  // buffer (code page 0) and handing the result to atof().
+  // WideCharToMultiByte returns 0 when the conversion fails, e.g. when the
+  // input does not fit in the buffer. In that case buf may be unterminated
+  // or untouched, so it must not be parsed; 0.0 is returned instead, the
+  // same value atof() gives for text that is not a number.
+  char buf[100];
+  if (!WideCharToMultiByte(0,0,str,-1,buf,100,0,0))
+    return 0.0;
+  return atof(buf);
 }

+unsigned int my_strncpy(TCHAR*Dest, const TCHAR*Src, unsigned int cchMax)
+{
+  // Bounded string copy. Dest and Src must be valid.
+  // cchMax is the capacity of Dest in TCHARs, including the terminator.
+  // At most cchMax-1 characters are copied and Dest is then \0 terminated.
+  // When cchMax is 0 nothing is written to Dest at all.
+  // Returns the number of TCHARs copied, not counting the terminator;
+  // min(strlen(Src),cchMax-1).
+  if (0 == cchMax) return 0;
+
+  const unsigned int cchLimit = cchMax - 1;
+  unsigned int i = 0;
+  while (i < cchLimit && Src[i])
+  {
+    Dest[i] = Src[i];
+    ++i;
+  }
+  Dest[i] = _T('\0');
+  return i;
+}

 // Returns 0 if everything is OK
 // Returns -1 if can't find the file
@ -208,6 +225,7 @@ void static create_code_page_string(TCHAR *buf, size_t len, UINT code_page) {
  switch(code_page)
  {
  case CP_ACP:
+  case 1: // OEMCP
    code_page = 1252;
    break;
  case CP_UTF8:
@ -226,7 +244,7 @@ int WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr,
  char cp[128];
  create_code_page_string(cp, sizeof(cp), CodePage);

-  iconv_t cd = iconv_open(cp, "UCS-2LE"); //TODO: Should "UCS-2LE" be "wchar_t"?
+  iconv_t cd = iconv_open(cp, "wchar_t");
  if (cd == (iconv_t) -1) {
    return 0;
  }
@ -262,7 +280,7 @@ int MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr,
  char cp[128];
  create_code_page_string(cp, sizeof(cp), CodePage);

-  iconv_t cd = iconv_open("UCS-2LE", cp); //TODO: Should "UCS-2LE" be "wchar_t"?
+  iconv_t cd = iconv_open("wchar_t", cp);
  if (cd == (iconv_t) -1) {
    return 0;
  }
@ -296,7 +314,7 @@ BOOL IsValidCodePage(UINT CodePage)
  TCHAR cp[128];
  create_code_page_string(cp, sizeof(cp), CodePage);

-  iconv_t cd = iconv_open(_T("UCS-2LE"), cp); //TODO: Should "UCS-2LE" be "wchar_t"?
+  iconv_t cd = iconv_open(_T("wchar_t"), cp);
  if (cd == (iconv_t) -1)
    return FALSE;

--- a/Source/util.h
+++ b/Source/util.h
@ -36,6 +36,7 @@ extern int g_dopause;
 extern void dopause(void);

 extern double my_wtof(const wchar_t *str);
+extern unsigned int my_strncpy(TCHAR*Dest, const TCHAR*Src, unsigned int cchMax);

 // Adds the bitmap in filename using resource editor re as id id.
 // If width or height are specified it will also make sure the bitmap is in that size