Basic UTF-8 support in ansi build so it can read UTF-8 .nlf files and LangStrings

git-svn-id: https://svn.code.sf.net/p/nsis/code/NSIS/trunk@6196 212acab6-be3b-0410-9dea-997c60f758d6
2011-12-05 23:44:26 +00:00 · 2011-12-05 23:44:26 +00:00 · ef8a83bd41
commit ef8a83bd41
parent 359ad0a055
10 changed files with 227 additions and 5 deletions
--- a/Source/utf.cpp
+++ b/Source/utf.cpp
@ -0,0 +1,85 @@
+/*
+ * utf.cpp
+ * 
+ * This file is a part of NSIS.
+ * 
+ * Copyright (C) 2011 Anders Kjersem
+ * 
+ * Licensed under the zlib/libpng license (the "License");
+ * you may not use this file except in compliance with the License.
+ * 
+ * Licence details can be found in the file COPYING.
+ * 
+ * This software is provided 'as-is', without any express or implied
+ * warranty.
+ *
+ */
+
+#include "utf.h"
+
+// BUGBUG: We might want to use MB_ERR_INVALID_CHARS but it is not supported
+// on < WinXP or in our current POSIX implementation.
+static const int UTF8MBTWCFLAGS  = 0;
+
+
+#define ExeHeadWStrFree free
+static EXEHEADWCHAR_T* ExeHeadWStrAlloc(UINT cch) 
+{
+  EXEHEADWCHAR_T* s = (EXEHEADWCHAR_T*) malloc(cch*sizeof(EXEHEADWCHAR_T));
+#if 0
+  // TODO: We should add POSIX versions of  G/SetLastError
+  // if we want to tell _why_ UTF8ToExeHeadTStr failed...
+  if (!s) SetLastError(ERROR_OUTOFMEMORY);
+#endif
+  return s;
+}
+
+#ifdef _UNICODE
+#else // !_UNICODE
+
+EXEHEADTCHAR_T* UTF8ToExeHeadTStr(LPCSTR StrU8,UINT Codepage) 
+{
+  int cchW = MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,NULL,0);
+  if (!cchW) return NULL;
+  WCHAR *bufWStr = (WCHAR*) ExeHeadWStrAlloc(cchW);
+  if (!bufWStr) return NULL;
+  EXEHEADTCHAR_T *outstr = NULL;
+  if (MultiByteToWideChar(CP_UTF8,UTF8MBTWCFLAGS,StrU8,-1,bufWStr,cchW))
+  {
+    int cbA = WideCharToMultiByte(Codepage,0,bufWStr,cchW,NULL,0,NULL,NULL);
+    if (cbA && (outstr = ExeHeadTStrAlloc(cbA)))
+    {
+      if (!WideCharToMultiByte(Codepage,0,bufWStr,cchW,outstr,cbA,NULL,NULL))
+      {
+        ExeHeadTStrFree(outstr);
+        outstr = NULL;
+      }
+    }
+  }
+  ExeHeadWStrFree(bufWStr);
+  return outstr;
+}
+
+#endif // ?_UNICODE
+
+
+bool IsUTF8BOM(FILE*fstrm) 
+{
+  // ungetc is only guaranteed to support 1 pushback, 
+  // lets hope no ASCII file starts with 0xEF and is not a BOM!
+  const int c = fgetc(fstrm);
+  if (EOF == c) return false;
+  if (0xef == c)
+  {
+    const int c2 = fgetc(fstrm);
+    if (0xbb == c2)
+    {
+      const int c3 = fgetc(fstrm);
+      if (0xbf == c3) return true;
+      ungetc(c3,fstrm);
+    }
+    ungetc(c2,fstrm);
+  }
+  ungetc(c,fstrm);
+  return false;
+}