[cmake-developers] [PATCH] Improve encoding handling on Windows

Dāvis Mosāns davispuh at gmail.com
Thu Jun 30 22:18:13 EDT 2016


On Windows, getenv returns values in the ANSI codepage, so they need
to be converted to the internally used encoding (e.g. UTF-8). Here we
use _wgetenv instead and convert its result.

Also, Windows applications (e.g. the MSVC compiler) typically use the
current console's codepage for output to pipes, so we need to convert
that output to the internally used encoding
(KWSYS_ENCODING_DEFAULT_CODEPAGE).

Next, when we're outputting to the console we need to use wide functions.

This change allows compilers such as MSVC on Windows to be installed
in a non-ASCII path and to work correctly for all console codepages
that support that path's characters.
---
 Source/cmBuildCommand.cxx                |  4 ++--
 Source/cmCommandArgumentParserHelper.cxx |  4 ++--
 Source/cmExtraEclipseCDT4Generator.cxx   |  2 +-
 Source/cmSetCommand.cxx                  |  2 +-
 Source/cmSystemTools.cxx                 |  5 +++++
 Source/cmake.cxx                         |  6 +++---
 Source/cmakemain.cxx                     | 29 ++++++++++++++++++++++++++---
 Source/kwsys/CMakeLists.txt              |  2 ++
 Source/kwsys/Directory.cxx               |  2 +-
 Source/kwsys/FStream.hxx.in              | 19 +++++++++++++++++--
 Source/kwsys/ProcessWin32.c              | 21 ++++++++++++++++++++-
 Source/kwsys/SystemInformation.cxx       |  8 +++++---
 Source/kwsys/SystemTools.cxx             | 28 ++++++++++++++++++++--------
 13 files changed, 105 insertions(+), 27 deletions(-)

diff --git a/Source/cmBuildCommand.cxx b/Source/cmBuildCommand.cxx
index fb143a2..16771cc 100644
--- a/Source/cmBuildCommand.cxx
+++ b/Source/cmBuildCommand.cxx
@@ -77,7 +77,7 @@ bool cmBuildCommand::MainSignature(std::vector<std::string> const& args)
   // as the original 2-arg build_command signature:
   //
   if (!configuration || !*configuration) {
-    configuration = getenv("CMAKE_CONFIG_TYPE");
+    configuration = cmSystemTools::GetEnv("CMAKE_CONFIG_TYPE");
   }
   if (!configuration || !*configuration) {
     configuration = "Release";
@@ -109,7 +109,7 @@ bool cmBuildCommand::TwoArgsSignature(std::vector<std::string> const& args)
   const char* cacheValue = this->Makefile->GetDefinition(define);
 
   std::string configType = "Release";
-  const char* cfg = getenv("CMAKE_CONFIG_TYPE");
+  const char* cfg = cmSystemTools::GetEnv("CMAKE_CONFIG_TYPE");
   if (cfg && *cfg) {
     configType = cfg;
   }
diff --git a/Source/cmCommandArgumentParserHelper.cxx b/Source/cmCommandArgumentParserHelper.cxx
index 294117c..5c615c4 100644
--- a/Source/cmCommandArgumentParserHelper.cxx
+++ b/Source/cmCommandArgumentParserHelper.cxx
@@ -71,12 +71,12 @@ char* cmCommandArgumentParserHelper::ExpandSpecialVariable(const char* key,
     return this->EmptyVariable;
   }
   if (strcmp(key, "ENV") == 0) {
-    char* ptr = getenv(var);
+    const char* ptr = cmSystemTools::GetEnv(var);
     if (ptr) {
       if (this->EscapeQuotes) {
         return this->AddString(cmSystemTools::EscapeQuotes(ptr));
       } else {
-        return ptr;
+        return (char *)ptr;
       }
     }
     return this->EmptyVariable;
diff --git a/Source/cmExtraEclipseCDT4Generator.cxx b/Source/cmExtraEclipseCDT4Generator.cxx
index 16cb082..6c9e9a1 100644
--- a/Source/cmExtraEclipseCDT4Generator.cxx
+++ b/Source/cmExtraEclipseCDT4Generator.cxx
@@ -208,7 +208,7 @@ void cmExtraEclipseCDT4Generator::AddEnvVar(std::ostream& out,
   // get the variables from the environment and from the cache and then
   // figure out which one to use:
 
-  const char* envVarValue = getenv(envVar);
+  const char* envVarValue = cmSystemTools::GetEnv(envVar);
 
   std::string cacheEntryName = "CMAKE_ECLIPSE_ENVVAR_";
   cacheEntryName += envVar;
diff --git a/Source/cmSetCommand.cxx b/Source/cmSetCommand.cxx
index 1484368..c0f8ab6 100644
--- a/Source/cmSetCommand.cxx
+++ b/Source/cmSetCommand.cxx
@@ -31,7 +31,7 @@ bool cmSetCommand::InitialPass(std::vector<std::string> const& args,
     putEnvArg += "=";
 
     // what is the current value if any
-    const char* currValue = getenv(varName);
+    const char* currValue = cmSystemTools::GetEnv(varName);
     delete[] varName;
 
     // will it be set to something, then set it
diff --git a/Source/cmSystemTools.cxx b/Source/cmSystemTools.cxx
index 2d463f3..d8a1437 100644
--- a/Source/cmSystemTools.cxx
+++ b/Source/cmSystemTools.cxx
@@ -333,7 +333,12 @@ void cmSystemTools::Message(const char* m1, const char* title)
                          s_MessageCallbackClientData);
     return;
   } else {
+#if defined(_WIN32)
+    std::wstring wm1 = cmsys::Encoding::ToWide(m1);
+    std::wcerr << wm1 << std::endl << std::flush;
+#else
     std::cerr << m1 << std::endl << std::flush;
+#endif
   }
 }
 
diff --git a/Source/cmake.cxx b/Source/cmake.cxx
index c597605..94ecd81 100644
--- a/Source/cmake.cxx
+++ b/Source/cmake.cxx
@@ -955,8 +955,8 @@ void cmake::SetGlobalGenerator(cmGlobalGenerator* gg)
   cmSystemTools::SetForceUnixPaths(this->GlobalGenerator->GetForceUnixPaths());
 
   // Save the environment variables CXX and CC
-  const char* cxx = getenv("CXX");
-  const char* cc = getenv("CC");
+  const char* cxx = cmSystemTools::GetEnv("CXX");
+  const char* cc = cmSystemTools::GetEnv("CC");
   if (cxx) {
     this->CXXEnvironment = cxx;
   } else {
@@ -1429,7 +1429,7 @@ int cmake::Run(const std::vector<std::string>& args, bool noconfigure)
   // should fail (if "-i" is an option).  We cannot simply test
   // whether "-i" is given and remove it because some make programs
   // encode the MAKEFLAGS variable in a strange way.
-  if (getenv("MAKEFLAGS")) {
+  if (cmSystemTools::GetEnv("MAKEFLAGS")) {
     cmSystemTools::PutEnv("MAKEFLAGS=");
   }
 
diff --git a/Source/cmakemain.cxx b/Source/cmakemain.cxx
index 521a5bf..8708a36 100644
--- a/Source/cmakemain.cxx
+++ b/Source/cmakemain.cxx
@@ -127,7 +127,13 @@ static std::string cmakemainGetStack(void* clientdata)
 static void cmakemainMessageCallback(const char* m, const char*, bool&,
                                      void* clientdata)
 {
+#if defined(_WIN32)
+  std::wstring wm = cmsys::Encoding::ToWide(m);
+  std::wstring wstack = cmsys::Encoding::ToWide(cmakemainGetStack(clientdata).c_str());
+  std::wcerr << wm << wstack << std::endl << std::flush;
+#else
   std::cerr << m << cmakemainGetStack(clientdata) << std::endl << std::flush;
+#endif
 }
 
 static void cmakemainProgressCallback(const char* m, float prog,
@@ -144,15 +150,32 @@ static void cmakemainProgressCallback(const char* m, float prog,
   }
 
   if ((prog < 0) || (!dir.empty())) {
+#if defined(_WIN32)
+    std::wstring wm = cmsys::Encoding::ToWide(m);
+    std::wstring wdir = cmsys::Encoding::ToWide(dir.c_str());
+    std::wstring wstack = cmsys::Encoding::ToWide(cmakemainGetStack(clientdata).c_str());
+    std::wcout << L"-- " << wm << wdir << wstack << std::endl << std::flush;
+#else
     std::cout << "-- " << m << dir << cmakemainGetStack(clientdata)
-              << std::endl;
+              << std::endl << std::flush;
+#endif
   }
-
-  std::cout.flush();
 }
 
 int main(int ac, char const* const* av)
 {
+#if defined(_WIN32)
+  // This is something of a hack: with MSVC (and MinGW), std::wcout
+  // (and all other std::w* streams) for some reason fails once it
+  // encounters a non-ASCII string unless the locale is set.
+  // Note that even with this, it seems unable to output characters
+  // which aren't present in the ANSI codepage, no matter what
+  // encoding is used for the console.
+  // Also, once output of any character outside the ANSI codepage
+  // is attempted, there won't be any further output from any of
+  // the std::w* streams.
+  _wsetlocale(LC_ALL, L"");
+#endif
   cmsys::Encoding::CommandLineArguments args =
     cmsys::Encoding::CommandLineArguments::Main(ac, av);
   ac = args.argc();
diff --git a/Source/kwsys/CMakeLists.txt b/Source/kwsys/CMakeLists.txt
index 8b15394..cab7f38 100644
--- a/Source/kwsys/CMakeLists.txt
+++ b/Source/kwsys/CMakeLists.txt
@@ -709,6 +709,8 @@ IF(KWSYS_USE_Process)
   IF(NOT UNIX)
     # Use the Windows implementation.
     SET(KWSYS_C_SRCS ${KWSYS_C_SRCS} ProcessWin32.c)
+    SET_PROPERTY(SOURCE ProcessWin32.c APPEND PROPERTY COMPILE_DEFINITIONS
+      KWSYS_ENCODING_DEFAULT_CODEPAGE=${KWSYS_ENCODING_DEFAULT_CODEPAGE})
   ELSE()
     # Use the UNIX implementation.
     SET(KWSYS_C_SRCS ${KWSYS_C_SRCS} ProcessUNIX.c)
diff --git a/Source/kwsys/Directory.cxx b/Source/kwsys/Directory.cxx
index c549792..659c559 100644
--- a/Source/kwsys/Directory.cxx
+++ b/Source/kwsys/Directory.cxx
@@ -86,7 +86,7 @@ void Directory::Clear()
 
 // First microsoft compilers
 
-#if defined(_MSC_VER) || defined(__WATCOMC__)
+#if defined(_WIN32) || defined(__WATCOMC__)
 #include <windows.h>
 #include <io.h>
 #include <ctype.h>
diff --git a/Source/kwsys/FStream.hxx.in b/Source/kwsys/FStream.hxx.in
index 681e4d8..842148d 100644
--- a/Source/kwsys/FStream.hxx.in
+++ b/Source/kwsys/FStream.hxx.in
@@ -17,7 +17,7 @@
 
 namespace @KWSYS_NAMESPACE@
 {
-#if defined(_MSC_VER) && _MSC_VER >= 1400
+#if defined(_WIN32)
 # if defined(_NOEXCEPT)
 #  define @KWSYS_NAMESPACE@_FStream_NOEXCEPT _NOEXCEPT
 # else
@@ -30,8 +30,23 @@ namespace @KWSYS_NAMESPACE@
       typedef std::basic_filebuf<CharType,Traits> my_base_type;
       basic_filebuf *open(char const *s,std::ios_base::openmode mode)
       {
+        std::wstring wstr = Encoding::ToWide(s);
+        const wchar_t *ws = wstr.c_str();
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+        const wchar_t *ss = ws;
+#else
+        const char *ss = 0;
+        size_t length = std::wcstombs(0, ws, 0);
+        if (length != size_t(-1)) {
+          std::vector<char> str(length + 1);
+          ss = str.data();
+          std::wcstombs((char *)ss, ws, str.size());
+        } else {
+          ss = s;
+        }
+#endif
         return static_cast<basic_filebuf*>(
-          my_base_type::open(Encoding::ToWide(s).c_str(), mode)
+          my_base_type::open(ss, mode)
           );
       }
   };
diff --git a/Source/kwsys/ProcessWin32.c b/Source/kwsys/ProcessWin32.c
index 2b93e69..208e725 100644
--- a/Source/kwsys/ProcessWin32.c
+++ b/Source/kwsys/ProcessWin32.c
@@ -181,7 +181,7 @@ struct kwsysProcessPipeData_s
   /* ------------- Data managed per call to Execute ------------- */
 
   /* Buffer for data read in this pipe's thread.  */
-  char DataBuffer[KWSYSPE_PIPE_BUFFER_SIZE];
+  char DataBuffer[KWSYSPE_PIPE_BUFFER_SIZE*2];
 
   /* The length of the data stored in the buffer.  */
   DWORD DataLength;
@@ -1626,6 +1626,25 @@ void kwsysProcessPipeThreadReadPipe(kwsysProcess* cp, kwsysProcessPipeData* td)
       KWSYSPE_DEBUG((stderr, "read closed %d\n", td->Index));
       }
 
+    if (td->DataLength > 0) {
+        UINT codepage = GetConsoleCP();
+        if (!codepage) {
+            codepage = GetACP();
+        }
+        if (codepage != KWSYS_ENCODING_DEFAULT_CODEPAGE) {
+            int wlength = MultiByteToWideChar(codepage, 0, td->DataBuffer, td->DataLength, NULL, 0);
+            wchar_t* wdata = malloc(wlength * sizeof(wchar_t));
+            int r = MultiByteToWideChar(codepage, 0, td->DataBuffer, td->DataLength, wdata, wlength);
+            if (r > 0) {
+                r = WideCharToMultiByte(KWSYS_ENCODING_DEFAULT_CODEPAGE, 0, wdata, wlength, td->DataBuffer, KWSYSPE_PIPE_BUFFER_SIZE * 2, NULL, NULL);
+                if (r > 0) {
+                    td->DataLength = r;
+                }
+            }
+            free(wdata);
+        }
+    }
+
     KWSYSPE_DEBUG((stderr, "read %d\n", td->Index));
 
     /* Wait for our turn to be handled by the main thread.  */
diff --git a/Source/kwsys/SystemInformation.cxx b/Source/kwsys/SystemInformation.cxx
index 81fb2f9..d82c635 100644
--- a/Source/kwsys/SystemInformation.cxx
+++ b/Source/kwsys/SystemInformation.cxx
@@ -219,6 +219,8 @@ typedef struct rlimit ResourceLimitType;
 # define USE_CPUID 0
 #endif
 
+#include <cmsys/SystemTools.hxx>
+
 #if USE_CPUID
 
 #define CPUID_AWARE_COMPILER
@@ -3476,7 +3478,7 @@ SystemInformationImplementation::GetHostMemoryAvailable(const char *hostLimitEnv
   // apply a limit across a set of processes. Units are in KiB.
   if (hostLimitEnvVarName)
     {
-    const char *hostLimitEnvVarValue=getenv(hostLimitEnvVarName);
+    const char *hostLimitEnvVarValue=SystemTools::GetEnv(hostLimitEnvVarName);
     if (hostLimitEnvVarValue)
       {
       SystemInformation::LongLong hostLimit=atoLongLong(hostLimitEnvVarValue);
@@ -3506,7 +3508,7 @@ SystemInformationImplementation::GetProcMemoryAvailable(
   // are not employed. Units are in KiB.
   if (procLimitEnvVarName)
     {
-    const char *procLimitEnvVarValue=getenv(procLimitEnvVarName);
+    const char *procLimitEnvVarValue=SystemTools::GetEnv(procLimitEnvVarName);
     if (procLimitEnvVarValue)
       {
       SystemInformation::LongLong procLimit=atoLongLong(procLimitEnvVarValue);
@@ -5408,7 +5410,7 @@ bool SystemInformationImplementation::QueryOSInformation()
     }
   this->Hostname = name;
 
-  const char* arch = getenv("PROCESSOR_ARCHITECTURE");
+  const char* arch = SystemTools::GetEnv("PROCESSOR_ARCHITECTURE");
   if(arch)
     {
     this->OSPlatform = arch;
diff --git a/Source/kwsys/SystemTools.cxx b/Source/kwsys/SystemTools.cxx
index c6e668d..afc7ca1 100644
--- a/Source/kwsys/SystemTools.cxx
+++ b/Source/kwsys/SystemTools.cxx
@@ -458,7 +458,19 @@ void SystemTools::GetPath(std::vector<std::string>& path, const char* env)
 
 const char* SystemTools::GetEnv(const char* key)
 {
-  return getenv(key);
+  const char* v = 0;
+#if defined(_WIN32)
+  std::wstring wkey = Encoding::ToWide(key);
+  wchar_t* wv = _wgetenv(wkey.c_str());
+  if (wv) {
+    std::string str = Encoding::ToNarrow(wv);
+    v = new char[str.length() + 1];
+    memcpy((void *)v, str.c_str(), str.length() + 1);
+  }
+#else
+  v = getenv(key);
+#endif
+  return v;
 }
 
 const char* SystemTools::GetEnv(const std::string& key)
@@ -468,7 +480,7 @@ const char* SystemTools::GetEnv(const std::string& key)
 
 bool SystemTools::GetEnv(const char* key, std::string& result)
 {
-  const char* v = getenv(key);
+  const char* v = SystemTools::GetEnv(key);
   if(v)
     {
     result = v;
@@ -4061,13 +4073,13 @@ void SystemTools::SplitPath(const std::string& p,
     if(root.size() == 1)
       {
 #if defined(_WIN32) && !defined(__CYGWIN__)
-      if(const char* userp = getenv("USERPROFILE"))
+      if(const char* userp = SystemTools::GetEnv("USERPROFILE"))
         {
         homedir = userp;
         }
       else
 #endif
-      if(const char* h = getenv("HOME"))
+      if(const char* h = SystemTools::GetEnv("HOME"))
         {
         homedir = h;
         }
@@ -4820,7 +4832,7 @@ int SystemTools::GetTerminalWidth()
     {
     width = -1;
     }
-  columns = getenv("COLUMNS");
+  columns = SystemTools::GetEnv("COLUMNS");
   if(columns && *columns)
     {
     long t;
@@ -5435,7 +5447,7 @@ void SystemTools::ClassInitialize()
 
   // If the current working directory is a logical path then keep the
   // logical name.
-  if(const char* pwd = getenv("PWD"))
+  if(const char* pwd = SystemTools::GetEnv("PWD"))
     {
     char buf[2048];
     if(const char* cwd = Getcwd(buf, 2048))
@@ -5505,8 +5517,8 @@ static int SystemToolsDebugReport(int, char* message, int*)
 
 void SystemTools::EnableMSVCDebugHook()
 {
-  if (getenv("DART_TEST_FROM_DART") ||
-      getenv("DASHBOARD_TEST_FROM_CTEST"))
+  if (SystemTools::GetEnv("DART_TEST_FROM_DART") ||
+      SystemTools::GetEnv("DASHBOARD_TEST_FROM_CTEST"))
     {
     _CrtSetReportHook(SystemToolsDebugReport);
     }
-- 
2.9.0



More information about the cmake-developers mailing list