[Cmake-commits] CMake branch, next, updated. v3.1.1-2395-gba9bd14
Brad King
brad.king at kitware.com
Tue Jan 27 11:30:54 EST 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMake".
The branch, next, has been updated
via ba9bd140a68861152734e2371b20793393b4a534 (commit)
via 1f77a7001b2e3f8f9224cb603e5acfee45573064 (commit)
from 8c27ca48d548abad83d2eae3816e6ef9af8d63bf (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
- Log -----------------------------------------------------------------
http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=ba9bd140a68861152734e2371b20793393b4a534
commit ba9bd140a68861152734e2371b20793393b4a534
Merge: 8c27ca4 1f77a70
Author: Brad King <brad.king at kitware.com>
AuthorDate: Tue Jan 27 11:30:54 2015 -0500
Commit: CMake Topic Stage <kwrobot at kitware.com>
CommitDate: Tue Jan 27 11:30:54 2015 -0500
Merge topic 'file-strings-utf-16' into next
1f77a700 file: Teach STRINGS to support UTF-16 and UTF-32 encodings
http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=1f77a7001b2e3f8f9224cb603e5acfee45573064
commit 1f77a7001b2e3f8f9224cb603e5acfee45573064
Author: Justin Borodinsky <justin.borodinsky at gmail.com>
AuthorDate: Sun Jan 11 14:33:36 2015 -0500
Commit: Brad King <brad.king at kitware.com>
CommitDate: Tue Jan 27 11:30:26 2015 -0500
file: Teach STRINGS to support UTF-16 and UTF-32 encodings
diff --git a/Help/command/file.rst b/Help/command/file.rst
index b0d4792..73d4cfa 100644
--- a/Help/command/file.rst
+++ b/Help/command/file.rst
@@ -65,7 +65,10 @@ Parse a list of ASCII strings from ``<filename>`` and store it in
Consider only strings that match the given regular expression.
``ENCODING <encoding-type>``
- Consider strings of a given encoding. "UTF-8" is currently supported.
+ Consider strings of a given encoding. Currently supported encodings are:
+ UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, and UTF-32BE. If the ENCODING option
+ is not provided and the file has a Byte Order Mark, the encoding
+ defaults to the one indicated by the Byte Order Mark.
For example, the code
diff --git a/Help/release/dev/file-strings-utf-16.rst b/Help/release/dev/file-strings-utf-16.rst
new file mode 100644
index 0000000..f40b63e
--- /dev/null
+++ b/Help/release/dev/file-strings-utf-16.rst
@@ -0,0 +1,5 @@
+file-strings-utf-16
+-------------------
+
+* The :command:`file(STRINGS)` command now supports UTF-16LE, UTF-16BE,
+ UTF-32LE, and UTF-32BE as ``ENCODING`` options.
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx
index f125292..579e715 100644
--- a/Source/cmFileCommand.cxx
+++ b/Source/cmFileCommand.cxx
@@ -472,7 +472,13 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
bool have_regex = false;
bool newline_consume = false;
bool hex_conversion_enabled = true;
- bool utf8_encoding = false;
+ enum { encoding_none = cmsys::FStream::BOM_None,
+ encoding_utf8 = cmsys::FStream::BOM_UTF8,
+ encoding_utf16le = cmsys::FStream::BOM_UTF16LE,
+ encoding_utf16be = cmsys::FStream::BOM_UTF16BE,
+ encoding_utf32le = cmsys::FStream::BOM_UTF32LE,
+ encoding_utf32be = cmsys::FStream::BOM_UTF32BE};
+ int encoding = encoding_none;
int arg_mode = arg_none;
for(unsigned int i=3; i < args.size(); ++i)
{
@@ -599,7 +605,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
{
if(args[i] == "UTF-8")
{
- utf8_encoding = true;
+ encoding = encoding_utf8;
+ }
+ else if(args[i] == "UTF-16LE")
+ {
+ encoding = encoding_utf16le;
+ }
+ else if(args[i] == "UTF-16BE")
+ {
+ encoding = encoding_utf16be;
+ }
+ else if(args[i] == "UTF-32LE")
+ {
+ encoding = encoding_utf32le;
+ }
+ else if(args[i] == "UTF-32BE")
+ {
+ encoding = encoding_utf32be;
}
else
{
@@ -647,6 +669,23 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
return false;
}
+ //If BOM is found and encoding was not specified, use the BOM
+ int bom_found = cmsys::FStream::ReadBOM(fin);
+ if(encoding == encoding_none && bom_found != cmsys::FStream::BOM_None)
+ {
+ encoding = bom_found;
+ }
+
+ unsigned int bytes_rem = 0;
+ if(encoding == encoding_utf16le || encoding == encoding_utf16be)
+ {
+ bytes_rem = 1;
+ }
+ if(encoding == encoding_utf32le || encoding == encoding_utf32be)
+ {
+ bytes_rem = 3;
+ }
+
// Parse strings out of the file.
int output_size = 0;
std::vector<std::string> strings;
@@ -658,6 +697,25 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
std::string current_str;
int c = fin.get();
+ for(unsigned int i=0; i<bytes_rem; ++i)
+ {
+ int c1 = fin.get();
+ if(!fin)
+ {
+ fin.putback(static_cast<char>(c1));
+ break;
+ }
+ c = (c << 8) | c1;
+ }
+ if(encoding == encoding_utf16le)
+ {
+ c = ((c & 0xFF) << 8) | ((c & 0xFF00) >> 8);
+ }
+ else if(encoding == encoding_utf32le)
+ {
+ c = (((c & 0xFF) << 24) | ((c & 0xFF00) << 8) |
+ ((c & 0xFF0000) >> 8) | ((c & 0xFF000000) >> 24));
+ }
if(c == '\r')
{
@@ -673,7 +731,7 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
// c is guaranteed to fit in char by the above if...
current_str += static_cast<char>(c);
}
- else if(utf8_encoding)
+ else if(encoding == encoding_utf8)
{
// Check for UTF-8 encoded string (up to 4 octets)
static const unsigned char utf8_check_table[3][2] =
diff --git a/Tests/RunCMake/string/RunCMakeTest.cmake b/Tests/RunCMake/string/RunCMakeTest.cmake
index fc913c6..89f7ea5 100644
--- a/Tests/RunCMake/string/RunCMakeTest.cmake
+++ b/Tests/RunCMake/string/RunCMakeTest.cmake
@@ -12,3 +12,8 @@ run_cmake(UuidMissingTypeValue)
run_cmake(UuidBadType)
run_cmake(RegexClear)
+
+run_cmake(UTF-16BE)
+run_cmake(UTF-16LE)
+run_cmake(UTF-32BE)
+run_cmake(UTF-32LE)
diff --git a/Tests/RunCMake/string/UTF-16BE-stderr.txt b/Tests/RunCMake/string/UTF-16BE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16BE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-16BE.cmake b/Tests/RunCMake/string/UTF-16BE.cmake
new file mode 100644
index 0000000..da986c0
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16BE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-16BE.txt str ENCODING UTF-16BE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-16BE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-16BE.txt b/Tests/RunCMake/string/UTF-16BE.txt
new file mode 100644
index 0000000..9d976bc
Binary files /dev/null and b/Tests/RunCMake/string/UTF-16BE.txt differ
diff --git a/Tests/RunCMake/string/UTF-16LE-stderr.txt b/Tests/RunCMake/string/UTF-16LE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16LE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-16LE.cmake b/Tests/RunCMake/string/UTF-16LE.cmake
new file mode 100644
index 0000000..326d848
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-16LE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-16LE.txt str ENCODING UTF-16LE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-16LE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-16LE.txt b/Tests/RunCMake/string/UTF-16LE.txt
new file mode 100644
index 0000000..ebba874
Binary files /dev/null and b/Tests/RunCMake/string/UTF-16LE.txt differ
diff --git a/Tests/RunCMake/string/UTF-32BE-stderr.txt b/Tests/RunCMake/string/UTF-32BE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32BE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-32BE.cmake b/Tests/RunCMake/string/UTF-32BE.cmake
new file mode 100644
index 0000000..debdeaa
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32BE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-32BE.txt str ENCODING UTF-32BE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-32BE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-32BE.txt b/Tests/RunCMake/string/UTF-32BE.txt
new file mode 100644
index 0000000..6725fbb
Binary files /dev/null and b/Tests/RunCMake/string/UTF-32BE.txt differ
diff --git a/Tests/RunCMake/string/UTF-32LE-stderr.txt b/Tests/RunCMake/string/UTF-32LE-stderr.txt
new file mode 100644
index 0000000..8254f87
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32LE-stderr.txt
@@ -0,0 +1,2 @@
+Hello World
+Hello World
diff --git a/Tests/RunCMake/string/UTF-32LE.cmake b/Tests/RunCMake/string/UTF-32LE.cmake
new file mode 100644
index 0000000..22aab5f
--- /dev/null
+++ b/Tests/RunCMake/string/UTF-32LE.cmake
@@ -0,0 +1,4 @@
+file(STRINGS UTF-32LE.txt str ENCODING UTF-32LE LENGTH_MINIMUM 4)
+message("${str}")
+file(STRINGS UTF-32LE.txt str LENGTH_MINIMUM 4)
+message("${str}")
diff --git a/Tests/RunCMake/string/UTF-32LE.txt b/Tests/RunCMake/string/UTF-32LE.txt
new file mode 100644
index 0000000..cf5102f
Binary files /dev/null and b/Tests/RunCMake/string/UTF-32LE.txt differ
-----------------------------------------------------------------------
Summary of changes:
hooks/post-receive
--
CMake
More information about the Cmake-commits
mailing list