[Cmake-commits] CMake branch, next, updated. v3.0.0-4747-g7b81cf8
Clinton Stimpson
clinton at elemtech.com
Mon Aug 4 12:53:38 EDT 2014
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "CMake".
The branch, next has been updated
via 7b81cf8943c519fa76c781afdcdc54eac08b3bfb (commit)
via 556635f5eb4f20052de4c5fa84d17bfa44764520 (commit)
via 0397a7d76906b11d97380e7073dd7183353980b6 (commit)
from 2ce89b454085b5432bdf05f0c86af0e3879f6c75 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=7b81cf8943c519fa76c781afdcdc54eac08b3bfb
commit 7b81cf8943c519fa76c781afdcdc54eac08b3bfb
Merge: 2ce89b4 556635f
Author: Clinton Stimpson <clinton at elemtech.com>
AuthorDate: Mon Aug 4 12:53:37 2014 -0400
Commit: CMake Topic Stage <kwrobot at kitware.com>
CommitDate: Mon Aug 4 12:53:37 2014 -0400
Merge topic 'file-strings-encoding' into next
556635f5 cmFileCommand: Add ENCODING option to file(STRINGS ...)
0397a7d7 cmFileCommand: Refactor handling for file(STRINGS ...).
http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=556635f5eb4f20052de4c5fa84d17bfa44764520
commit 556635f5eb4f20052de4c5fa84d17bfa44764520
Author: Clinton Stimpson <clinton at elemtech.com>
AuthorDate: Mon Aug 4 10:47:22 2014 -0600
Commit: Clinton Stimpson <clinton at elemtech.com>
CommitDate: Mon Aug 4 10:52:50 2014 -0600
cmFileCommand: Add ENCODING option to file(STRINGS ...)
For now, UTF8 encoding support is added.
This addresses issue #10519.
diff --git a/Help/command/file.rst b/Help/command/file.rst
index 58e3a26..5f93686 100644
--- a/Help/command/file.rst
+++ b/Help/command/file.rst
@@ -64,6 +64,9 @@ Parse a list of ASCII strings from ``<filename>`` and store it in
``REGEX <regex>``
Consider only strings that match the given regular expression.
+``ENCODING <encoding-type>``
+ Consider strings of a given encoding. "UTF8" is currently supported.
+
For example, the code
.. code-block:: cmake
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx
index b99b1c7..34c0d73 100644
--- a/Source/cmFileCommand.cxx
+++ b/Source/cmFileCommand.cxx
@@ -428,7 +428,8 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
arg_length_minimum,
arg_length_maximum,
arg__maximum,
- arg_regex };
+ arg_regex,
+ arg_encoding };
unsigned int minlen = 0;
unsigned int maxlen = 0;
int limit_input = -1;
@@ -438,6 +439,7 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
bool have_regex = false;
bool newline_consume = false;
bool hex_conversion_enabled = true;
+ bool utf8_encoding = false;
int arg_mode = arg_none;
for(unsigned int i=3; i < args.size(); ++i)
{
@@ -475,6 +477,10 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
hex_conversion_enabled = false;
arg_mode = arg_none;
}
+ else if(args[i] == "ENCODING")
+ {
+ arg_mode = arg_encoding;
+ }
else if(arg_mode == arg_limit_input)
{
if(sscanf(args[i].c_str(), "%d", &limit_input) != 1 ||
@@ -556,6 +562,22 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
have_regex = true;
arg_mode = arg_none;
}
+ else if(arg_mode == arg_encoding)
+ {
+ if(args[i] == "UTF8")
+ {
+ utf8_encoding = true;
+ }
+ else
+ {
+ cmOStringStream e;
+ e << "STRINGS option ENCODING \""
+ << args[i] << "\" not recognized.";
+ this->SetError(e.str());
+ return false;
+ }
+ arg_mode = arg_none;
+ }
else
{
cmOStringStream e;
@@ -618,6 +640,52 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
// c is guaranteed to fit in char by the above if...
current_str += static_cast<char>(c);
}
+ else if(utf8_encoding)
+ {
+ // Check for UTF-8 encoded string (up to 4 octets)
+ static const unsigned char utf8_check_table[3][2] =
+ {
+ {0xE0, 0xC0},
+ {0xF0, 0xE0},
+ {0xF8, 0xF0},
+ };
+
+ // how many octets are there?
+ unsigned int num_utf8_bytes = 0;
+ for(unsigned int j=0; num_utf8_bytes == 0 && j<3; j++)
+ {
+ if((c & utf8_check_table[j][0]) == utf8_check_table[j][1])
+ num_utf8_bytes = j+2;
+ }
+
+ // get subsequent octets and check that they are valid
+ for(unsigned int j=0; j<num_utf8_bytes; j++)
+ {
+ if(j != 0)
+ {
+ c = fin.get();
+ if(!fin || (c & 0xC0) != 0x80)
+ {
+ fin.putback(c);
+ break;
+ }
+ }
+ current_str += static_cast<char>(c);
+ }
+
+ // if this was an invalid utf8 sequence, discard the data, and put
+ // back subsequent characters
+ if((current_str.length() != num_utf8_bytes))
+ {
+ for(unsigned int j=0; j<current_str.size()-1; j++)
+ {
+ c = current_str[current_str.size() - 1 - j];
+ fin.putback(c);
+ }
+ current_str.clear();
+ }
+ }
+
if(c == '\n' && !newline_consume)
{
diff --git a/Tests/StringFileTest/CMakeLists.txt b/Tests/StringFileTest/CMakeLists.txt
index 4fa5a86..c46bef6 100644
--- a/Tests/StringFileTest/CMakeLists.txt
+++ b/Tests/StringFileTest/CMakeLists.txt
@@ -55,6 +55,16 @@ else()
"file(STRINGS) incorrectly read from srec file [${infile_strings}]")
endif()
+#this file has utf-8 content
+FILE(STRINGS test.utf8 infile_strings ENCODING UTF8)
+list(LENGTH infile_strings content_len)
+if(content_len MATCHES "3")
+ message("file(STRINGS) correctly read from utf8 file [${infile_strings}]")
+else()
+ message(SEND_ERROR
+ "file(STRINGS) incorrectly read from utf8 file [${infile_strings}]")
+endif()
+
# String test
string(REGEX MATCH "[cC][mM][aA][kK][eE]" rmvar "CMake is great")
string(REGEX MATCHALL "[cC][mM][aA][kK][eE]" rmallvar "CMake is better than cmake or CMake")
diff --git a/Tests/StringFileTest/test.utf8 b/Tests/StringFileTest/test.utf8
new file mode 100644
index 0000000..6c29170
--- /dev/null
+++ b/Tests/StringFileTest/test.utf8
@@ -0,0 +1,3 @@
+The value of π (pi) is 3.141593
+Line mixed with binary partially matches valid utf8: Ï is à93.1593
+à
\ No newline at end of file
http://cmake.org/gitweb?p=cmake.git;a=commitdiff;h=0397a7d76906b11d97380e7073dd7183353980b6
commit 0397a7d76906b11d97380e7073dd7183353980b6
Author: Clinton Stimpson <clinton at elemtech.com>
AuthorDate: Mon Aug 4 10:45:13 2014 -0600
Commit: Clinton Stimpson <clinton at elemtech.com>
CommitDate: Mon Aug 4 10:52:26 2014 -0600
cmFileCommand: Refactor handling for file(STRINGS ...).
This refactor will make room for encoding support.
diff --git a/Source/cmFileCommand.cxx b/Source/cmFileCommand.cxx
index e47365a..b99b1c7 100644
--- a/Source/cmFileCommand.cxx
+++ b/Source/cmFileCommand.cxx
@@ -596,11 +596,29 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
int output_size = 0;
std::vector<std::string> strings;
std::string s;
- int c;
while((!limit_count || strings.size() < limit_count) &&
(limit_input < 0 || static_cast<int>(fin.tellg()) < limit_input) &&
- (c = fin.get(), fin))
+ fin)
{
+ std::string current_str;
+
+ int c = fin.get();
+
+ if(c == '\r')
+ {
+ // Ignore CR character to make output always have UNIX newlines.
+ continue;
+ }
+
+ else if((c >= 0x20 && c < 0x7F) || c == '\t' ||
+ (c == '\n' && newline_consume))
+ {
+ // This is an ASCII character that may be part of a string.
+ // Cast added to avoid compiler warning. Cast is ok because
+ // c is guaranteed to fit in char by the above if...
+ current_str += static_cast<char>(c);
+ }
+
if(c == '\n' && !newline_consume)
{
// The current line has been terminated. Check if the current
@@ -621,26 +639,13 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
// Reset the string to empty.
s = "";
}
- else if(c == '\r')
- {
- // Ignore CR character to make output always have UNIX newlines.
- }
- else if((c >= 0x20 && c < 0x7F) || c == '\t' ||
- (c == '\n' && newline_consume))
+ else if(current_str.empty())
{
- // This is an ASCII character that may be part of a string.
- // Cast added to avoid compiler warning. Cast is ok because
- // c is guaranteed to fit in char by the above if...
- s += static_cast<char>(c);
- }
- else
- {
- // TODO: Support ENCODING option. See issue #10519.
// A non-string character has been found. Check if the current
// string matches the requirements. We require that the length
// be at least one no matter what the user specified.
if(s.length() >= minlen && s.length() >= 1 &&
- (!have_regex || regex.find(s.c_str())))
+ (!have_regex || regex.find(s.c_str())))
{
output_size += static_cast<int>(s.size()) + 1;
if(limit_output >= 0 && output_size >= limit_output)
@@ -654,10 +659,15 @@ bool cmFileCommand::HandleStringsCommand(std::vector<std::string> const& args)
// Reset the string to empty.
s = "";
}
+ else
+ {
+ s += current_str;
+ }
+
- // Terminate a string if the maximum length is reached.
if(maxlen > 0 && s.size() == maxlen)
{
+ // Terminate a string if the maximum length is reached.
if(s.length() >= minlen &&
(!have_regex || regex.find(s.c_str())))
{
-----------------------------------------------------------------------
Summary of changes:
hooks/post-receive
--
CMake
More information about the Cmake-commits
mailing list