Fangjun Kuang
Committed by GitHub

Fix splitting words containing ', e.g., I've (#389)

@@ -163,6 +163,8 @@ template bool SplitStringToFloats(const std::string &full, const char *delim, @@ -163,6 +163,8 @@ template bool SplitStringToFloats(const std::string &full, const char *delim,
163 bool omit_empty_strings, 163 bool omit_empty_strings,
164 std::vector<double> *out); 164 std::vector<double> *out);
165 165
  166 +static bool IsPunct(char c) { return c != '\'' && std::ispunct(c); }
  167 +
166 static std::vector<std::string> MergeCharactersIntoWords( 168 static std::vector<std::string> MergeCharactersIntoWords(
167 const std::vector<std::string> &words) { 169 const std::vector<std::string> &words) {
168 std::vector<std::string> ans; 170 std::vector<std::string> ans;
@@ -174,7 +176,7 @@ static std::vector<std::string> MergeCharactersIntoWords( @@ -174,7 +176,7 @@ static std::vector<std::string> MergeCharactersIntoWords(
174 while (i < n) { 176 while (i < n) {
175 const auto &w = words[i]; 177 const auto &w = words[i];
176 if (w.size() > 1 || 178 if (w.size() > 1 ||
177 - (w.size() == 1 && (std::ispunct(w[0]) || std::isspace(w[0])))) { 179 + (w.size() == 1 && (IsPunct(w[0]) || std::isspace(w[0])))) {
178 if (prev != -1) { 180 if (prev != -1) {
179 std::string t; 181 std::string t;
180 for (; prev < i; ++prev) { 182 for (; prev < i; ++prev) {