Eclipse SUMO - Simulation of Urban MObility
StringUtils.cpp
Go to the documentation of this file.
1 /****************************************************************************/
2 // Eclipse SUMO, Simulation of Urban MObility; see https://eclipse.org/sumo
3 // Copyright (C) 2001-2020 German Aerospace Center (DLR) and others.
4 // This program and the accompanying materials are made available under the
5 // terms of the Eclipse Public License 2.0 which is available at
6 // https://www.eclipse.org/legal/epl-2.0/
7 // This Source Code may also be made available under the following Secondary
8 // Licenses when the conditions for such availability set forth in the Eclipse
9 // Public License 2.0 are satisfied: GNU General Public License, version 2
10 // or later which is available at
11 // https://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
12 // SPDX-License-Identifier: EPL-2.0 OR GPL-2.0-or-later
13 /****************************************************************************/
21 // Some static methods for string processing
22 /****************************************************************************/
23 #include <config.h>
24 
25 #include <string>
26 #include <iostream>
27 #include <cstdio>
28 #include <cstring>
29 #include <regex>
30 #include <xercesc/util/TransService.hpp>
31 #include <xercesc/util/TranscodingException.hpp>
33 #include <utils/common/ToString.h>
34 #include "StringUtils.h"
35 
36 
37 // ===========================================================================
38 // static member definitions
39 // ===========================================================================
/// @brief Definition of the static member StringUtils::emptyString; default-constructed, i.e. initially ""
std::string StringUtils::emptyString;
41 
42 
43 // ===========================================================================
44 // method definitions
45 // ===========================================================================
46 std::string
47 StringUtils::prune(const std::string& str) {
48  const std::string::size_type endpos = str.find_last_not_of(" \t\n\r");
49  if (std::string::npos != endpos) {
50  const int startpos = (int)str.find_first_not_of(" \t\n\r");
51  return str.substr(startpos, endpos - startpos + 1);
52  }
53  return "";
54 }
55 
56 
57 std::string
58 StringUtils::to_lower_case(std::string str) {
59  for (int i = 0; i < (int)str.length(); i++) {
60  if (str[i] >= 'A' && str[i] <= 'Z') {
61  str[i] = str[i] + 'a' - 'A';
62  }
63  }
64  return str;
65 }
66 
67 
68 std::string
69 StringUtils::latin1_to_utf8(std::string str) {
70  // inspired by http://stackoverflow.com/questions/4059775/convert-iso-8859-1-strings-to-utf-8-in-c-c
71  std::string result;
72  for (int i = 0; i < (int)str.length(); i++) {
73  const unsigned char c = str[i];
74  if (c < 128) {
75  result += c;
76  } else {
77  result += (char)(0xc2 + (c > 0xbf));
78  result += (char)((c & 0x3f) + 0x80);
79  }
80  }
81  return result;
82 }
83 
84 
85 std::string
86 StringUtils::convertUmlaute(std::string str) {
87  str = replace(str, "\xE4", "ae");
88  str = replace(str, "\xC4", "Ae");
89  str = replace(str, "\xF6", "oe");
90  str = replace(str, "\xD6", "Oe");
91  str = replace(str, "\xFC", "ue");
92  str = replace(str, "\xDC", "Ue");
93  str = replace(str, "\xDF", "ss");
94  str = replace(str, "\xC9", "E");
95  str = replace(str, "\xE9", "e");
96  str = replace(str, "\xC8", "E");
97  str = replace(str, "\xE8", "e");
98  return str;
99 }
100 
101 
102 
103 std::string
104 StringUtils::replace(std::string str, const char* what,
105  const char* by) {
106  const std::string what_tmp(what);
107  const std::string by_tmp(by);
108  std::string::size_type idx = str.find(what);
109  const int what_len = (int)what_tmp.length();
110  if (what_len > 0) {
111  const int by_len = (int)by_tmp.length();
112  while (idx != std::string::npos) {
113  str = str.replace(idx, what_len, by);
114  idx = str.find(what, idx + by_len);
115  }
116  }
117  return str;
118 }
119 
120 
121 std::string StringUtils::substituteEnvironment(std::string str) {
122  // Expression for an environment variables, e.g. ${NAME}
123  // Note: - R"(...)" is a raw string literal syntax to simplify a regex declaration
124  // - .+? looks for the shortest match (non-greedy)
125  // - (.+?) defines a "subgroup" which is already stripped of the $ and {, }
126  std::regex envVarExpr(R"(\$\{(.+?)\})");
127 
128  // Are there any variables in this string?
129  std::smatch match;
130  std::string strIter = str;
131 
132  // Loop over the entire value string and look for variable names
133  while (std::regex_search(strIter, match, envVarExpr)) {
134  std::string varName = match[1];
135 
136  // Find the variable in the environment and its value
137  std::string varValue;
138  if (std::getenv(varName.c_str()) != nullptr) {
139  varValue = std::getenv(varName.c_str());
140  }
141 
142  // Replace the variable placeholder with its value in the original string
143  str = std::regex_replace(str, std::regex("\\$\\{" + varName + "\\}"), varValue);
144 
145  // Continue the loop with the remainder of the string
146  strIter = match.suffix();
147  }
148 
149  return str;
150 }
151 
152 std::string
154  std::ostringstream oss;
155  if (time < 0) {
156  oss << "-";
157  time = -time;
158  }
159  char buffer[10];
160  sprintf(buffer, "%02i:", (time / 3600));
161  oss << buffer;
162  time = time % 3600;
163  sprintf(buffer, "%02i:", (time / 60));
164  oss << buffer;
165  time = time % 60;
166  sprintf(buffer, "%02i", time);
167  oss << buffer;
168  return oss.str();
169 }
170 
171 
172 bool
173 StringUtils::startsWith(const std::string& str, const std::string prefix) {
174  return str.compare(0, prefix.length(), prefix) == 0;
175 }
176 
177 
178 bool
179 StringUtils::endsWith(const std::string& str, const std::string suffix) {
180  if (str.length() >= suffix.length()) {
181  return str.compare(str.length() - suffix.length(), suffix.length(), suffix) == 0;
182  } else {
183  return false;
184  }
185 }
186 
187 
188 std::string
189 StringUtils::escapeXML(const std::string& orig, const bool maskDoubleHyphen) {
190  std::string result = replace(orig, "&", "&amp;");
191  result = replace(result, ">", "&gt;");
192  result = replace(result, "<", "&lt;");
193  result = replace(result, "\"", "&quot;");
194  if (maskDoubleHyphen) {
195  result = replace(result, "--", "&#45;&#45;");
196  }
197  for (char invalid = '\1'; invalid < ' '; invalid++) {
198  result = replace(result, std::string(1, invalid).c_str(), "");
199  }
200  return replace(result, "'", "&apos;");
201 }
202 
203 
204 std::string
205 StringUtils::urlEncode(const std::string& toEncode, const std::string encodeWhich) {
206  std::ostringstream out;
207 
208  for (int i = 0; i < (int)toEncode.length(); ++i) {
209  const char t = toEncode.at(i);
210 
211  if ((encodeWhich != "" && encodeWhich.find(t) == std::string::npos) ||
212  (encodeWhich == "" &&
213  ((t >= 45 && t <= 57) || // hyphen, period, slash, 0-9
214  (t >= 65 && t <= 90) || // A-Z
215  t == 95 || // underscore
216  (t >= 97 && t <= 122) || // a-z
217  t == 126)) // tilde
218  ) {
219  out << toEncode.at(i);
220  } else {
221  out << charToHex(toEncode.at(i));
222  }
223  }
224 
225  return out.str();
226 }
227 
228 
229 std::string
230 StringUtils::urlDecode(const std::string& toDecode) {
231  std::ostringstream out;
232 
233  for (int i = 0; i < (int)toDecode.length(); ++i) {
234  if (toDecode.at(i) == '%') {
235  std::string str(toDecode.substr(i + 1, 2));
236  out << hexToChar(str);
237  i += 2;
238  } else {
239  out << toDecode.at(i);
240  }
241  }
242 
243  return out.str();
244 }
245 
246 std::string
247 StringUtils::charToHex(unsigned char c) {
248  short i = c;
249 
250  std::stringstream s;
251 
252  s << "%" << std::setw(2) << std::setfill('0') << std::hex << i;
253 
254  return s.str();
255 }
256 
257 
258 unsigned char
259 StringUtils::hexToChar(const std::string& str) {
260  short c = 0;
261  if (!str.empty()) {
262  std::istringstream in(str);
263  in >> std::hex >> c;
264  if (in.fail()) {
265  throw NumberFormatException(str + " could not be interpreted as hex");
266  }
267  }
268  return static_cast<unsigned char>(c);
269 }
270 
271 
272 int
273 StringUtils::toInt(const std::string& sData) {
274  long long int result = toLong(sData);
275  if (result > std::numeric_limits<int>::max() || result < std::numeric_limits<int>::min()) {
276  throw NumberFormatException(toString(result) + " int overflow");
277  }
278  return (int)result;
279 }
280 
281 
282 int
283 StringUtils::toIntSecure(const std::string& sData, int def) {
284  if (sData.length() == 0) {
285  return def;
286  }
287  return toInt(sData);
288 }
289 
290 
291 long long int
292 StringUtils::toLong(const std::string& sData) {
293  const char* const data = sData.c_str();
294  if (data == 0 || data[0] == 0) {
295  throw EmptyData();
296  }
297  char* end;
298  errno = 0;
299 #ifdef WIN32
300  long long int ret = _strtoi64(data, &end, 10);
301 #else
302  long long int ret = strtoll(data, &end, 10);
303 #endif
304  if (errno == ERANGE) {
305  errno = 0;
306  throw NumberFormatException("(long long integer range) " + sData);
307  }
308  if ((int)(end - data) != (int)strlen(data)) {
309  throw NumberFormatException("(long long integer format) " + sData);
310  }
311  return ret;
312 }
313 
314 
315 int
316 StringUtils::hexToInt(const std::string& sData) {
317  if (sData.length() == 0) {
318  throw EmptyData();
319  }
320  size_t idx = 0;
321  int result;
322  try {
323  if (sData[0] == '#') { // for html color codes
324  result = std::stoi(sData.substr(1), &idx, 16);
325  idx++;
326  } else {
327  result = std::stoi(sData, &idx, 16);
328  }
329  } catch (...) {
330  throw NumberFormatException("(hex integer format) " + sData);
331  }
332  if (idx != sData.length()) {
333  throw NumberFormatException("(hex integer format) " + sData);
334  }
335  return result;
336 }
337 
338 
339 double
340 StringUtils::toDouble(const std::string& sData) {
341  if (sData.size() == 0) {
342  throw EmptyData();
343  }
344  try {
345  size_t idx = 0;
346  const double result = std::stod(sData, &idx);
347  if (idx != sData.size()) {
348  throw NumberFormatException("(double format) " + sData);
349  } else {
350  return result;
351  }
352  } catch (...) {
353  // invalid_argument or out_of_range
354  throw NumberFormatException("(double) " + sData);
355  }
356 }
357 
358 
359 double
360 StringUtils::toDoubleSecure(const std::string& sData, const double def) {
361  if (sData.length() == 0) {
362  return def;
363  }
364  return toDouble(sData);
365 }
366 
367 
368 bool
369 StringUtils::toBool(const std::string& sData) {
370  if (sData.length() == 0) {
371  throw EmptyData();
372  }
373  std::string s = sData;
374  // Don't use std::transform(..., ::tolower) due a C4244 Warning in MSVC17
375  for (int i = 0; i < (int)s.length(); i++) {
376  s[i] = (char)::tolower((char)s[i]);
377  }
378  if (s == "1" || s == "yes" || s == "true" || s == "on" || s == "x" || s == "t") {
379  return true;
380  } else if (s == "0" || s == "no" || s == "false" || s == "off" || s == "-" || s == "f") {
381  return false;
382  } else {
383  throw BoolFormatException(s);
384  }
385 }
386 
387 
388 std::string
389 StringUtils::transcode(const XMLCh* const data, int length) {
390  if (data == 0) {
391  throw EmptyData();
392  }
393  if (length == 0) {
394  return "";
395  }
396 #if _XERCES_VERSION < 30100
398  std::string result(t);
399  XERCES_CPP_NAMESPACE::XMLString::release(&t);
400  return result;
401 #else
402  try {
403  XERCES_CPP_NAMESPACE::TranscodeToStr utf8(data, "UTF-8");
404  return reinterpret_cast<const char*>(utf8.str());
405  } catch (XERCES_CPP_NAMESPACE::TranscodingException&) {
406  return "?";
407  }
408 #endif
409 }
410 
411 
412 std::string
413 StringUtils::trim_left(const std::string s, const std::string& t) {
414  std::string result = s;
415  result.erase(0, s.find_first_not_of(t));
416  return result;
417 }
418 
419 std::string
420 StringUtils::trim_right(const std::string s, const std::string& t) {
421  std::string result = s;
422  result.erase(s.find_last_not_of(t) + 1);
423  return result;
424 }
425 
426 std::string
427 StringUtils::trim(const std::string s, const std::string& t) {
428  return trim_right(trim_left(s, t), t);
429 }
430 
431 
432 /****************************************************************************/
std::string toString(const T &t, std::streamsize accuracy=gPrecision)
Definition: ToString.h:44
std::string transcode(const XMLCh *const qname)
static std::string urlEncode(const std::string &url, const std::string encodeWhich="")
static std::string charToHex(unsigned char c)
static std::string urlDecode(const std::string &encoded)
static long long int toLong(const std::string &sData)
converts a string into the long value described by it by calling the char-type converter,...
static double toDoubleSecure(const std::string &sData, const double def)
converts a string into the double value described by it, or returns the given default if the string is empty
static std::string replace(std::string str, const char *what, const char *by)
static std::string trim(const std::string s, const std::string &t=" \t\n")
remove leading and trailing whitespace
static std::string trim_right(const std::string s, const std::string &t=" \t\n")
remove trailing whitespace from string
static std::string trim_left(const std::string s, const std::string &t=" \t\n")
remove leading whitespace from string
static std::string substituteEnvironment(std::string str)
static std::string toTimeString(int time)
Builds a time string (hh:mm:ss) from the given seconds.
static int hexToInt(const std::string &sData)
converts a string with a hex value into the integer value described by it by calling the char-type co...
static double toDouble(const std::string &sData)
converts a string into the double value described by it by calling the char-type converter
static std::string escapeXML(const std::string &orig, const bool maskDoubleHyphen=false)
Replaces the standard escapes by their XML entities.
static std::string to_lower_case(std::string str)
Transfers the content to lower case.
Definition: StringUtils.cpp:58
static std::string latin1_to_utf8(std::string str)
Transfers from Latin 1 (ISO-8859-1) to UTF-8.
Definition: StringUtils.cpp:69
static std::string prune(const std::string &str)
Removes trailing and leading whitespace characters.
Definition: StringUtils.cpp:47
static std::string convertUmlaute(std::string str)
Converts German "Umlaute" to their ASCII transliterations.
Definition: StringUtils.cpp:86
static unsigned char hexToChar(const std::string &str)
static bool startsWith(const std::string &str, const std::string prefix)
Checks whether a given string starts with the prefix.
static std::string emptyString
An empty string.
Definition: StringUtils.h:80
static bool endsWith(const std::string &str, const std::string suffix)
Checks whether a given string ends with the suffix.
static std::string transcode(const XMLCh *const data)
converts a 0-terminated XMLCh* array (usually UTF-16, stemming from Xerces) into std::string in UTF-8
Definition: StringUtils.h:133
static int toIntSecure(const std::string &sData, int def)
converts a string into the integer value described by it
static int toInt(const std::string &sData)
converts a string into the integer value described by it by calling the char-type converter,...
static bool toBool(const std::string &sData)
converts a string into the bool value described by it by calling the char-type converter