WvStreams
wvstrutils.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*-
2  * Worldvisions Weaver Software:
3  * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4  *
5  * Various little string functions...
6  *
7  * FIXME: and some other assorted crap that belongs anywhere but here.
8  */
9 #ifndef __WVSTRUTILS_H
10 #define __WVSTRUTILS_H
11 
12 #include <sys/types.h> // for off_t
13 #include <time.h>
14 #include <ctype.h>
15 #include "wvstring.h"
16 #include "wvstringlist.h"
17 #include "wvhex.h"
18 #ifndef _WIN32
19 #include "wvregex.h"
20 #endif
21 
/**
 * Add character c to the end of a string after removing terminating
 * carriage returns/linefeeds if any.
 */
34 char *terminate_string(char *string, char c);
35 
/**
 * Trims whitespace from the beginning and end of the character string,
 * including carriage return / line feed characters.
 */
44 char *trim_string(char *string);
45 
/**
 * Overload of trim_string() that additionally takes a character c.
 * NOTE(review): the role of c is not visible in this header -- presumably
 * the string is cut at c; confirm against strutils.cc.
 */
50 char *trim_string(char *string, char c);
51 
/**
 * Return the string formed by concatenating string 'a' and string 'b' with
 * the 'sep' character (a space by default) between them.
 * NOTE(review): 'onesep' presumably collapses repeated separators at the
 * join point -- confirm against strutils.cc.
 */
65 WvString spacecat(WvStringParm a, WvStringParm b, char sep = ' ',
66  bool onesep = false);
67 
68 
/**
 * Replaces all whitespace characters in the string with non-breaking
 * spaces (&#160;) for use with web stuff.
 */
73 char *non_breaking(const char *string);
74 
/**
 * Replace all instances of c1 with c2 for the first 'length' characters
 * in 'string'.
 */
79 void replace_char(void *string, char c1, char c2, int length);
80 
/**
 * Snip off the first part of 'haystack' if it consists of 'needle'.
 */
84 char *snip_string(char *haystack, char *needle);
85 
86 #ifndef _WIN32
87 
/**
 * In-place modify a character string so that all contained letters are
 * in lower case.
 */
91 char *strlwr(char *string);
92 
/**
 * In-place modify a character string so that all contained letters are
 * in upper case.
 */
97 char *strupr(char *string);
98 
99 #endif
100 
/** Returns true if all characters in 'string' are isalnum() (alphanumeric). */
102 bool is_word(const char *string);
103 
/**
 * Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
 * NOTE(review): 'charRep' presumably toggles a printable-character column
 * next to the hex bytes -- confirm against strutils.cc.
 */
112 WvString hexdump_buffer(const void *buf, size_t len, bool charRep = true);
113 
/** Returns true if 'c' is a newline or carriage return character. */
118 bool isnewline(char c);
119 
/**
 * Converts escaped characters (things like %20 etc.) from web URLs into
 * their normal ASCII representation.
 * NOTE(review): the effect of 'no_space' is not visible here -- confirm.
 */
127 WvString url_decode(WvStringParm str, bool no_space = false);
128 
129 
/**
 * Converts all those pesky spaces, colons, and other nasties into nice
 * unreadable Quasi-Unicode codes.
 * NOTE(review): 'unsafe' presumably lists the characters that must be
 * encoded -- confirm against strutils.cc.
 */
138 WvString url_encode(WvStringParm str, WvStringParm unsafe = "");
139 
140 
/** Returns the difference between two dates in a human readable format. */
144 WvString diff_dates(time_t t1, time_t t2);
145 
146 
/**
 * Returns an RFC822-compatible date made out of _when, or, if _when < 0,
 * out of the current time.
 */
151 WvString rfc822_date(time_t _when = -1);
152 
/** Returns an RFC1123-compatible date made out of _when. */
154 WvString rfc1123_date(time_t _when);
155 
/**
 * Return the local date (TZ applied) out of _when.
 * NOTE(review): the default of -1 presumably means "now" -- confirm.
 */
157 WvString local_date(time_t _when = -1);
158 
/**
 * Return the local time (in format of ISO 8601) out of _when.
 * NOTE(review): the default of -1 presumably means "now" -- confirm.
 */
160 WvString intl_time(time_t _when = -1);
161 
/**
 * Return the local date (in format of ISO 8601) out of _when.
 * NOTE(review): the default of -1 presumably means "now" -- confirm.
 */
163 WvString intl_date(time_t _when = -1);
164 
/**
 * Return the local date and time (in format of ISO 8601) out of _when.
 * NOTE(review): the default of -1 presumably means "now" -- confirm.
 */
166 WvString intl_datetime(time_t _when = -1);
167 
/**
 * Return the number of seconds by which localtime (at the given timestamp)
 * is offset from GMT.
 */
168 time_t intl_gmtoff(time_t t);
169 
170 #ifndef _WIN32
171 
/** Similar to crypt(), but this randomly selects its own salt. */
176 WvString passwd_crypt(const char *str);
177 
178 #endif
179 
/**
 * Similar to crypt(), but this randomly selects its own salt.
 * NOTE(review): presumably the MD5-based counterpart of passwd_crypt() --
 * confirm against strcrypt.cc.
 */
184 WvString passwd_md5(const char *str);
185 
191 
/** How many times does 'c' occur in "s"? */
193 int strcount(WvStringParm s, const char c);
194 
200 
208 
/**
 * NOTE(review): presumably returns the directory part of the path/file name
 * 'fullname'; no description is available in this listing -- confirm
 * against strutils.cc.
 */
215 WvString getdirname(WvStringParm fullname);
216 
217 /*
218  * Possible rounding methods for numbers -- remember from school?
 *
 * Used as the rounding_method argument of sizetoa(), sizektoa(),
 * sizeitoa() and sizekitoa(), where ROUND_UP_AT_POINT_FIVE is the default.
 * NOTE(review): the per-value remarks below are inferred from the
 * enumerator names only; confirm against the implementation.
219  */
220 enum RoundingMethod
221 {
222  ROUND_DOWN, // presumably: always round down
223  ROUND_DOWN_AT_POINT_FIVE, // presumably: round to nearest, exact .5 goes down
224  ROUND_UP_AT_POINT_FIVE, // presumably: round to nearest, exact .5 goes up
225  ROUND_UP // presumably: always round up
226 };
227 
/**
 * Given a number of blocks and a blocksize (default==1 byte), return a
 * WvString containing a human-readable representation of the size.
 * rounding_method selects how the displayed value is rounded.
 */
233 WvString sizetoa(unsigned long long blocks, unsigned long blocksize = 1,
234  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
235 
/** Given a size in kilobytes, return a human readable size. */
240 WvString sizektoa(unsigned long long kbytes,
241  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
242 
/**
 * Given a number of blocks and a blocksize (default==1 byte), return a
 * WvString containing a human-readable representation of the size.
 * NOTE(review): presumably differs from sizetoa() in the unit family used
 * (e.g. binary KiB/MiB) -- confirm against strutils.cc.
 */
248 WvString sizeitoa(unsigned long long blocks, unsigned long blocksize = 1,
249  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
250 
/**
 * Given a size in kilobytes, return a human readable size.
 * NOTE(review): presumably the kilobyte-input counterpart of sizeitoa() --
 * confirm against strutils.cc.
 */
255 WvString sizekitoa(unsigned long long kbytes,
256  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
257 
/**
 * Given a number of seconds, returns a formatted human-readable string
 * saying how long the period is.
 */
261 WvString secondstoa(unsigned int total_seconds);
262 
/**
 * Finds a string in an array and returns its index.
 * The comparison is case-insensitive unless case_sensitive is true.
 * NOTE(review): how the end of 'table' is marked and what is returned when
 * 'str' is not found are not visible here -- confirm against strutils.cc.
 */
267 int lookup(const char *str, const char * const *table,
268  bool case_sensitive = false);
269 
277 template<class StringCollection>
278 void strcoll_split(StringCollection &coll, WvStringParm _s,
279  const char *splitchars = " \t", int limit = 0)
280 {
281  WvString s(_s);
282  char *sptr = s.edit(), *eptr, oldc;
283 
284  // Simple if statement to catch (and add) empty (but not NULL) strings.
285  if (sptr && !*sptr )
286  {
287  WvString *emptyString = new WvString("");
288  coll.add(emptyString, true);
289  }
290 
291  // Needed to catch delimeters at the beginning of the string.
292  bool firstrun = true;
293 
294  while (sptr && *sptr)
295  {
296  --limit;
297 
298  if (firstrun)
299  {
300  firstrun = false;
301  }
302  else
303  {
304  sptr += strspn(sptr, splitchars);
305  }
306 
307  if (limit)
308  {
309  eptr = sptr + strcspn(sptr, splitchars);
310  }
311  else
312  {
313  eptr = sptr + strlen(sptr);
314  }
315 
316  oldc = *eptr;
317  *eptr = 0;
318 
319  WvString *newstr = new WvString(sptr);
320  coll.add(newstr, true);
321 
322  *eptr = oldc;
323  sptr = eptr;
324  }
325 }
326 
327 
341 template<class StringCollection>
342 void strcoll_splitstrict(StringCollection &coll, WvStringParm _s,
343  const char *splitchars = " \t", int limit = 0)
344 {
345  WvString s(_s);
346  char *cur = s.edit();
347 
348  if (!cur) return;
349 
350  for (;;)
351  {
352  --limit;
353  if (!limit)
354  {
355  coll.add(new WvString(cur), true);
356  break;
357  }
358 
359  int len = strcspn(cur, splitchars);
360 
361  char tmp = cur[len];
362  cur[len] = 0;
363  coll.add(new WvString(cur), true);
364  cur[len] = tmp;
365 
366  if (!cur[len]) break;
367  cur += len + 1;
368  }
369 }
370 
371 
372 #ifndef _WIN32 // don't have regex on win32
373 
380 template<class StringCollection>
381 void strcoll_split(StringCollection &coll, WvStringParm s,
382  const WvRegex &regex, int limit = 0)
383 {
384  int start = 0;
385  int match_start, match_end;
386  int count = 0;
387 
388  while ((limit == 0 || count < limit)
389  && regex.continuable_match(&s[start], match_start, match_end)
390  && match_end > 0)
391  {
392  WvString *substr = new WvString;
393  int len = match_start;
394  substr->setsize(len+1);
395  memcpy(substr->edit(), &s[start], len);
396  substr->edit()[len] = '\0';
397  coll.add(substr, true);
398  start += match_end;
399  ++count;
400  }
401 
402  if (limit == 0 || count < limit)
403  {
404  WvString *last = new WvString(&s[start]);
405  last->unique();
406  coll.add(last, true);
407  }
408 }
409 #endif
410 
411 
417 template<class StringCollection>
418 WvString strcoll_join(const StringCollection &coll,
419  const char *joinchars = " \t")
420 {
421  size_t joinlen = strlen(joinchars);
422  size_t totlen = 1;
423  typename StringCollection::Iter s(
424  const_cast<StringCollection&>(coll));
425  for (s.rewind(); s.next(); )
426  {
427  if (s->cstr())
428  totlen += strlen(s->cstr());
429  totlen += joinlen;
430  }
431  totlen -= joinlen; // no join chars at tail
432 
433  WvString total;
434  total.setsize(totlen);
435 
436  char *te = total.edit();
437  te[0] = 0;
438  bool first = true;
439  for (s.rewind(); s.next(); )
440  {
441  if (first)
442  first = false;
443  else
444  strcat(te, joinchars);
445  if (s->cstr())
446  strcat(te, s->cstr());
447  }
448  return total;
449 }
450 
456 
/** Replace any consecutive instances of character c with a single one. */
458 WvString undupe(WvStringParm s, char c);
459 
462 
465 
468 
/**
 * Inserts SI-style spacing into a number
 * (eg passing 9876543210 returns "9 876 543 210").
 */
473 WvString metriculate(const off_t i);
474 
480 
486 
/** Returns the string of length len starting at pos in line. */
493 WvString substr(WvString line, unsigned int pos, unsigned int len);
494 
500 
501 // Converts a string in decimal to an arbitrary numeric type
502 template<class T>
503 bool wvstring_to_num(WvStringParm str, T &n)
504 {
505  bool neg = false;
506  n = 0;
507 
508  for (const char *p = str; *p; ++p)
509  {
510  if (isdigit(*p))
511  {
512  n = n * T(10) + T(*p - '0');
513  }
514  else if ((const char *)str == p
515  && *p == '-')
516  {
517  neg = true;
518  }
519  else return false;
520  }
521 
522  if (neg)
523  n = -n;
524 
525  return true;
526 }
527 
528 /*
529  * Before using the C-style string escaping functions below, please consider
530  * using the functions in wvtclstring.h instead; they usually lead to much more
531  * human readable and manageable results, and allow representation of
532  * lists of strings.
533  */
534 
536 {
537  char ch;
538  const char *esc;
539 };
540 extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES[];
541 
/// Converts data into a C-style string constant.
543 //
544 // If data is NULL, returns WvString::null; otherwise, returns an allocated
545 // WvString containing the C-style string constant that represents the data.
546 //
547 // All printable characters including space except " and \ are represented
548 // without escaping.
549 //
550 // The usual C escapes are performed, such as \n, \r, \", \\ and \0.
551 //
552 // All other characters are escaped in uppercase hex form, eg. \x9E
553 //
554 // The extra_escapes parameter allows for additional characters beyond
555 // the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
556 // escape { and } as < and >, which allows the resulting strings to be
557 // TCL-string coded without ridiculous double-escaping.
558 //
559 WvString cstr_escape(const void *data, size_t size,
560  const CStrExtraEscape extra_escapes[] = NULL);
561 
/// Converts a C-style string constant into data.
563 //
564 // This function does *not* include the trailing null that a C compiler would --
565 // if you want this null, put \0 at the end of the C-style string
566 //
567 // If cstr is correctly formatted and max_size is large enough for the
568 // resulting data, returns true and size will equal the size of the
569 // resulting data. If data is not NULL it will contain this data.
570 //
571 // If cstr is correctly formatted but max_size is too small for the resulting
572 // data, returns false and size will equal the minimum value of max_size
573 // for this function to have returned true. If data is non-NULL it will
574 // contain the first max_size bytes of resulting data.
575 //
576 // If cstr is incorrectly formatted, returns false and size will equal 0.
577 //
578 // This function works just as well on multiple, whitespace-separated
579 // C-style strings. This allows you to concatenate strings produced
580 // by cstr_escape, and the result of cstr_unescape will be the data blocks
581 // concatenated together. This implies that the empty string corresponds
582 // to a valid data block of length zero; however, a null string still returns
583 // an error.
584 //
585 // The extra_escapes parameter must match that used in the call to
586 // cstr_escape used to produce the escaped strings.
587 //
588 bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
589  const CStrExtraEscape extra_escapes[] = NULL);
590 
/**
 * Returns true if 'str' is a decimal integer: an optional leading '-'
 * followed by one or more decimal digits and nothing else.
 *
 * Returns false for a null pointer, an empty string, a lone "-", a leading
 * '+', embedded whitespace or any other non-digit character.
 */
static inline bool is_int(const char *str)
{
    if (!str)
        return false;

    // Optional sign.
    if (*str == '-')
        ++str;

    // At least one digit is required.
    if (!*str)
        return false;

    for (; *str; ++str)
    {
        // isdigit() is only defined for unsigned char values and EOF, so a
        // plain char with the high bit set must be converted first.
        if (!isdigit(static_cast<unsigned char>(*str)))
            return false;
    }

    return true;
}
608 
/** Converts a pointer into a string, like glibc's p formatter would do. */
611 WvString ptr2str(void* ptr);
612 
613 #endif // __WVSTRUTILS_H
WvString::edit
char * edit()
make the string editable, and return a non-const (char*)
Definition: wvstring.h:397
intl_datetime
WvString intl_datetime(time_t _when=-1)
Return the local date and time (in format of ISO 8601) out of _when.
Definition: strutils.cc:1274
wvhex.h
intl_gmtoff
time_t intl_gmtoff(time_t t)
Return the number of seconds by which localtime (at the given timestamp) is offset from GMT.
Definition: strutils.cc:1294
intl_time
WvString intl_time(time_t _when=-1)
Return the local time (in format of ISO 8601) out of _when.
Definition: strutils.cc:1246
lookup
int lookup(const char *str, const char *const *table, bool case_sensitive=false)
Finds a string in an array and returns its index.
Definition: strutils.cc:850
fqdomainname
WvString fqdomainname()
Get the fqdn of the local host, using gethostbyname() and gethostname()
Definition: strutils.cc:893
getfilename
WvString getfilename(WvStringParm fullname)
Take a full path/file name and splits it up into respective pathname and filename.
Definition: strutils.cc:506
strcoll_join
WvString strcoll_join(const StringCollection &coll, const char *joinchars=" \t")
Concatenates all strings in a collection and returns the result.
Definition: wvstrutils.h:418
url_decode
WvString url_decode(WvStringParm str, bool no_space=false)
Converts escaped characters (things like %20 etc.) from web URLS into their normal ASCII representati...
Definition: strutils.cc:311
passwd_md5
WvString passwd_md5(const char *str)
Similar to crypt(), but this randomly selects its own salt.
Definition: strcrypt.cc:38
sizetoa
WvString sizetoa(unsigned long long blocks, unsigned long blocksize=1, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a number of blocks and a blocksize (default==1 byte), return a WvString containing a human-read...
Definition: strutils.cc:708
isnewline
bool isnewline(char c)
Returns true if 'c' is a newline or carriage return character.
Definition: strutils.cc:304
CStrExtraEscape
Definition: wvstrutils.h:536
rfc1123_date
WvString rfc1123_date(time_t _when)
Returns an RFC1123-compatible date made out of _when.
Definition: strutils.cc:838
undupe
WvString undupe(WvStringParm s, char c)
Replace any consecutive instances of character c with a single one.
Definition: strutils.cc:814
diff_dates
WvString diff_dates(time_t t1, time_t t2)
Returns the difference between two dates in a human readable format.
Definition: strutils.cc:376
trim_string
char * trim_string(char *string)
Trims whitespace from the beginning and end of the character string, including carriage return / line...
Definition: strutils.cc:59
passwd_crypt
WvString passwd_crypt(const char *str)
Similar to crypt(), but this randomly selects its own salt.
Definition: strcrypt.cc:14
WvRegex::continuable_match
bool continuable_match(WvStringParm string, int &match_start, int &match_end, WVREGEX_REGS_DECL) const
Match a given string against the compiled regular expression, capturing the start and end positions o...
Definition: wvregex.h:230
spacecat
WvString spacecat(WvStringParm a, WvStringParm b, char sep=' ', bool onesep=false)
return the string formed by concatenating string 'a' and string 'b' with the 'sep' character between ...
Definition: strutils.cc:114
local_date
WvString local_date(time_t _when=-1)
Return the local date (TZ applied) out of _when.
Definition: strutils.cc:1232
encode_hostname_as_DN
WvString encode_hostname_as_DN(WvStringParm hostname)
Example: encode_hostname_as_DN("www.fizzle.com") will result in dc=www,dc=fizzle,dc=com,...
Definition: strutils.cc:444
replace_char
void replace_char(void *string, char c1, char c2, int length)
Replace all instances of c1 with c2 for the first 'length' characters in 'string'.
Definition: strutils.cc:178
WvString
WvString is an implementation of a simple and efficient printable-string class.
Definition: wvstring.h:330
afterstr
WvString afterstr(WvStringParm line, WvStringParm a)
Returns everything in line (exclusively) after a.
Definition: strutils.cc:965
url_encode
WvString url_encode(WvStringParm str, WvStringParm unsafe="")
Converts all those pesky spaces, colons, and other nasties into nice unreadable Quasi-Unicode codes.
Definition: strutils.cc:351
depunctuate
WvString depunctuate(WvStringParm line)
Removes any trailing punctuation ('.
Definition: strutils.cc:1306
sizekitoa
WvString sizekitoa(unsigned long long kbytes, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a size in kilobytes, return a human readable size.
Definition: strutils.cc:742
is_word
bool is_word(const char *string)
Returns true if all characters in 'string' are isalnum() (alphanumeric).
Definition: strutils.cc:228
non_breaking
char * non_breaking(const char *string)
Replaces all whitespace characters in the string with non-breaking spaces (&#160;) for use with web stuff.
Definition: strutils.cc:154
nice_hostname
WvString nice_hostname(WvStringParm name)
Given a hostname, turn it into a "nice" one.
Definition: strutils.cc:460
WvRegex
WvRegex – Unified support for regular expressions.
Definition: wvregex.h:48
substr
WvString substr(WvString line, unsigned int pos, unsigned int len)
Returns the string of length len starting at pos in line.
Definition: strutils.cc:998
strcount
int strcount(WvStringParm s, const char c)
How many times does 'c' occur in "s"?
Definition: strutils.cc:433
beforestr
WvString beforestr(WvStringParm line, WvStringParm a)
Returns everything in line (exclusively) before 'a'.
Definition: strutils.cc:981
snip_string
char * snip_string(char *haystack, char *needle)
Snip off the first part of 'haystack' if it consists of 'needle'.
Definition: strutils.cc:187
rfc822_date
WvString rfc822_date(time_t _when=-1)
Returns an RFC822-compatible date made out of _when, or, if _when < 0, out of the current time.
Definition: strutils.cc:395
sizektoa
WvString sizektoa(unsigned long long kbytes, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a size in kilobytes, return a human readable size.
Definition: strutils.cc:721
intl_date
WvString intl_date(time_t _when=-1)
Return the local date (in format of ISO 8601) out of _when.
Definition: strutils.cc:1260
strlwr
char * strlwr(char *string)
In-place modify a character string so that all contained letters are in lower case.
Definition: strutils.cc:201
WvString::unique
WvString & unique()
make the buf and str pointers owned only by this WvString.
Definition: wvstring.cc:306
strcoll_splitstrict
void strcoll_splitstrict(StringCollection &coll, WvStringParm _s, const char *splitchars=" \t", int limit=0)
Splits a string and adds each substring to a collection.
Definition: wvstrutils.h:342
strcoll_split
void strcoll_split(StringCollection &coll, WvStringParm _s, const char *splitchars=" \t", int limit=0)
Splits a string and adds each substring to a collection.
Definition: wvstrutils.h:278
metriculate
WvString metriculate(const off_t i)
Inserts SI-style spacing into a number (eg passing 9876543210 returns "9 876 543 210")
Definition: strutils.cc:926
strupr
char * strupr(char *string)
In-place modify a character string so that all contained letters are in upper case.
Definition: strutils.cc:214
hexdump_buffer
WvString hexdump_buffer(const void *buf, size_t len, bool charRep=true)
Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
Definition: strutils.cc:245
sizeitoa
WvString sizeitoa(unsigned long long blocks, unsigned long blocksize=1, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a number of blocks and a blocksize (default==1 byte), return a WvString containing a human-read...
Definition: strutils.cc:729
secondstoa
WvString secondstoa(unsigned int total_seconds)
Given a number of seconds, returns a formatted human-readable string saying how long the period is.
Definition: strutils.cc:750
cstr_escape
WvString cstr_escape(const void *data, size_t size, const CStrExtraEscape extra_escapes[]=NULL)
Converts data into a C-style string constant.
Definition: strutils.cc:1143
wvgetcwd
WvString wvgetcwd()
Get the current working directory without a fixed-length buffer.
Definition: strutils.cc:905
hostname
WvString hostname()
Do gethostname() without a fixed-length buffer.
Definition: strutils.cc:870
terminate_string
char * terminate_string(char *string, char c)
Add character c to the end of a string after removing terminating carriage returns/linefeeds if any.
Definition: strutils.cc:32
WvFastString
A WvFastString acts exactly like a WvString, but can take (const char *) strings without needing to a...
Definition: wvstring.h:94
backslash_escape
WvString backslash_escape(WvStringParm s1)
Returns a string with a backslash in front of every non alphanumeric character in s1.
Definition: strutils.cc:410
cstr_unescape
bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size, const CStrExtraEscape extra_escapes[]=NULL)
Converts a C-style string constant into data.
Definition: strutils.cc:1182
strreplace
WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b)
Replace any instances of "a" with "b" in "s".
Definition: strutils.cc:797
ptr2str
WvString ptr2str(void *ptr)
Converts a pointer into a string, like glibc's p formatter would do.
Definition: strutils.cc:1318