C++ Utilities
Loading...
Searching...
No Matches
string.h
Go to the documentation of this file.
1#pragma once
2// SPDX-FileCopyrightText: 2025 Nessan Fitzmaurice <nzznfitz+gh@icloud.com>
3// SPDX-License-Identifier: MIT
4
8
9#include <algorithm>
10#include <cctype>
11#include <charconv>
12#include <format>
13#include <iterator>
14#include <optional>
15#include <ranges>
16#include <regex>
17#include <string>
18#include <vector>
19
20namespace utilities {
21
22// --------------------------------------------------------------------------------------------------------------------
23// We start with the convert-an-input-string-in-place versions which only work on *non-const* input strings.
24// --------------------------------------------------------------------------------------------------------------------
25
/// @brief Converts a code point to its upper case equivalent if it is an ASCII lowercase letter.
///
/// Only the 26 ASCII letters `a`..`z` are mapped; every other code point is returned unchanged.
///
/// @param cp The code point to convert.
/// @return The upper case code point for `a`..`z`, otherwise `cp` itself.
[[nodiscard]] constexpr char32_t
to_upper(char32_t cp) noexcept
{
    // Anything outside the ASCII lowercase range passes straight through.
    if (cp < U'a' || cp > U'z') return cp;

    // ASCII upper and lower case letters sit exactly 32 code points apart.
    return static_cast<char32_t>(cp - (U'a' - U'A'));
}
43
/// @brief Converts a code point to its lower case equivalent if it is an ASCII uppercase letter.
///
/// Only the 26 ASCII letters `A`..`Z` are mapped; every other code point is returned unchanged.
///
/// @param cp The code point to convert.
/// @return The lower case code point for `A`..`Z`, otherwise `cp` itself.
[[nodiscard]] constexpr char32_t
to_lower(char32_t cp) noexcept
{
    // Anything outside the ASCII uppercase range passes straight through.
    if (cp < U'A' || cp > U'Z') return cp;

    // ASCII upper and lower case letters sit exactly 32 code points apart.
    return static_cast<char32_t>(cp + (U'a' - U'A'));
}
61
73inline void
74upper_case(std::string& str)
75{
76 std::transform(str.begin(), str.end(), str.begin(), [](char ch) {
77 return static_cast<char>(to_upper(static_cast<char32_t>(static_cast<unsigned char>(ch))));
78 });
79}
80
92inline void
93lower_case(std::string& str)
94{
95 std::transform(str.begin(), str.end(), str.begin(), [](char ch) {
96 return static_cast<char>(to_lower(static_cast<char32_t>(static_cast<unsigned char>(ch))));
97 });
98}
99
/// @brief Removes any leading white-space from a string in-place.
///
/// White-space is whatever `std::isspace` reports for the current C locale.
///
/// @param str The string to trim; it is modified directly.
inline void
trim_left(std::string& str)
{
    // The predicate takes `unsigned char` on purpose: handing std::isspace a negative value
    // (any byte >= 0x80 where plain char is signed) is undefined behaviour.
    const auto first_kept =
        std::find_if(str.begin(), str.end(), [](unsigned char ch) { return !std::isspace(ch); });
    str.erase(str.begin(), first_kept);
}
113
/// @brief Removes any trailing white-space from a string in-place.
///
/// White-space is whatever `std::isspace` reports for the current C locale.
///
/// @param str The string to trim; it is modified directly.
inline void
trim_right(std::string& str)
{
    // The predicate takes `unsigned char` on purpose: handing std::isspace a negative value
    // (any byte >= 0x80 where plain char is signed) is undefined behaviour.
    const auto last_kept =
        std::find_if(str.rbegin(), str.rend(), [](unsigned char ch) { return !std::isspace(ch); });

    // base() converts the reverse iterator into the forward position just past the last kept character.
    str.erase(last_kept.base(), str.end());
}
127
136inline void
137trim(std::string& str)
138{
139 trim_left(str);
140 trim_right(str);
141}
142
/// @brief Replace the first occurrence of a target substring with some other string in-place.
///
/// The string is left untouched when `target` does not occur in it.
///
/// @param str         The string to edit; it is modified directly.
/// @param target      The substring to look for, searching from the front.
/// @param replacement The text substituted for the first match.
inline void
replace_left(std::string& str, std::string_view target, std::string_view replacement)
{
    if (const auto where = str.find(target); where != std::string::npos) {
        str.replace(where, target.length(), replacement);
    }
}
157
/// @brief Replace the final occurrence of a target substring with some other string in-place.
///
/// The string is left untouched when `target` does not occur in it.
///
/// @param str         The string to edit; it is modified directly.
/// @param target      The substring to look for, searching from the back.
/// @param replacement The text substituted for the last match.
inline void
replace_right(std::string& str, std::string_view target, std::string_view replacement)
{
    if (const auto where = str.rfind(target); where != std::string::npos) {
        str.replace(where, target.length(), replacement);
    }
}
172
/// @brief Replace all occurrences of a target substring with some other string in-place.
///
/// Matches are found left to right and never overlap. Text inserted by a replacement is not
/// rescanned, so a replacement that itself contains `target` does not recurse.
/// An empty `target` is treated as "nothing to replace" and leaves the string unchanged.
///
/// @param str         The string to edit; it is modified directly.
/// @param target      The substring to look for.
/// @param replacement The text substituted for every match.
inline void
replace(std::string& str, std::string_view target, std::string_view replacement)
{
    // An empty target matches at every position, so the loop below would never terminate.
    if (target.empty()) return;

    std::size_t p = 0;
    while ((p = str.find(target, p)) != std::string::npos) {
        str.replace(p, target.length(), replacement);
        p += replacement.length(); // Resume after the inserted text so it is not matched again.
    }
}
190
204inline void
205replace_space(std::string& s, const std::string& with = " ", bool also_trim = true)
206{
207 if (also_trim) trim(s);
208 std::regex ws{R"(\s+)"};
209 s = std::regex_replace(s, ws, with);
210}
211
223inline void
224condense(std::string& s, bool also_trim = true)
225{
226 replace_space(s, " ", also_trim);
227}
228
/// @brief Erase the first occurrence of a target substring.
///
/// The string is left untouched when `target` does not occur in it.
///
/// @param str    The string to edit; it is modified directly.
/// @param target The substring to remove, searching from the front.
inline void
erase_left(std::string& str, std::string_view target)
{
    if (const auto where = str.find(target); where != std::string::npos) {
        str.erase(where, target.length());
    }
}
243
/// @brief Erase the last occurrence of a target substring.
///
/// The string is left untouched when `target` does not occur in it.
///
/// @param str    The string to edit; it is modified directly.
/// @param target The substring to remove, searching from the back.
inline void
erase_right(std::string& str, std::string_view target)
{
    if (const auto where = str.rfind(target); where != std::string::npos) {
        str.erase(where, target.length());
    }
}
258
/// @brief Erase all occurrences of a target substring.
///
/// After each removal the search resumes at the position of the removed text, so an occurrence
/// formed by the characters on either side of a removal that starts at that position is erased too.
/// An empty `target` is treated as "nothing to erase" and leaves the string unchanged.
///
/// @param str    The string to edit; it is modified directly.
/// @param target The substring to remove.
inline void
erase(std::string& str, std::string_view target)
{
    // An empty target is "found" at every position and erasing it removes nothing,
    // so the loop below would spin forever.
    if (target.empty()) return;

    std::size_t p = 0;
    while ((p = str.find(target, p)) != std::string::npos) str.erase(p, target.length());
}
273
/// @brief Removes "surrounds" from a string in-place so for example: (text) -> text.
///
/// While the string has at least two characters and does not start with an alpha-numeric character,
/// the first and last characters are stripped whenever they form a pair: `(`..`)`, `[`..`]`, `{`..`}`,
/// `<`..`>`, or any other non-alpha-numeric character that appears at both ends (e.g. quotes).
/// Nested surrounds such as `[(text)]` are therefore removed layer by layer.
///
/// @param s The string to edit; it is modified directly.
inline void
remove_surrounds(std::string& s)
{
    while (s.length() > 1) {
        // If the first character is alpha-numeric we are done.
        // The cast matters: passing a negative char to std::isalnum is undefined behaviour.
        const char first = s.front();
        if (std::isalnum(static_cast<unsigned char>(first))) return;

        // Work out which closing character pairs with the opening one.
        // Brackets have a distinct partner; anything else must simply be repeated at the end.
        char closer = first;
        switch (first) {
            case '(': closer = ')'; break;
            case '[': closer = ']'; break;
            case '{': closer = '}'; break;
            case '<': closer = '>'; break;
            default: break;
        }

        // No match => no surround so we can exit.
        if (s.back() != closer) return;

        // Strip one layer in place (no temporary string) and go around again.
        s.pop_back();
        s.erase(0, 1);
    }
}
329
338inline void
339standardize(std::string& s)
340{
341 condense(s);
342 upper_case(s);
344 trim(s);
345}
346
347// --------------------------------------------------------------------------------------------------------------------
348// Next we have all the counterpart create-a-new-string that is a copy of input-string with the appropriate conversion.
349// These happily work on *const* input strings as the inputs are left unaltered.
350// --------------------------------------------------------------------------------------------------------------------
351
362inline std::string
363upper_cased(std::string_view input)
364{
365 std::string s{input};
366 upper_case(s);
367 return s;
368}
369
380inline std::string
381lower_cased(std::string_view input)
382{
383 std::string s{input};
384 lower_case(s);
385 return s;
386}
387
396inline std::string
397trimmed_left(std::string_view input)
398{
399 std::string s{input};
400 trim_left(s);
401 return s;
402}
403
412inline std::string
413trimmed_right(std::string_view input)
414{
415 std::string s{input};
416 trim_right(s);
417 return s;
418}
419
428inline std::string
429trimmed(std::string_view input)
430{
431 std::string s{input};
432 trim(s);
433 return s;
434}
435
444inline std::string
445replaced_left(std::string_view input, std::string_view target, std::string_view replacement)
446{
447 std::string s{input};
448 replace_left(s, target, replacement);
449 return s;
450}
451
460inline std::string
461replaced_right(std::string_view input, std::string_view target, std::string_view replacement)
462{
463 std::string s{input};
464 replace_right(s, target, replacement);
465 return s;
466}
467
476inline std::string
477replaced(std::string_view input, std::string_view target, std::string_view replacement)
478{
479 std::string s{input};
480 replace(s, target, replacement);
481 return s;
482}
483
496inline std::string
497replaced_space(std::string_view input, const std::string& with = " ", bool also_trim = true)
498{
499 std::string s{input};
500 replace_space(s, with, also_trim);
501 return s;
502}
503
515inline std::string
516condensed(std::string_view input, bool also_trim = true)
517{
518 std::string s{input};
519 condense(s, also_trim);
520 return s;
521}
522
531inline std::string
532erased_left(std::string_view input, std::string_view target)
533{
534 std::string s{input};
535 erase_left(s, target);
536 return s;
537}
538
547inline std::string
548erased_right(std::string_view input, std::string_view target)
549{
550 std::string s{input};
551 erase_right(s, target);
552 return s;
553}
554
563inline std::string
564erased(std::string_view input, std::string_view target)
565{
566 std::string s{input};
567 erase(s, target);
568 return s;
569}
570
582inline std::string
583removed_surrounds(std::string_view input)
584{
585 std::string s{input};
587 return s;
588}
589
600inline std::string
601standardized(std::string_view input)
602{
603 std::string s{input};
604 standardize(s);
605 return s;
606}
607
608// --------------------------------------------------------------------------------------------------------------------
609// Next some functions that have no 'in-place' versus 'out-of-place' versions.
610// --------------------------------------------------------------------------------------------------------------------
611
/// @brief Check if a string starts with a particular prefix string.
///
/// An empty prefix matches every string.
///
/// @param str    The string to inspect.
/// @param prefix The prefix to look for.
/// @return True if the first `prefix.size()` characters of `str` equal `prefix`.
inline bool
starts_with(std::string_view str, std::string_view prefix)
{
    // Compare only the leading characters; a general find() would scan the whole string
    // whenever the prefix is not at the front.
    return str.size() >= prefix.size() && str.compare(0, prefix.size(), prefix) == 0;
}
625
/// @brief Check if a string ends with a particular suffix string.
///
/// An empty suffix matches every string.
///
/// @param str    The string to inspect.
/// @param suffix The suffix to look for.
/// @return True if the last `suffix.size()` characters of `str` equal `suffix`.
inline bool
ends_with(std::string_view str, std::string_view suffix)
{
    // The size check comes first so the subtraction below cannot wrap around.
    // Comparing just the tail avoids the backwards scan that rfind() performs.
    return str.size() >= suffix.size() &&
           str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0;
}
640
/// @brief Try to read a value of a particular type from a string.
///
/// Any leading spaces and `+` characters are skipped, then `std::from_chars` parses as much of the
/// remaining text as it can.
///
/// @tparam T   The type to parse; it must be one that `std::from_chars` supports.
/// @param in   The text to parse.
/// @param next Optional out-parameter. If non-null it receives the position where parsing stopped
///             (the `ptr` reported by `std::from_chars`), or the end of `in` when there was nothing to parse.
/// @return The parsed value, or `std::nullopt` if no value could be read.
template<typename T>
constexpr std::optional<T>
possible(std::string_view in, const char** next = nullptr)
{
    // Skip the leading characters from_chars will not accept. If nothing else remains (empty or
    // all-space/plus input) bail out: remove_prefix() with npos would be undefined behaviour.
    const auto start = in.find_first_not_of("+ ");
    if (start == std::string_view::npos) {
        if (next) *next = in.data() + in.size();
        return std::nullopt;
    }
    in.remove_prefix(start);

    // from_chars wants raw character pointers; string_view iterators need not be pointers.
    const char* const first = in.data();
    const char* const last = first + in.size();

    T    retval{};
    auto ec = std::from_chars(first, last, retval);
    if (next) *next = ec.ptr;
    if (ec.ec != std::errc{}) return std::nullopt;
    return retval;
}
662
672template<std::input_iterator InputIt, std::forward_iterator ForwardIt, typename BinaryFunc>
673constexpr void
674for_each_token(InputIt ib, InputIt ie, ForwardIt db, ForwardIt de, BinaryFunc function)
675{
676 while (ib != ie) {
677 const auto x = std::find_first_of(ib, ie, db, de); // Find a token in the input text
678 function(ib, x); // Call the user supplied function on the token
679 if (x == ie) break; // Stop if we hit the end of the input text
680 ib = std::next(x); // Otherwise go again past that token we just found
681 }
682}
683
/// @brief Tokenize a string and put the tokens into the supplied output container.
///
/// @tparam Container_t A container whose `emplace_back` accepts a pair of `std::string_view` iterators.
/// @param input        The text to split.
/// @param output       Receives the tokens; new tokens are appended to whatever it already holds.
/// @param delimiters   Each character in this string acts as a token separator.
/// @param skip         If true, empty tokens (e.g. between two adjacent delimiters) are dropped.
template<typename Container_t>
constexpr void
tokenize(std::string_view input, Container_t& output, std::string_view delimiters = "\t,;: ", bool skip = true)
{
    // Appends one token to the output unless it is empty and empties are being skipped.
    const auto keep_token = [&output, skip](auto token_begin, auto token_end) {
        if (skip && token_begin == token_end) return;
        output.emplace_back(token_begin, token_end);
    };

    for_each_token(input.cbegin(), input.cend(), delimiters.cbegin(), delimiters.cend(), keep_token);
}
712
726inline std::vector<std::string>
727split(std::string_view input, std::string_view delimiters = "\t,;: ", bool skip = true)
728{
729 std::vector<std::string> output;
730 output.reserve(input.size() / 2);
731 tokenize(input, output, delimiters, skip);
732 return output;
733}
734
/// @brief A version of regex_replace(...) where each match in turn is run through a function you supply.
///
/// The output is built by copying the text between matches verbatim and substituting `f(match)` for
/// each match of `re`.
///
/// @param ib Start of the text to process.
/// @param ie End of the text to process.
/// @param re The regular expression to search for.
/// @param f  Called with each `std::match_results<Iter>`; its result is appended in place of the match.
/// @return The text with every match replaced by the corresponding result of `f`.
template<typename Iter, typename Traits, typename CharT, typename UnaryFunction>
std::basic_string<CharT>
regex_replace(Iter ib, Iter ie, const std::basic_regex<CharT, Traits>& re, UnaryFunction f)
{
    std::basic_string<CharT> result;

    // Everything before this position has already been written to the result.
    Iter copied_up_to = ib;

    const std::regex_iterator<Iter> no_more_matches;
    for (std::regex_iterator<Iter> it(ib, ie, re); it != no_more_matches; ++it) {
        const std::match_results<Iter>& match = *it;

        // Copy the unmatched text between the previous match and this one ...
        result.append(copied_up_to, match[0].first);

        // ... then whatever the caller wants in place of the match itself.
        result.append(f(match));

        copied_up_to = match[0].second;
    }

    // Finally copy whatever follows the last match (or the whole input if nothing matched).
    result.append(copied_up_to, ie);
    return result;
}
774
/// @brief A version of regex_replace(...) where each match in turn is run through a function you supply.
///
/// Convenience overload that processes a whole `std::string` by forwarding its iterator range to
/// the iterator-based overload.
///
/// @param s  The text to process; it is left unaltered.
/// @param re The regular expression to search for.
/// @param f  Called with each match; its result is substituted for the match.
/// @return The text with every match replaced by the corresponding result of `f`.
template<typename Traits, typename CharT, typename UnaryFunction>
std::string
regex_replace(const std::string& s, const std::basic_regex<CharT, Traits>& re, UnaryFunction f)
{
    const auto first = s.cbegin();
    const auto last = s.cend();
    return regex_replace(first, last, re, f);
}
788
789} // namespace utilities
The namespace for the utilities library.
Definition formatter.h:14
bool ends_with(std::string_view str, std::string_view suffix)
Check if a string ends with a particular suffix string.
Definition string.h:635
std::string trimmed(std::string_view input)
Returns a new string that is a copy of the input with all leading and trailing white-space removed.
Definition string.h:429
void upper_case(std::string &str)
Converts a string to upper case in-place.
Definition string.h:74
void lower_case(std::string &str)
Converts a string to lower case in place.
Definition string.h:93
void replace_right(std::string &str, std::string_view target, std::string_view replacement)
Replace the final occurrence of a target substring with some other string in-place.
Definition string.h:167
std::string replaced_space(std::string_view input, const std::string &with=" ", bool also_trim=true)
Returns a new string that is a copy of the input with all contiguous white space sequences replaced.
Definition string.h:497
std::string trimmed_right(std::string_view input)
Returns a new string that is a copy of the input with trailing white-space removed.
Definition string.h:413
constexpr std::optional< T > possible(std::string_view in, const char **next=nullptr)
Try to read a value of a particular type from a string.
Definition string.h:653
std::string removed_surrounds(std::string_view input)
Returns a new string that is a copy of the input with "surrounds" stripped from it.
Definition string.h:583
std::string upper_cased(std::string_view input)
Returns a new string that is a copy of the input converted to upper case.
Definition string.h:363
constexpr void for_each_token(InputIt ib, InputIt ie, ForwardIt db, ForwardIt de, BinaryFunc function)
Given input text and delimiters, tokenize the text and then pass the tokens to a function you suppl...
Definition string.h:674
void remove_surrounds(std::string &s)
Removes "surrounds" from a string in-place so for example: (text) -> text.
Definition string.h:286
bool starts_with(std::string_view str, std::string_view prefix)
Check if a string starts with a particular prefix string.
Definition string.h:621
constexpr char32_t to_lower(char32_t cp) noexcept
Converts a wide character to its lower case equivalent if it is an uppercase letter.
Definition string.h:57
std::string replaced_right(std::string_view input, std::string_view target, std::string_view replacement)
Returns a new string that is a copy of the input with the final occurrence of a target substring repl...
Definition string.h:461
void replace_space(std::string &s, const std::string &with=" ", bool also_trim=true)
Replace all contiguous white space sequences in a string in-place.
Definition string.h:205
std::vector< std::string > split(std::string_view input, std::string_view delimiters="\t,;: ", bool skip=true)
Tokenize a string and return the tokens as a vector of strings.
Definition string.h:727
void trim(std::string &str)
Removes all leading & trailing white-space from a string in-place.
Definition string.h:137
std::string trimmed_left(std::string_view input)
Returns a new string that is a copy of the input with leading white-space removed.
Definition string.h:397
constexpr char32_t to_upper(char32_t cp) noexcept
Converts a wide character to its upper case equivalent if it is a lowercase letter.
Definition string.h:39
void erase(std::string &str, std::string_view target)
Erase all occurrences of a target substring.
Definition string.h:268
std::string erased_left(std::string_view input, std::string_view target)
Returns a new string that is a copy of the input with the first occurrence of a target substring eras...
Definition string.h:532
void trim_left(std::string &str)
Removes any leading white-space from a string in-place.
Definition string.h:109
void erase_left(std::string &str, std::string_view target)
Erase the first occurrence of a target substring.
Definition string.h:238
std::string replaced(std::string_view input, std::string_view target, std::string_view replacement)
Returns a new string that is a copy of the input with all occurrences of a target substring replaced.
Definition string.h:477
std::string condensed(std::string_view input, bool also_trim=true)
Returns a copy of the input with all contiguous white space sequences replaced with one space.
Definition string.h:516
std::string lower_cased(std::string_view input)
Returns a new string that is a copy of the input converted to lower case.
Definition string.h:381
void standardize(std::string &s)
"Standardize" a string – turns "[ hallo world ] " or " Hallo World" into "HALLO WORLD"
Definition string.h:339
void condense(std::string &s, bool also_trim=true)
Condense contiguous white space sequences in a string in-place.
Definition string.h:224
void trim_right(std::string &str)
Remove any trailing white-space from a string in-place.
Definition string.h:123
std::string erased_right(std::string_view input, std::string_view target)
Returns a new string that is a copy of the input with the last occurrence of a target substring erase...
Definition string.h:548
void replace(std::string &str, std::string_view target, std::string_view replacement)
Replace all occurrences of a target substring with some other string in-place.
Definition string.h:182
void erase_right(std::string &str, std::string_view target)
Erase the last occurrence of a target substring.
Definition string.h:253
constexpr void tokenize(std::string_view input, Container_t &output, std::string_view delimiters="\t,;: ", bool skip=true)
Tokenize a string and put the tokens into the supplied output container.
Definition string.h:701
void replace_left(std::string &str, std::string_view target, std::string_view replacement)
Replace the first occurrence of a target substring with some other string in-place.
Definition string.h:152
std::basic_string< CharT > regex_replace(Iter ib, Iter ie, const std::basic_regex< CharT, Traits > &re, UnaryFunction f)
A version of regex_replace(...) where each match in turn is run through a function you supply.
Definition string.h:745
std::string erased(std::string_view input, std::string_view target)
Returns a new string that is a copy of the input with all occurrences of a target substring erased...
Definition string.h:564
std::string replaced_left(std::string_view input, std::string_view target, std::string_view replacement)
Returns a new string that is a copy of the input with the first occurrence of a target substring repl...
Definition string.h:445
std::string standardized(std::string_view input)
Returns a "standardized" string that is a copy of the input.
Definition string.h:601