Skip to content

Instantly share code, notes, and snippets.

@cobalthex
Last active December 24, 2023 06:35
Show Gist options
  • Select an option

  • Save cobalthex/34d9582b77d4849ca2aa1e5bf5112d94 to your computer and use it in GitHub Desktop.

Select an option

Save cobalthex/34d9582b77d4849ca2aa1e5bf5112d94 to your computer and use it in GitHub Desktop.
String ops
#include <string>
#include <string_view>
#include <optional>
#include <iostream>
// Splits a string into tokens around a single separator character, handing out one token per
// GetNext() call.
//
// Lifetime: the input is NOT copied. The splitter keeps a view of `string`, and every returned
// token is a view into that same storage, so the caller must keep `string` alive and unmodified
// while the splitter or any token is in use. The whitespace set, in contrast, is copied: it is
// usually supplied as a temporary (the default argument or a literal) that dies as soon as the
// constructor returns, so holding only a view of it would dangle.
struct StringSplitter
{
    // separator:  character that delimits tokens.
    // string:     text to split (viewed, not copied - see the lifetime note above).
    // whitespace: characters that make a token count as "empty" when it consists only of them.
    StringSplitter(char separator, const std::string& string, const std::string& whitespace = " \r\n\t\v")
        : m_string(string)
        , m_whitespace(whitespace)
        , m_next(0)
        , m_separator(separator)
    {
    }

    // Returns the next token, or std::nullopt once the input is exhausted.
    //
    // ignoreEmpty == true:  tokens that are zero-length or made up solely of whitespace characters
    //                       are skipped. Tokens that are returned are NOT trimmed.
    // ignoreEmpty == false: every token is returned as-is and the whitespace set is not consulted.
    //                       An empty token that would follow a trailing separator is not reported,
    //                       because the cursor is already at the end of the input by then.
    std::optional<std::string_view> GetNext(bool ignoreEmpty = true)
    {
        if (m_next >= m_string.length())
        {
            return std::nullopt;
        }

        bool allWhitespace = true;
        size_t end = m_next;
        for (; end < m_string.length(); ++end)
        {
            const char current = m_string[end];
            if (current == m_separator)
            {
                if (ignoreEmpty && allWhitespace)
                {
                    // Nothing but whitespace since the last separator: drop it and keep scanning.
                    m_next = end + 1;
                    continue;
                }
                return TakeToken(end);
            }

            if (ignoreEmpty && m_whitespace.find(current) == std::string::npos)
            {
                allWhitespace = false;
            }
        }

        // Reached the end of the input without meeting another separator.
        if (ignoreEmpty && allWhitespace)
        {
            return std::nullopt;
        }
        return TakeToken(end);
    }

private:
    // Returns the view [m_next, end) and moves the cursor just past `end`.
    std::string_view TakeToken(size_t end)
    {
        const size_t start = m_next;
        m_next = end + 1;
        return m_string.substr(start, end - start);
    }

    std::string_view m_string; // non-owning view of the caller's string
    std::string m_whitespace;  // owned copy; the constructor argument is typically a temporary
    size_t m_next;             // index at which the next token starts
    char m_separator;
};
int main()
{
std::string foo = "a;b;c;d;e;f;;g";
StringSplitter splitter(';', foo);
while (true)
{
auto split = splitter.GetNext();
if (!split)
{
break;
}
std::cout << split.value() << "\n";
}
}
#include <string>
#include <cstring>
#include <iostream>
#include <iomanip>
// Characters treated as whitespace when trimming a line and when locating the end of a tag name.
const char* const c_ws = " \r\n\t";
using s32 = int;

// Returns true when `ch` is one of the characters listed in c_ws.
// strchr() also matches the terminating NUL of its haystack, so '\0' is rejected explicitly;
// otherwise a NUL embedded in a std::string would be mistaken for whitespace.
static bool IsStripWhitespace(char ch)
{
    return ch != '\0' && strchr(c_ws, ch) != nullptr;
}

// Removes tags from `line` in place and trims leading and trailing whitespace.
//
// A tag is either <name ...> or {name ...}. A candidate is left in the text untouched when:
//   - another '<' or '{' appears before its closing character,
//   - the closing character is never found,
//   - it is empty ("<>" / "{}"), or
//   - whitespace directly follows the opening character ("< >", "< b>").
// Whitespace inside the line, including whitespace that surrounded a removed tag, is preserved;
// only the two ends of the result are trimmed.
void StripTags(std::string& line)
{
    const size_t length = line.length();

    size_t nextWrite = 0;          // next position to write a kept character to (always <= read position)
    size_t lastNonWhitespace = 0;  // write position of the last kept non-whitespace character (right trim)
    bool anyNonWhitespace = false; // has any non-whitespace character been kept yet? (left trim)

    for (size_t i = 0; i < length; ++i)
    {
        const char current = line[i];

        if (current == '<' || current == '{')
        {
            const char terminal = (current == '<') ? '>' : '}';
            const size_t tagStart = i;
            size_t nameEnd = 0; // 0 means "not found yet"; a real value is always > tagStart
            bool isTag = true;

            // Scan forward to the closing character of the candidate tag.
            size_t tagEnd = i + 1;
            for (; tagEnd < length && line[tagEnd] != terminal; ++tagEnd)
            {
                // A second opening character means the first one was not a tag.
                if (line[tagEnd] == '<' || line[tagEnd] == '{')
                {
                    isTag = false;
                    break;
                }

                // The first whitespace inside the tag marks the end of its name.
                if (nameEnd == 0 && IsStripWhitespace(line[tagEnd]))
                {
                    nameEnd = tagEnd;
                }
            }

            // Only a candidate that actually reached its closing character can be a tag.
            if (isTag && tagEnd < length)
            {
                if (nameEnd == 0) // no whitespace in the tag (<foo>, not <foo bar>)
                {
                    nameEnd = tagEnd;
                }
                ++tagEnd; // one past the closing character

                const bool hasContent = tagEnd - tagStart > 2; // rejects "<>"
                const bool hasName = nameEnd - tagStart > 1;   // rejects whitespace right after the opener
                if (hasContent && hasName)
                {
                    i = tagEnd - 1; // the loop increment steps past the closing character
                    continue;
                }
            }

            // Not a tag: the opening character is kept as ordinary text.
            lastNonWhitespace = nextWrite;
            anyNonWhitespace = true;
        }
        else if (!IsStripWhitespace(current))
        {
            lastNonWhitespace = nextWrite;
            anyNonWhitespace = true;
        }

        // Shift the kept character left. Until the first non-whitespace character has been
        // seen the write position does not advance, which drops leading whitespace.
        line[nextWrite] = current;
        if (anyNonWhitespace)
        {
            ++nextWrite;
        }
    }

    // Trim the end of the line: keep everything up to and including the last non-whitespace
    // character, or nothing at all when no such character was kept.
    line.resize(anyNonWhitespace ? lastNonWhitespace + 1 : 0);
}
// One test case for the StripTags test driver: an input line paired with the text it is
// compared against after StripTags has run on a copy of the input.
struct FormatTest
{
// Input line; the driver copies it before stripping, so this member is never modified.
std::string test;
// Text the stripped copy must equal for the case to count as passed.
std::string expected;
};
int main()
{
FormatTest lines[] =
{
/* 0 */ { "test string", "test string" },
/* 1 */ { "test <i> string 2", "test string 2" },
/* 2 */ { "test <i> string 3 </i>", "test string 3" },
/* 3 */ { "test <i> string 4 </i>", "test string 4" },
/* 4 */ { "<u> test string 5", "test string 5" },
/* 5 */ { "test string 6 </s>", "test string 6" },
/* 6 */ { "<i><u><s>test string 7</s></u></i>", "test string 7" },
/* 7 */ { "<i><u><s> test string 8 </s></u></i>", "test string 8" },
/* 8 */ { "whitespace \t ", "whitespace" },
/* 9 */ { "whitespace \t z ", "whitespace \t z" },
/* 10 */ { "<double < open ", "<double < open" },
/* 11 */ { "<not < open > ", "<not < open >" },
/* 12 */ { "<once <open> ", "<once" },
/* 13 */ { "<open tag ", "<open tag" },
/* 14 */ { "< ", "<", },
/* 15 */ { " <", "<", },
/* 16 */ { " < ", "<", },
/* 17 */ { " <> ", "<>", },
/* 18 */ { " < > ", "< >", },
/* 19 */ {" </> ", "", },
/* 20 */ {" <tag with attributes=\"foo\"> ", "", },
};
constexpr bool c_showPassed = false;
constexpr s32 c_maxLength = 60;
std::cout << " ## RESULT: 'TEST'" << std::setw(c_maxLength - 9) << " " << "--> 'ACTUAL' " << std::setw(c_maxLength - 12) << " " << "-=- 'EXPECTED'\n\n";
bool allPassed = true;
for (size_t i = 0; i < std::size(lines); ++i)
{
const FormatTest& line = lines[i];
std::string manip = line.test;
StripTags(manip);
const bool passed = manip == line.expected;
allPassed &= passed;
if (!c_showPassed && passed)
{
continue;
}
std::cout << std::setw(3) << i << " ";
std::cout << (passed ? " pass" : "! fail") << ": ";
std::cout << "'" << line.test << "'";
std::cout << std::setw(c_maxLength - line.test.length());
std::cout << "--> '" << manip << "'";
std::cout << std::setw(c_maxLength - manip.length());
std::cout << " -=- '" << line.expected << "'\n";
}
if (allPassed && !c_showPassed)
{
std::cout << " All passed!\n";
}
return allPassed ? 0 : 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment