Function sample_substr

Synopsis

#include <samples/quickstart.cpp>

void sample_substr()

Description

about ryml's string views (from c4core)

demonstrate usage of ryml::substr/ryml::csubstr

These types are imported from the c4core library into the ryml namespace. You may have noticed above the use of a csubstr class. This class is defined in another library, c4core, which is imported by ryml. c4core is a library of multiplatform low-level utilities that I use in my projects. One of these utilities is c4::csubstr (the name comes from "constant substring"), which is a non-owning read-only string view with many methods that make it practical to use (I would certainly argue more practical than std::string). In fact, c4::csubstr and its writeable counterpart c4::substr are the workhorses of the ryml parsing and serialization code.

See
https://c4core.docsforge.com/master/api/c4/basic_substring/
See
https://c4core.docsforge.com/master/api/c4/#substr
See
https://c4core.docsforge.com/master/api/c4/#csubstr

Mentioned in

Source

Lines 495-1202 in samples/quickstart.cpp. Line 49 in samples/quickstart.cpp.

/** Demonstrates the usage of ryml::substr and ryml::csubstr.
 *
 * Every statement below is a self-checking example: each CHECK()
 * documents the expected result of the call it wraps. The examples
 * are grouped by topic, in this order: construction, mutation,
 * sub-views, containment/overlap queries, trimming/stripping,
 * prefix/suffix queries, searching, number spans, path helpers,
 * splitting and popping.
 *
 * Takes no arguments and returns nothing; results are reported
 * through the CHECK() macro. */
void sample_substr()
{
    // substr is a mutable view: pointer and length to a string in memory.
    // csubstr is a const-substr (immutable).

    // construct from explicit args
    {
        const char foobar_str[] = "foobar";
        auto s = ryml::csubstr(foobar_str, strlen(foobar_str));
        CHECK(s == "foobar");
        CHECK(s.size() == 6);
        CHECK(s.data() == foobar_str);
        // size()/data() are accessors for the len/str members:
        CHECK(s.size() == s.len);
        CHECK(s.data() == s.str);
    }

    // construct from a string array
    {
        const char foobar_str[] = "foobar";
        ryml::csubstr s = foobar_str;
        CHECK(s == "foobar");
        CHECK(s != "foobar0");
        CHECK(s.size() == 6);
        CHECK(s.data() == foobar_str);
        CHECK(s.size() == s.len);
        CHECK(s.data() == s.str);
    }
    // you can also declare directly in-place from an array:
    {
        ryml::csubstr s = "foobar";
        CHECK(s == "foobar");
        CHECK(s != "foobar0");
        CHECK(s.size() == 6);
        CHECK(s.size() == s.len);
        CHECK(s.data() == s.str);
    }

    // construct from a C-string:
    //
    // Since the input is only a pointer, the string length can only
    // be found with a call to strlen(). To make this cost evident, we
    // require a call to to_csubstr():
    {
        const char *foobar_str = "foobar";
        ryml::csubstr s = ryml::to_csubstr(foobar_str);
        CHECK(s == "foobar");
        CHECK(s != "foobar0");
        CHECK(s.size() == 6);
        CHECK(s.size() == s.len);
        CHECK(s.data() == s.str);
    }

    // construct from a std::string: same approach as above.
    // requires inclusion of the <ryml/std/string.hpp> header
    // or of the umbrella header <ryml_std.hpp>.
    // this was a conscious design choice to avoid requiring
    // the heavy std:: allocation machinery
    {
        std::string foobar_str = "foobar";
        ryml::csubstr s = ryml::to_csubstr(foobar_str); // defined in <ryml/std/string.hpp>
        CHECK(s == "foobar");
        CHECK(s != "foobar0");
        CHECK(s.size() == 6);
        CHECK(s.size() == s.len);
        CHECK(s.data() == s.str);
    }

    // convert substr -> csubstr
    {
        char buf[] = "foo";
        ryml::substr foo = buf;
        CHECK(foo.len == 3);
        CHECK(foo.data() == buf);
        ryml::csubstr cfoo = foo;
        CHECK(cfoo.data() == buf); // the conversion keeps pointing at the same memory
    }
    // cannot convert csubstr -> substr:
    {
        // ryml::substr foo2 = cfoo; // compile error: cannot write to csubstr
    }

    // construct from char[]/const char[]: mutable vs immutable memory
    {
        char const foobar_str_ro[] = "foobar"; // ro := read-only
        char       foobar_str_rw[] = "foobar"; // rw := read-write
        static_assert(std::is_array<decltype(foobar_str_ro)>::value, "this is an array");
        static_assert(std::is_array<decltype(foobar_str_rw)>::value, "this is an array");
        // csubstr <- read-only memory
        {
            ryml::csubstr foobar = foobar_str_ro;
            CHECK(foobar.data() == foobar_str_ro);
            CHECK(foobar.size() == strlen(foobar_str_ro));
            CHECK(foobar == "foobar"); // AKA strcmp
        }
        // csubstr <- read-write memory: you can create an immutable csubstr from mutable memory
        {
            ryml::csubstr foobar = foobar_str_rw;
            CHECK(foobar.data() == foobar_str_rw);
            CHECK(foobar.size() == strlen(foobar_str_rw));
            CHECK(foobar == "foobar"); // AKA strcmp
        }
        // substr <- read-write memory.
        {
            ryml::substr foobar = foobar_str_rw;
            CHECK(foobar.data() == foobar_str_rw);
            CHECK(foobar.size() == strlen(foobar_str_rw));
            CHECK(foobar == "foobar"); // AKA strcmp
        }
        // substr <- ro is impossible.
        {
            //ryml::substr foobar = foobar_str_ro; // compile error!
        }
    }

    // construct from char*/const char*: mutable vs immutable memory.
    // use to_substr()/to_csubstr()
    {
        char const* foobar_str_ro = "foobar";       // ro := read-only
        char        foobar_str_rw_[] = "foobar";    // rw := read-write
        char      * foobar_str_rw = foobar_str_rw_; // rw := read-write
        static_assert(!std::is_array<decltype(foobar_str_ro)>::value, "this is a decayed pointer");
        static_assert(!std::is_array<decltype(foobar_str_rw)>::value, "this is a decayed pointer");
        // csubstr <- read-only memory
        {
            //ryml::csubstr foobar = foobar_str_ro; // compile error: length is not known
            ryml::csubstr foobar = ryml::to_csubstr(foobar_str_ro);
            CHECK(foobar.data() == foobar_str_ro);
            CHECK(foobar.size() == strlen(foobar_str_ro));
            CHECK(foobar == "foobar"); // AKA strcmp
        }
        // csubstr <- read-write memory: you can create an immutable csubstr from mutable memory
        {
            ryml::csubstr foobar = ryml::to_csubstr(foobar_str_rw);
            CHECK(foobar.data() == foobar_str_rw);
            CHECK(foobar.size() == strlen(foobar_str_rw));
            CHECK(foobar == "foobar"); // AKA strcmp
        }
        // substr <- read-write memory.
        {
            ryml::substr foobar = ryml::to_substr(foobar_str_rw);
            CHECK(foobar.data() == foobar_str_rw);
            CHECK(foobar.size() == strlen(foobar_str_rw));
            CHECK(foobar == "foobar"); // AKA strcmp
        }
        // substr <- read-only is impossible.
        {
            //ryml::substr foobar = ryml::to_substr(foobar_str_ro); // compile error!
        }
    }

    // substr is mutable, without changing the size:
    {
        char buf[] = "foobar";
        ryml::substr foobar = buf;
        CHECK(foobar == "foobar");
        foobar[0] = 'F';            CHECK(foobar == "Foobar");
        foobar.back() = 'R';        CHECK(foobar == "FoobaR");
        foobar.reverse();           CHECK(foobar == "RabooF");
        foobar.reverse();           CHECK(foobar == "FoobaR");
        foobar.reverse_sub(1, 4);   CHECK(foobar == "FabooR");
        foobar.reverse_sub(1, 4);   CHECK(foobar == "FoobaR");
        foobar.reverse_range(2, 5); CHECK(foobar == "FoaboR");
        foobar.reverse_range(2, 5); CHECK(foobar == "FoobaR");
        foobar.replace('o', '0');   CHECK(foobar == "F00baR");
        foobar.replace('a', '_');   CHECK(foobar == "F00b_R");
        foobar.replace("_0b", 'a'); CHECK(foobar == "FaaaaR"); // replaces ANY of the characters
        foobar.toupper();           CHECK(foobar == "FAAAAR");
        foobar.tolower();           CHECK(foobar == "faaaar");
        foobar.fill('.');           CHECK(foobar == "......");
        // see also:
        // - erase()
        // - replace_all()
    }

    // sub-views
    {
        ryml::csubstr s = "fooFOObarBAR";
        CHECK(s.len == 12u);
        // sub(): <- first,[num]
        CHECK(s.sub(0)     == "fooFOObarBAR");
        CHECK(s.sub(0, 12) == "fooFOObarBAR");
        CHECK(s.sub(0,  3) == "foo"         );
        CHECK(s.sub(3)     ==    "FOObarBAR");
        CHECK(s.sub(3,  3) ==    "FOO"      );
        CHECK(s.sub(6)     ==       "barBAR");
        CHECK(s.sub(6,  3) ==       "bar"   );
        CHECK(s.sub(9)     ==          "BAR");
        CHECK(s.sub(9,  3) ==          "BAR");
        // first(): <- length
        CHECK(s.first(0) == ""   );
        CHECK(s.first(1) == "f"  );
        CHECK(s.first(2) != "f"  );
        CHECK(s.first(2) == "fo" );
        CHECK(s.first(3) == "foo");
        // last(): <- length
        CHECK(s.last(0) ==    "");
        CHECK(s.last(1) ==   "R");
        CHECK(s.last(2) ==  "AR");
        CHECK(s.last(3) == "BAR");
        // range(): <- first, last (last is not included)
        CHECK(s.range(0, 12) == "fooFOObarBAR");
        CHECK(s.range(1, 12) ==  "ooFOObarBAR");
        CHECK(s.range(1, 11) ==  "ooFOObarBA" );
        CHECK(s.range(2, 10) ==   "oFOObarB"  );
        CHECK(s.range(3,  9) ==    "FOObar"   );
        // offs(): offset from beginning, end
        CHECK(s.offs(0, 0) == "fooFOObarBAR");
        CHECK(s.offs(1, 0) ==  "ooFOObarBAR");
        CHECK(s.offs(1, 1) ==  "ooFOObarBA" );
        CHECK(s.offs(2, 1) ==   "oFOObarBA" );
        CHECK(s.offs(2, 2) ==   "oFOObarB"  );
        CHECK(s.offs(3, 3) ==    "FOObar"   );
        // right_of(): <- pos, include_pos
        CHECK(s.right_of(0,  true) == "fooFOObarBAR");
        CHECK(s.right_of(0, false) ==  "ooFOObarBAR");
        CHECK(s.right_of(1,  true) ==  "ooFOObarBAR");
        CHECK(s.right_of(1, false) ==   "oFOObarBAR");
        CHECK(s.right_of(2,  true) ==   "oFOObarBAR");
        CHECK(s.right_of(2, false) ==    "FOObarBAR");
        CHECK(s.right_of(3,  true) ==    "FOObarBAR");
        CHECK(s.right_of(3, false) ==     "OObarBAR");
        // left_of() <- pos, include_pos
        CHECK(s.left_of(12, false) == "fooFOObarBAR");
        CHECK(s.left_of(11,  true) == "fooFOObarBAR");
        CHECK(s.left_of(11, false) == "fooFOObarBA" );
        CHECK(s.left_of(10,  true) == "fooFOObarBA" );
        CHECK(s.left_of(10, false) == "fooFOObarB"  );
        CHECK(s.left_of( 9,  true) == "fooFOObarB"  );
        CHECK(s.left_of( 9, false) == "fooFOObar"   );
        // left_of(),right_of() <- substr
        ryml::csubstr FOO = s.sub(3, 3);
        CHECK(s.is_super(FOO)); // required for the following
        CHECK(s.left_of(FOO) == "foo");
        CHECK(s.right_of(FOO) == "barBAR");
    }

    // is_sub(),is_super()
    {
        ryml::csubstr foobar = "foobar";
        ryml::csubstr foo = foobar.first(3);
        CHECK(foo.is_sub(foobar));
        CHECK(foo.is_sub(foo));
        CHECK(!foo.is_super(foobar));
        CHECK(!foobar.is_sub(foo));
        // identity comparison is true:
        CHECK(foo.is_super(foo));
        CHECK(foo.is_sub(foo));
        CHECK(foobar.is_sub(foobar));
        CHECK(foobar.is_super(foobar));
    }

    // overlaps()
    {
        ryml::csubstr foobar = "foobar";
        ryml::csubstr foo = foobar.first(3);
        ryml::csubstr oba = foobar.offs(2, 1);
        ryml::csubstr abc = "abc"; // a different string, not a view into foobar
        CHECK(foobar.overlaps(foo));
        CHECK(foobar.overlaps(oba));
        CHECK(foo.overlaps(foobar));
        CHECK(foo.overlaps(oba));
        CHECK(!foo.overlaps(abc));
        CHECK(!abc.overlaps(foo));
    }

    // triml(): trim characters from the left
    // trimr(): trim characters from the right
    // trim(): trim characters from left AND right
    {
        CHECK(ryml::csubstr(" \t\n\rcontents without whitespace\t \n\r").trim("\t \n\r") == "contents without whitespace");
        ryml::csubstr aaabbb = "aaabbb";
        ryml::csubstr aaa___bbb = "aaa___bbb";
        // trim a character:
        CHECK(aaabbb.triml('a') == aaabbb.last(3)); // bbb
        CHECK(aaabbb.trimr('a') == aaabbb);
        CHECK(aaabbb.trim ('a') == aaabbb.last(3)); // bbb
        CHECK(aaabbb.triml('b') == aaabbb);
        CHECK(aaabbb.trimr('b') == aaabbb.first(3)); // aaa
        CHECK(aaabbb.trim ('b') == aaabbb.first(3)); // aaa
        CHECK(aaabbb.triml('c') == aaabbb);
        CHECK(aaabbb.trimr('c') == aaabbb);
        CHECK(aaabbb.trim ('c') == aaabbb);
        CHECK(aaa___bbb.triml('a') == aaa___bbb.last(6)); // ___bbb
        CHECK(aaa___bbb.trimr('a') == aaa___bbb);
        CHECK(aaa___bbb.trim ('a') == aaa___bbb.last(6)); // ___bbb
        CHECK(aaa___bbb.triml('b') == aaa___bbb);
        CHECK(aaa___bbb.trimr('b') == aaa___bbb.first(6)); // aaa___
        CHECK(aaa___bbb.trim ('b') == aaa___bbb.first(6)); // aaa___
        CHECK(aaa___bbb.triml('c') == aaa___bbb);
        CHECK(aaa___bbb.trimr('c') == aaa___bbb);
        CHECK(aaa___bbb.trim ('c') == aaa___bbb);
        // trim ANY of the characters:
        CHECK(aaabbb.triml("ab") == "");
        CHECK(aaabbb.trimr("ab") == "");
        CHECK(aaabbb.trim ("ab") == "");
        CHECK(aaabbb.triml("ba") == "");
        CHECK(aaabbb.trimr("ba") == "");
        CHECK(aaabbb.trim ("ba") == "");
        CHECK(aaabbb.triml("cd") == aaabbb);
        CHECK(aaabbb.trimr("cd") == aaabbb);
        CHECK(aaabbb.trim ("cd") == aaabbb);
        CHECK(aaa___bbb.triml("ab") == aaa___bbb.last(6)); // ___bbb
        CHECK(aaa___bbb.triml("ba") == aaa___bbb.last(6)); // ___bbb
        CHECK(aaa___bbb.triml("cd") == aaa___bbb);
        CHECK(aaa___bbb.trimr("ab") == aaa___bbb.first(6)); // aaa___
        CHECK(aaa___bbb.trimr("ba") == aaa___bbb.first(6)); // aaa___
        CHECK(aaa___bbb.trimr("cd") == aaa___bbb);
        CHECK(aaa___bbb.trim ("ab") == aaa___bbb.range(3, 6)); // ___
        CHECK(aaa___bbb.trim ("ba") == aaa___bbb.range(3, 6)); // ___
        CHECK(aaa___bbb.trim ("cd") == aaa___bbb);
    }

    // unquoted(): remove the enclosing single or double quotes
    {
        CHECK(ryml::csubstr(R"('this is is single quoted')").unquoted() == "this is is single quoted");
        CHECK(ryml::csubstr(R"("this is is double quoted")").unquoted() == "this is is double quoted");
    }

    // stripl(): remove pattern from the left
    // stripr(): remove pattern from the right
    {
        ryml::csubstr abc___cba = "abc___cba";
        ryml::csubstr abc___abc = "abc___abc";
        CHECK(abc___cba.stripl("abc") == abc___cba.last(6)); // ___cba
        CHECK(abc___cba.stripr("abc") == abc___cba);
        CHECK(abc___cba.stripl("ab") == abc___cba.last(7)); // c___cba
        CHECK(abc___cba.stripr("ab") == abc___cba);
        CHECK(abc___cba.stripl("a") == abc___cba.last(8)); // bc___cba, same as triml('a')
        CHECK(abc___cba.stripr("a") == abc___cba.first(8)); // abc___cb
        CHECK(abc___abc.stripl("abc") == abc___abc.last(6)); // ___abc
        CHECK(abc___abc.stripr("abc") == abc___abc.first(6)); // abc___
        CHECK(abc___abc.stripl("ab") == abc___abc.last(7)); // c___abc
        CHECK(abc___abc.stripr("ab") == abc___abc);
        CHECK(abc___abc.stripl("a") == abc___abc.last(8)); // bc___abc, same as triml('a')
        CHECK(abc___abc.stripr("a") == abc___abc);
    }

    // begins_with()/ends_with()
    // begins_with_any()/ends_with_any()
    {
        ryml::csubstr s = "foobar123";
        // char overloads
        CHECK(s.begins_with('f'));
        CHECK(s.ends_with('3'));
        CHECK(!s.ends_with('2'));
        CHECK(!s.ends_with('o'));
        // char[] overloads
        CHECK(s.begins_with("foobar"));
        CHECK(s.begins_with("foo"));
        CHECK(s.begins_with_any("foo"));
        CHECK(!s.begins_with("oof"));
        CHECK(s.begins_with_any("oof"));
        CHECK(s.ends_with("23"));
        CHECK(s.ends_with("123"));
        CHECK(s.ends_with_any("123"));
        CHECK(!s.ends_with("321"));
        CHECK(s.ends_with_any("231"));
    }

    // select()
    {
        ryml::csubstr s = "0123456789";
        CHECK(s.select('0') == s.sub(0, 1));
        CHECK(s.select('1') == s.sub(1, 1));
        CHECK(s.select('2') == s.sub(2, 1));
        CHECK(s.select('8') == s.sub(8, 1));
        CHECK(s.select('9') == s.sub(9, 1));
        CHECK(s.select("0123") == s.range(0, 4));
        CHECK(s.select("012" ) == s.range(0, 3));
        CHECK(s.select("01"  ) == s.range(0, 2));
        CHECK(s.select("0"   ) == s.range(0, 1));
        CHECK(s.select( "123") == s.range(1, 4));
        CHECK(s.select(  "23") == s.range(2, 4));
        CHECK(s.select(   "3") == s.range(3, 4));
    }

    // find()
    {
        ryml::csubstr s012345 = "012345";
        // find single characters (the optional second argument is the start position):
        CHECK(s012345.find('a') == ryml::npos);
        CHECK(s012345.find('0'    ) == 0u);
        CHECK(s012345.find('0', 1u) == ryml::npos);
        CHECK(s012345.find('1'    ) == 1u);
        CHECK(s012345.find('1', 2u) == ryml::npos);
        CHECK(s012345.find('2'    ) == 2u);
        CHECK(s012345.find('2', 3u) == ryml::npos);
        CHECK(s012345.find('3'    ) == 3u);
        CHECK(s012345.find('3', 4u) == ryml::npos);
        // find patterns
        CHECK(s012345.find("ab"    ) == ryml::npos);
        CHECK(s012345.find("01"    ) == 0u);
        CHECK(s012345.find("01", 1u) == ryml::npos);
        CHECK(s012345.find("12"    ) == 1u);
        CHECK(s012345.find("12", 2u) == ryml::npos);
        CHECK(s012345.find("23"    ) == 2u);
        CHECK(s012345.find("23", 3u) == ryml::npos);
    }

    // count(): count the number of occurrences of a character
    // (the optional second argument is the start position)
    {
        ryml::csubstr buf = "00110022003300440055";
        CHECK(buf.count('1'     ) ==  2u);
        CHECK(buf.count('1',  0u) ==  2u);
        CHECK(buf.count('1',  1u) ==  2u);
        CHECK(buf.count('1',  2u) ==  2u);
        CHECK(buf.count('1',  3u) ==  1u);
        CHECK(buf.count('1',  4u) ==  0u);
        CHECK(buf.count('1',  5u) ==  0u);
        CHECK(buf.count('0'     ) == 10u);
        CHECK(buf.count('0',  0u) == 10u);
        CHECK(buf.count('0',  1u) ==  9u);
        CHECK(buf.count('0',  2u) ==  8u);
        CHECK(buf.count('0',  3u) ==  8u);
        CHECK(buf.count('0',  4u) ==  8u);
        CHECK(buf.count('0',  5u) ==  7u);
        CHECK(buf.count('0',  6u) ==  6u);
        CHECK(buf.count('0',  7u) ==  6u);
        CHECK(buf.count('0',  8u) ==  6u);
        CHECK(buf.count('0',  9u) ==  5u);
        CHECK(buf.count('0', 10u) ==  4u);
        CHECK(buf.count('0', 11u) ==  4u);
        CHECK(buf.count('0', 12u) ==  4u);
        CHECK(buf.count('0', 13u) ==  3u);
        CHECK(buf.count('0', 14u) ==  2u);
        CHECK(buf.count('0', 15u) ==  2u);
        CHECK(buf.count('0', 16u) ==  2u);
        CHECK(buf.count('0', 17u) ==  1u);
        CHECK(buf.count('0', 18u) ==  0u);
        CHECK(buf.count('0', 19u) ==  0u);
        CHECK(buf.count('0', 20u) ==  0u);
    }

    // first_of(),last_of()
    {
        ryml::csubstr s012345 = "012345";
        CHECK(s012345.first_of('a') == ryml::npos);
        CHECK(s012345.first_of("ab") == ryml::npos);
        CHECK(s012345.first_of('0') == 0u);
        CHECK(s012345.first_of("0") == 0u);
        CHECK(s012345.first_of("01") == 0u);
        CHECK(s012345.first_of("10") == 0u);
        CHECK(s012345.first_of("012") == 0u);
        CHECK(s012345.first_of("210") == 0u);
        CHECK(s012345.first_of("0123") == 0u);
        CHECK(s012345.first_of("3210") == 0u);
        CHECK(s012345.first_of("01234") == 0u);
        CHECK(s012345.first_of("43210") == 0u);
        CHECK(s012345.first_of("012345") == 0u);
        CHECK(s012345.first_of("543210") == 0u);
        CHECK(s012345.first_of('5') == 5u);
        CHECK(s012345.first_of("5") == 5u);
        CHECK(s012345.first_of("45") == 4u);
        CHECK(s012345.first_of("54") == 4u);
        CHECK(s012345.first_of("345") == 3u);
        CHECK(s012345.first_of("543") == 3u);
        CHECK(s012345.first_of("2345") == 2u);
        CHECK(s012345.first_of("5432") == 2u);
        CHECK(s012345.first_of("12345") == 1u);
        CHECK(s012345.first_of("54321") == 1u);
        CHECK(s012345.first_of("012345") == 0u);
        CHECK(s012345.first_of("543210") == 0u);
        CHECK(s012345.first_of('0', 6u) == ryml::npos);
        CHECK(s012345.first_of('5', 6u) == ryml::npos);
        CHECK(s012345.first_of("012345", 6u) == ryml::npos);
        //
        CHECK(s012345.last_of('a') == ryml::npos);
        CHECK(s012345.last_of("ab") == ryml::npos);
        CHECK(s012345.last_of('0') == 0u);
        CHECK(s012345.last_of("0") == 0u);
        CHECK(s012345.last_of("01") == 1u);
        CHECK(s012345.last_of("10") == 1u);
        CHECK(s012345.last_of("012") == 2u);
        CHECK(s012345.last_of("210") == 2u);
        CHECK(s012345.last_of("0123") == 3u);
        CHECK(s012345.last_of("3210") == 3u);
        CHECK(s012345.last_of("01234") == 4u);
        CHECK(s012345.last_of("43210") == 4u);
        CHECK(s012345.last_of("012345") == 5u);
        CHECK(s012345.last_of("543210") == 5u);
        CHECK(s012345.last_of('5') == 5u);
        CHECK(s012345.last_of("5") == 5u);
        CHECK(s012345.last_of("45") == 5u);
        CHECK(s012345.last_of("54") == 5u);
        CHECK(s012345.last_of("345") == 5u);
        CHECK(s012345.last_of("543") == 5u);
        CHECK(s012345.last_of("2345") == 5u);
        CHECK(s012345.last_of("5432") == 5u);
        CHECK(s012345.last_of("12345") == 5u);
        CHECK(s012345.last_of("54321") == 5u);
        CHECK(s012345.last_of("012345") == 5u);
        CHECK(s012345.last_of("543210") == 5u);
        CHECK(s012345.last_of('0', 6u) == 0u);
        CHECK(s012345.last_of('5', 6u) == 5u);
        CHECK(s012345.last_of("012345", 6u) == 5u);
    }

    // first_not_of(), last_not_of()
    {
        ryml::csubstr s012345 = "012345";
        CHECK(s012345.first_not_of('a') == 0u);
        CHECK(s012345.first_not_of("ab") == 0u);
        CHECK(s012345.first_not_of('0') == 1u);
        CHECK(s012345.first_not_of("0") == 1u);
        CHECK(s012345.first_not_of("01") == 2u);
        CHECK(s012345.first_not_of("10") == 2u);
        CHECK(s012345.first_not_of("012") == 3u);
        CHECK(s012345.first_not_of("210") == 3u);
        CHECK(s012345.first_not_of("0123") == 4u);
        CHECK(s012345.first_not_of("3210") == 4u);
        CHECK(s012345.first_not_of("01234") == 5u);
        CHECK(s012345.first_not_of("43210") == 5u);
        CHECK(s012345.first_not_of("012345") == ryml::npos);
        CHECK(s012345.first_not_of("543210") == ryml::npos);
        CHECK(s012345.first_not_of('5') == 0u);
        CHECK(s012345.first_not_of("5") == 0u);
        CHECK(s012345.first_not_of("45") == 0u);
        CHECK(s012345.first_not_of("54") == 0u);
        CHECK(s012345.first_not_of("345") == 0u);
        CHECK(s012345.first_not_of("543") == 0u);
        CHECK(s012345.first_not_of("2345") == 0u);
        CHECK(s012345.first_not_of("5432") == 0u);
        CHECK(s012345.first_not_of("12345") == 0u);
        CHECK(s012345.first_not_of("54321") == 0u);
        CHECK(s012345.first_not_of("012345") == ryml::npos);
        CHECK(s012345.first_not_of("543210") == ryml::npos);
        CHECK(s012345.last_not_of('a') == 5u);
        CHECK(s012345.last_not_of("ab") == 5u);
        CHECK(s012345.last_not_of('5') == 4u);
        CHECK(s012345.last_not_of("5") == 4u);
        CHECK(s012345.last_not_of("45") == 3u);
        CHECK(s012345.last_not_of("54") == 3u);
        CHECK(s012345.last_not_of("345") == 2u);
        CHECK(s012345.last_not_of("543") == 2u);
        CHECK(s012345.last_not_of("2345") == 1u);
        CHECK(s012345.last_not_of("5432") == 1u);
        CHECK(s012345.last_not_of("12345") == 0u);
        CHECK(s012345.last_not_of("54321") == 0u);
        CHECK(s012345.last_not_of("012345") == ryml::npos);
        CHECK(s012345.last_not_of("543210") == ryml::npos);
        CHECK(s012345.last_not_of('0') == 5u);
        CHECK(s012345.last_not_of("0") == 5u);
        CHECK(s012345.last_not_of("01") == 5u);
        CHECK(s012345.last_not_of("10") == 5u);
        CHECK(s012345.last_not_of("012") == 5u);
        CHECK(s012345.last_not_of("210") == 5u);
        CHECK(s012345.last_not_of("0123") == 5u);
        CHECK(s012345.last_not_of("3210") == 5u);
        CHECK(s012345.last_not_of("01234") == 5u);
        CHECK(s012345.last_not_of("43210") == 5u);
        CHECK(s012345.last_not_of("012345") == ryml::npos);
        CHECK(s012345.last_not_of("543210") == ryml::npos);
    }

    // first_non_empty_span()
    {
        CHECK(ryml::csubstr("foo bar").first_non_empty_span() == "foo");
        CHECK(ryml::csubstr("       foo bar").first_non_empty_span() == "foo");
        CHECK(ryml::csubstr("\n   \r  \t  foo bar").first_non_empty_span() == "foo");
        CHECK(ryml::csubstr("\n   \r  \t  foo\n\r\t bar").first_non_empty_span() == "foo");
        CHECK(ryml::csubstr("\n   \r  \t  foo\n\r\t bar").first_non_empty_span() == "foo");
        CHECK(ryml::csubstr(",\n   \r  \t  foo\n\r\t bar").first_non_empty_span() == ",");
    }
    // first_uint_span()
    {
        CHECK(ryml::csubstr("1234 asdkjh").first_uint_span() == "1234");
        CHECK(ryml::csubstr("1234\rasdkjh").first_uint_span() == "1234");
        CHECK(ryml::csubstr("1234\tasdkjh").first_uint_span() == "1234");
        CHECK(ryml::csubstr("1234\nasdkjh").first_uint_span() == "1234");
        CHECK(ryml::csubstr("1234]asdkjh").first_uint_span() == "1234");
        CHECK(ryml::csubstr("1234)asdkjh").first_uint_span() == "1234");
        CHECK(ryml::csubstr("1234gasdkjh").first_uint_span() == ""); // digits followed directly by a letter: no span
    }
    // first_int_span()
    {
        CHECK(ryml::csubstr("-1234 asdkjh").first_int_span() == "-1234");
        CHECK(ryml::csubstr("-1234\rasdkjh").first_int_span() == "-1234");
        CHECK(ryml::csubstr("-1234\tasdkjh").first_int_span() == "-1234");
        CHECK(ryml::csubstr("-1234\nasdkjh").first_int_span() == "-1234");
        CHECK(ryml::csubstr("-1234]asdkjh").first_int_span() == "-1234");
        CHECK(ryml::csubstr("-1234)asdkjh").first_int_span() == "-1234");
        CHECK(ryml::csubstr("-1234gasdkjh").first_int_span() == "");
    }
    // first_real_span()
    {
        CHECK(ryml::csubstr("-1234 asdkjh").first_real_span() == "-1234");
        CHECK(ryml::csubstr("-1234\rasdkjh").first_real_span() == "-1234");
        CHECK(ryml::csubstr("-1234\tasdkjh").first_real_span() == "-1234");
        CHECK(ryml::csubstr("-1234\nasdkjh").first_real_span() == "-1234");
        CHECK(ryml::csubstr("-1234]asdkjh").first_real_span() == "-1234");
        CHECK(ryml::csubstr("-1234)asdkjh").first_real_span() == "-1234");
        CHECK(ryml::csubstr("-1234gasdkjh").first_real_span() == "");
        CHECK(ryml::csubstr("1.234 asdkjh").first_real_span() == "1.234");
        CHECK(ryml::csubstr("1.234e5 asdkjh").first_real_span() == "1.234e5");
        CHECK(ryml::csubstr("1.234e+5 asdkjh").first_real_span() == "1.234e+5");
        CHECK(ryml::csubstr("1.234e-5 asdkjh").first_real_span() == "1.234e-5");
        CHECK(ryml::csubstr("1.234 asdkjh").first_real_span() == "1.234");
        CHECK(ryml::csubstr("1.234e5 asdkjh").first_real_span() == "1.234e5");
        CHECK(ryml::csubstr("1.234e+5 asdkjh").first_real_span() == "1.234e+5");
        CHECK(ryml::csubstr("1.234e-5 asdkjh").first_real_span() == "1.234e-5");
        CHECK(ryml::csubstr("-1.234 asdkjh").first_real_span() == "-1.234");
        CHECK(ryml::csubstr("-1.234e5 asdkjh").first_real_span() == "-1.234e5");
        CHECK(ryml::csubstr("-1.234e+5 asdkjh").first_real_span() == "-1.234e+5");
        CHECK(ryml::csubstr("-1.234e-5 asdkjh").first_real_span() == "-1.234e-5");
        // hexadecimal floating point:
        CHECK(ryml::csubstr("0x1.e8480p+19 asdkjh").first_real_span() == "0x1.e8480p+19");
        CHECK(ryml::csubstr("0x1.e8480p-19 asdkjh").first_real_span() == "0x1.e8480p-19");
        CHECK(ryml::csubstr("-0x1.e8480p+19 asdkjh").first_real_span() == "-0x1.e8480p+19");
        CHECK(ryml::csubstr("-0x1.e8480p-19 asdkjh").first_real_span() == "-0x1.e8480p-19");
        CHECK(ryml::csubstr("+0x1.e8480p+19 asdkjh").first_real_span() == "+0x1.e8480p+19");
        CHECK(ryml::csubstr("+0x1.e8480p-19 asdkjh").first_real_span() == "+0x1.e8480p-19");
    }
    // see also is_number()

    // basename(), dirname(), extshort(), extlong()
    {
        CHECK(ryml::csubstr("/path/to/file.tar.gz").basename() == "file.tar.gz");
        CHECK(ryml::csubstr("/path/to/file.tar.gz").dirname() == "/path/to/");
        // the path separator can be given explicitly:
        CHECK(ryml::csubstr("C:\\path\\to\\file.tar.gz").basename('\\') == "file.tar.gz");
        CHECK(ryml::csubstr("C:\\path\\to\\file.tar.gz").dirname('\\') == "C:\\path\\to\\");
        CHECK(ryml::csubstr("/path/to/file.tar.gz").extshort() == "gz");
        CHECK(ryml::csubstr("/path/to/file.tar.gz").extlong() == "tar.gz");
        CHECK(ryml::csubstr("/path/to/file.tar.gz").name_wo_extshort() == "/path/to/file.tar");
        CHECK(ryml::csubstr("/path/to/file.tar.gz").name_wo_extlong() == "/path/to/file");
    }

    // split()
    {
        using namespace ryml;
        csubstr parts[] = {"aa", "bb", "cc", "dd", "ee", "ff"};
        const size_t num_parts = sizeof(parts) / sizeof(parts[0]);
        {
            size_t count = 0;
            for(csubstr part : csubstr("aa/bb/cc/dd/ee/ff").split('/'))
            {
                if(count < num_parts) // never index past the expected parts
                {
                    CHECK(part == parts[count]);
                }
                ++count;
            }
            CHECK(count == num_parts); // every part was visited, and no extra ones
        }
        {
            size_t count = 0;
            for(csubstr part : csubstr("aa.bb.cc.dd.ee.ff").split('.'))
            {
                if(count < num_parts) // never index past the expected parts
                {
                    CHECK(part == parts[count]);
                }
                ++count;
            }
            CHECK(count == num_parts); // every part was visited, and no extra ones
        }
        {
            size_t count = 0;
            for(csubstr part : csubstr("aa-bb-cc-dd-ee-ff").split('-'))
            {
                if(count < num_parts) // never index past the expected parts
                {
                    CHECK(part == parts[count]);
                }
                ++count;
            }
            CHECK(count == num_parts); // every part was visited, and no extra ones
        }
        // see also next_split()
    }

    //  pop_left(),  pop_right() --- non-greedy version
    // gpop_left(), gpop_right() --- greedy version
    {
        const bool skip_empty = true;
        // pop_left(): pop the first (leftmost) element
        CHECK(ryml::csubstr(  "0/1/2"   ). pop_left('/'            ) ==   "0"    );
        CHECK(ryml::csubstr( "/0/1/2"   ). pop_left('/'            ) == ""       );
        CHECK(ryml::csubstr("//0/1/2"   ). pop_left('/'            ) == ""       );
        CHECK(ryml::csubstr(  "0/1/2"   ). pop_left('/', skip_empty) ==   "0"    );
        CHECK(ryml::csubstr( "/0/1/2"   ). pop_left('/', skip_empty) ==  "/0"    );
        CHECK(ryml::csubstr("//0/1/2"   ). pop_left('/', skip_empty) == "//0"    );
        // gpop_left(): pop all but the last element (greedy pop)
        CHECK(ryml::csubstr(  "0/1/2"   ).gpop_left('/'            ) ==   "0/1"  );
        CHECK(ryml::csubstr( "/0/1/2"   ).gpop_left('/'            ) ==  "/0/1"  );
        CHECK(ryml::csubstr("//0/1/2"   ).gpop_left('/'            ) == "//0/1"  );
        CHECK(ryml::csubstr(  "0/1/2/"  ).gpop_left('/'            ) ==   "0/1/2");
        CHECK(ryml::csubstr( "/0/1/2/"  ).gpop_left('/'            ) ==  "/0/1/2");
        CHECK(ryml::csubstr("//0/1/2/"  ).gpop_left('/'            ) == "//0/1/2");
        CHECK(ryml::csubstr(  "0/1/2//" ).gpop_left('/'            ) ==   "0/1/2/");
        CHECK(ryml::csubstr( "/0/1/2//" ).gpop_left('/'            ) ==  "/0/1/2/");
        CHECK(ryml::csubstr("//0/1/2//" ).gpop_left('/'            ) == "//0/1/2/");
        CHECK(ryml::csubstr(  "0/1/2"   ).gpop_left('/', skip_empty) ==   "0/1"  );
        CHECK(ryml::csubstr( "/0/1/2"   ).gpop_left('/', skip_empty) ==  "/0/1"  );
        CHECK(ryml::csubstr("//0/1/2"   ).gpop_left('/', skip_empty) == "//0/1"  );
        CHECK(ryml::csubstr(  "0/1/2/"  ).gpop_left('/', skip_empty) ==   "0/1"  );
        CHECK(ryml::csubstr( "/0/1/2/"  ).gpop_left('/', skip_empty) ==  "/0/1"  );
        CHECK(ryml::csubstr("//0/1/2/"  ).gpop_left('/', skip_empty) == "//0/1"  );
        CHECK(ryml::csubstr(  "0/1/2//" ).gpop_left('/', skip_empty) ==   "0/1"  );
        CHECK(ryml::csubstr( "/0/1/2//" ).gpop_left('/', skip_empty) ==  "/0/1"  );
        CHECK(ryml::csubstr("//0/1/2//" ).gpop_left('/', skip_empty) == "//0/1"  );
        // pop_right(): pop the last (rightmost) element
        CHECK(ryml::csubstr(  "0/1/2"   ). pop_right('/'            ) ==   "2"    );
        CHECK(ryml::csubstr(  "0/1/2/"  ). pop_right('/'            ) == ""       );
        CHECK(ryml::csubstr(  "0/1/2//" ). pop_right('/'            ) == ""       );
        CHECK(ryml::csubstr(  "0/1/2"   ). pop_right('/', skip_empty) ==   "2"    );
        CHECK(ryml::csubstr(  "0/1/2/"  ). pop_right('/', skip_empty) ==   "2/"   );
        CHECK(ryml::csubstr(  "0/1/2//" ). pop_right('/', skip_empty) ==   "2//"  );
        // gpop_right(): pop all but the first element (greedy pop)
        CHECK(ryml::csubstr(  "0/1/2"   ).gpop_right('/'            ) ==     "1/2");
        CHECK(ryml::csubstr(  "0/1/2/"  ).gpop_right('/'            ) ==     "1/2/"  );
        CHECK(ryml::csubstr(  "0/1/2//" ).gpop_right('/'            ) ==     "1/2//"  );
        CHECK(ryml::csubstr( "/0/1/2"   ).gpop_right('/'            ) ==   "0/1/2");
        CHECK(ryml::csubstr( "/0/1/2/"  ).gpop_right('/'            ) ==   "0/1/2/"  );
        CHECK(ryml::csubstr( "/0/1/2//" ).gpop_right('/'            ) ==   "0/1/2//"  );
        CHECK(ryml::csubstr("//0/1/2"   ).gpop_right('/'            ) ==  "/0/1/2");
        CHECK(ryml::csubstr("//0/1/2/"  ).gpop_right('/'            ) ==  "/0/1/2/"  );
        CHECK(ryml::csubstr("//0/1/2//" ).gpop_right('/'            ) ==  "/0/1/2//"  );
        CHECK(ryml::csubstr(  "0/1/2"   ).gpop_right('/', skip_empty) ==     "1/2");
        CHECK(ryml::csubstr(  "0/1/2/"  ).gpop_right('/', skip_empty) ==     "1/2/"  );
        CHECK(ryml::csubstr(  "0/1/2//" ).gpop_right('/', skip_empty) ==     "1/2//"  );
        CHECK(ryml::csubstr( "/0/1/2"   ).gpop_right('/', skip_empty) ==     "1/2");
        CHECK(ryml::csubstr( "/0/1/2/"  ).gpop_right('/', skip_empty) ==     "1/2/"  );
        CHECK(ryml::csubstr( "/0/1/2//" ).gpop_right('/', skip_empty) ==     "1/2//"  );
        CHECK(ryml::csubstr("//0/1/2"   ).gpop_right('/', skip_empty) ==     "1/2");
        CHECK(ryml::csubstr("//0/1/2/"  ).gpop_right('/', skip_empty) ==     "1/2/"  );
        CHECK(ryml::csubstr("//0/1/2//" ).gpop_right('/', skip_empty) ==     "1/2//"  );
    }

    // see the docs:
    // https://c4core.docsforge.com/master/api/c4/basic_substring/
}





Add Discussion as Guest

Log in