#include "WSLsplit.h"
/*
An efficient split function
William S. Lear
10 May 1999 10:07:49 -0500 

    main() {
        string line;
        vector<string> v;
        int count = 0;

        while (getline(cin, line)) {
            split(v, line, '|');
            ++count;
        }
        cout << count << endl;
    }

    main() {
        char line[1024];
        vector<string> v;
        int count = 0;

        while (cin.getline(line, 1024)) {
            split(v, line, '|');
            ++count;
        }
        cout << count << endl;
    }
*/

    // Split a string on a given delimiter character into a vector of strings.
    //
    // v   - output vector; cleared first, then filled with the fields in order
    // str - the string to split (may be empty)
    // c   - the delimiter character
    //
    // A string containing N delimiters always yields N+1 fields. Fields may be
    // empty, so "" yields one empty field and "a|" yields "a" followed by "".
    // Returns the number of fields stored in v.
    int split(std::vector<std::string>& v, const std::string& str, char c)
    {
        v.clear();
        std::string::size_type start = 0;
        while (true) {
            // find() never reads past the end of str, unlike dereferencing an
            // iterator before comparing it against end().
            const std::string::size_type pos = str.find(c, start);
            if (pos == std::string::npos) {
                // No further delimiter: the remainder (possibly empty) is the
                // final field. start <= str.size() always holds here.
                v.push_back(str.substr(start));
                break;
            }
            v.push_back(str.substr(start, pos - start));
            start = pos + 1;  // resume just past the delimiter
        }
        return static_cast<int>(v.size());
    }

    // Break a NUL-terminated character array into fields separated by the
    // delimiter c, storing each field in v as a string.
    //
    // v is emptied before any field is stored. Empty fields are kept, so an
    // input with N delimiters produces N+1 entries (an empty input produces
    // a single empty entry).
    // Returns the number of fields stored in v.
    int split(std::vector<std::string>& v, const char* s, char c)
    {
        v.clear();
        const char* field_start = s;
        for (const char* cursor = s; ; ++cursor) {
            if (*cursor == '\0') {
                // The terminator closes the last field, which may be empty.
                v.push_back(std::string(field_start, cursor));
                break;
            }
            if (*cursor == c) {
                v.push_back(std::string(field_start, cursor));
                field_start = cursor + 1;  // next field begins after the delimiter
            }
        }
        return static_cast<int>(v.size());
    }

/*    // Represents a span of a character array
    typedef pair<const char*, const char*> span;

    // Convenience function to set a span
    inline void set_span(span& s, const char* b, const char* e)
    {
        s.first = b;
        s.second = e;
    }

    // Split a NULL-terminated character array on a given character into
    // a vector of spans
    // The vector is of constant size and is not cleared each time
    // The number of spans split out is returned
    int split(vector<span>& v, const char* s, char c)
    {
        int i = 0;
        while (true) {
            const char* begin = s;

            while (*s != c && *s) { ++s; }

	    set_span(v[i++], begin, s);

	    if (!*s) {
                break;
            }

	    if (!*++s) {
                set_span(v[i++], 0, 0);
                break;
            }
        }
        return i;
    }

Each of these routines will split the following string:

    <field0>|<field1>|<field2>| ... |<fieldN>

where any of the N fields can be empty, into N+1 fields.

So, what do folks think of this?  Obviously, the particular span
approach above has the drawback that it relies both on a fixed-length
input buffer and a fixed-length vector of spans.  However, for my
purposes, this is safe enough and speed is paramount at this time.

I'm sure there are better ways to approach this, issues I haven't
thought of, speed gains to be found here and there, etc.  I'd be glad
to hear of them.


Bill
-- 
William S. Lear | Who is there that sees not that this inextricable labyrinth
r a e l @       | of reasons  of state was artfully invented, lest the people
d e j a .       | should  understand  their own  affairs, and, understanding,
c o m           | become inclined to conduct them?    ---William Godwin, 1793
*/
