How do you modify a string split function to ignore consecutive delimiters?

advertisements

I am using a function that was posted as an answer to another Stack Overflow question. However, the user who posted it notes that it does not handle consecutive delimiters.

I am wondering how I can modify it so that it handles consecutive delimiters. Essentially, I want to ignore any extra delimiter.

For example say I have something like this:

h2,3  d3,4 j3,3   y4,1 g4,3

I want to split this into an array of strings at each space; however, as you can see, in some places there are multiple spaces. I simply want to ignore the extra delimiters.

Edit: Just to make it clear this is the code I am using from the answer I linked to above:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/*
 * Split a_str into the tokens separated by a_delim.
 *
 * Any run of consecutive delimiters counts as one separator, and leading
 * or trailing delimiters are skipped, so the result never contains an
 * empty token. An empty or all-delimiter input yields an array holding
 * only the terminating NULL.
 *
 * a_str is only read; it is not modified.
 *
 * Returns a heap-allocated, NULL-terminated array of heap-allocated
 * strings. The caller must free() every element and then the array.
 * Returns NULL if any allocation fails (nothing is leaked in that case).
 */
char** str_split(char* a_str, const char a_delim)
{
    char** result = NULL;
    size_t count  = 0;
    size_t idx    = 0;
    const char* p;

    /* Count tokens, not delimiters: a token begins at every non-delimiter
       character that is first in the string or follows a delimiter. */
    for (p = a_str; *p; p++)
    {
        if (*p != a_delim && (p == a_str || p[-1] == a_delim))
        {
            count++;
        }
    }

    /* One extra slot for the terminating NULL so the caller knows
       where the list of returned strings ends. */
    result = malloc(sizeof *result * (count + 1));
    if (!result)
    {
        return NULL;
    }

    p = a_str;
    while (idx < count)
    {
        const char* start;
        size_t len;
        char* copy;

        /* Skip the delimiter run in front of the next token. */
        while (*p && *p == a_delim)
        {
            p++;
        }

        start = p;
        while (*p && *p != a_delim)
        {
            p++;
        }
        len = (size_t)(p - start);

        copy = malloc(len + 1);
        if (!copy)
        {
            /* Undo the partial result so nothing leaks. */
            while (idx > 0)
            {
                free(result[--idx]);
            }
            free(result);
            return NULL;
        }
        memcpy(copy, start, len);
        copy[len] = '\0';
        result[idx++] = copy;
    }
    result[idx] = NULL;

    return result;
}

/* Demo driver: split a comma-separated list of month names with
   str_split, print each piece, and release what was returned. */
int main()
{
    char months[] = "JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC";
    char** tokens;
    char** cursor;

    printf("months=[%s]\n\n", months);

    tokens = str_split(months, ',');
    if (!tokens)
    {
        /* Nothing was returned, so there is nothing to print or free. */
        return 0;
    }

    /* Walk the array until its NULL terminator, freeing each entry
       right after it has been printed. */
    cursor = tokens;
    while (*cursor)
    {
        printf("month=[%s]\n", *cursor);
        free(*cursor);
        cursor++;
    }
    printf("\n");
    free(tokens);

    return 0;
}


This should do the trick:

/*
 * Split str at each occurrence of delimiter; a run of consecutive
 * delimiters is treated as a single separator.
 *
 * str itself is not modified: the function works on a private heap copy.
 * The result is a NULL-terminated array whose entries all point into that
 * one copy, with ret[0] at its start. To release everything the caller
 * frees ret[0] and then ret; the other entries must NOT be passed to
 * free().
 *
 * A leading run of delimiters yields an empty first token, a trailing run
 * yields an empty last token, and an empty str yields one empty token.
 *
 * Returns NULL if either allocation fails.
 */
char** str_split(const char *str, char delimiter)
{
    size_t len, i, j;
    char* buf;
    char** ret;

    len = strlen(str);
    buf = malloc(len + 1);
    if (buf == NULL)
        return NULL;
    memcpy(buf, str, len + 1);

    /* First pass: one token to start with, plus one per delimiter run. */
    j = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            /* Collapse the rest of this delimiter run. */
            while (buf[i + 1] == delimiter) i++;
            j++;
        }

    /* j token slots plus the terminating NULL. */
    ret = malloc(sizeof *ret * (j + 1));
    if (ret == NULL)
    {
        free(buf);
        return NULL;
    }
    ret[j] = NULL;

    /* Second pass: terminate each token in place and record where the
       next one starts. */
    ret[0] = buf;
    j = 1;
    for (i = 0; i < len; ++i)
        if (buf[i] == delimiter)
        {
            buf[i] = '\0';
            /* Collapse the rest of this delimiter run. */
            while (buf[i + 1] == delimiter) i++;
            ret[j++] = &buf[i + 1];
        }
    return ret;
}

Remove the two `while (buf[i + 1] == delimiter) i++;` lines if you do not want runs of consecutive delimiters to be collapsed; each extra delimiter will then produce an empty token.