从 Lua 中提取的字符串匹配操作，可以用在 C 程序当中

#define MAXCAPTURES        32

#define CAP_UNFINISHED    (-1)
#define CAP_POSITION    (-2)
#define MAXCCALLS    200

#define L_ESC        '%'
#define SPECIALS    "^$*+?.([%-"
#define uchar(c)    ((unsigned char)(c))

/*
 * State shared by all matcher functions during one matching attempt.
 * The source and pattern are described by begin/end pointers, so they are
 * never modified; captures record where each "(...)" group started and how
 * long it is (or CAP_UNFINISHED / CAP_POSITION as a marker).
 */
typedef struct MatchState {
    int matchdepth;  /* control for recursive depth (to avoid C stack overflow) */
    const char *src_init;  /* init of source string */
    const char *src_end;  /* end ('\0') of source string */
    const char *p_end;  /* end ('\0') of pattern */
    int level;  /* total number of captures (finished or unfinished) */
    struct {
        const char *init;  /* where the capture starts in the source */
        ptrdiff_t len;     /* length, or CAP_UNFINISHED / CAP_POSITION */
    } capture[MAXCAPTURES];
} MatchState;

/* Forward declaration: the capture and repetition helpers below call match()
   recursively, and match() in turn calls them. */
static const char *match (MatchState *ms, const char *s, const char *p);

/*
 * Convert the digit character of a back-reference (the `N` of "%N") into a
 * zero-based index into ms->capture.
 *
 * ms: current match state.
 * l:  the digit character, e.g. '1' for the first capture.
 *
 * Returns the capture index when it refers to an existing, closed capture.
 * Otherwise prints "invalid capture index" and returns the value of printf().
 *
 * NOTE(review): the error return is printf's character count, not a
 * dedicated sentinel, so callers must validate the result themselves before
 * indexing ms->capture with it.
 */
static int check_capture (MatchState *ms, int l) {
    l -= '1';
    if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
        return printf("invalid capture index %%%d", l + 1);
    return l;
}

/*
 * Find the capture that a ')' in the pattern should close: the most recently
 * opened capture whose length is still CAP_UNFINISHED.
 *
 * Returns its index in ms->capture. When no capture is open, prints
 * "invalid pattern capture" and returns the value of printf().
 *
 * NOTE(review): the error return is printf's character count, not a
 * dedicated sentinel, so callers must validate the result themselves before
 * indexing ms->capture with it.
 */
static int capture_to_close (MatchState *ms) {
    int level = ms->level;
    for (level--; level>=0; level--) {
        if (ms->capture[level].len == CAP_UNFINISHED) return level;
    }
    return printf("invalid pattern capture");
}

/*
 * Open a new capture at source position `s` and continue matching the rest
 * of the pattern from `p`.
 *
 * ms:   current match state.
 * s:    source position where the capture begins.
 * p:    pattern position just after the opening of the capture.
 * what: initial `len` marker, CAP_UNFINISHED for "(...)" or CAP_POSITION
 *       for "()".
 *
 * Returns the end of the overall match, or NULL if the rest of the pattern
 * does not match (the capture is then removed again) or if all
 * MAXCAPTURES slots are already in use.
 */
static const char *start_capture (MatchState *ms, const char *s,
                                  const char *p, int what) {
    const char *res;
    int level = ms->level;
    if (level >= MAXCAPTURES) {
        printf("too many captures");
        /* printf returns, so bail out here instead of writing past the
           end of ms->capture. */
        return NULL;
    }
    ms->capture[level].init = s;
    ms->capture[level].len = what;
    ms->level = level+1;
    if ((res=match(ms, s, p)) == NULL)  /* match failed? */
        ms->level--;  /* undo capture */
    return res;
}

/*
 * Close the innermost open capture at source position `s` and continue
 * matching the rest of the pattern from `p`.
 *
 * Returns the end of the overall match, or NULL if the rest of the pattern
 * does not match (the capture is then re-opened so the caller can
 * backtrack) or if there is no open capture to close.
 */
static const char *end_capture (MatchState *ms, const char *s,
                                const char *p) {
    const char *res;
    int l = capture_to_close(ms);
    /* capture_to_close reports errors by printing; whatever it returns in
       that case is not an open capture, so verify the index before use. */
    if (l < 0 || l >= ms->level || ms->capture[l].len != CAP_UNFINISHED)
        return NULL;
    ms->capture[l].len = s - ms->capture[l].init;  /* close capture */
    if ((res = match(ms, s, p)) == NULL)  /* match failed? */
        ms->capture[l].len = CAP_UNFINISHED;  /* undo capture */
    return res;
}

/*
 * Match a balanced sequence for the "%bxy" pattern item.
 *
 * ms: current match state.
 * s:  source position where the balanced run must start.
 * p:  pattern position of the opening delimiter `x`; `y` is at p+1.
 *
 * Returns the position just after the closing delimiter that balances the
 * first one, or NULL when `s` does not start with `x`, the source ends
 * before the delimiters balance, or the pattern has no two characters
 * after "%b".
 */
static const char *matchbalance (MatchState *ms, const char *s,
                                 const char *p) {
    if (p >= ms->p_end - 1) {
        printf( "malformed pattern \n" );
        /* Both delimiters are required; without them *(p+1) would read
           beyond the pattern, so fail the match. */
        return NULL;
    }
    if (*s != *p) return NULL;
    else {
        int b = *p;      /* opening delimiter */
        int e = *(p+1);  /* closing delimiter */
        int cont = 1;    /* current nesting depth */
        while (++s < ms->src_end) {
            if (*s == e) {
                if (--cont == 0) return s+1;
            }
            else if (*s == b) cont++;
        }
    }
    return NULL;  /* string ends out of balance */
}

/*
 * Find the end of the single pattern item that starts at `p`: a plain
 * character, an escape "%x", or a bracket set "[...]".
 *
 * Returns a pointer to the first pattern character after the item (where an
 * optional repetition suffix would be). For a malformed item (pattern ends
 * right after '%', or a set without closing ']') a message is printed and
 * ms->p_end is returned, so callers never receive a pointer beyond the
 * pattern.
 */
static const char *classend (MatchState *ms, const char *p) {
    switch (*p++) {
        case L_ESC: {
            if (p == ms->p_end) {
                printf("malformed pattern ends width '%%'");
                return ms->p_end;
            }
            return p+1;
        }
        case '[': {
            if (*p == '^') p++;
            do {  /* look for a `]' */
                if (p == ms->p_end) {
                    printf("maltformed pattern missing ]");
                    return ms->p_end;
                }
                if (*(p++) == L_ESC && p < ms->p_end)
                    p++;  /* skip escapes (e.g. `%]') */
            } while (*p != ']');
            return p+1;
        }
        default: {
            return p;
        }
    }
}

/*
 * Test character `c` against the class letter `cl` of a "%cl" escape.
 *
 * c:  the source character, as an unsigned char value.
 * cl: the class letter. A lowercase letter selects the class (a, c, d, g,
 *     l, p, s, u, w, x, z); the uppercase form selects its complement.
 *     Any other character is compared literally with `c`.
 *
 * Returns non-zero when `c` matches, zero otherwise.
 */
static int match_class (int c, int cl) {
    int res;
    switch (tolower(cl)) {
        case 'a' : res = isalpha(c); break;
        case 'c' : res = iscntrl(c); break;
        case 'd' : res = isdigit(c); break;
        case 'g' : res = isgraph(c); break;
        case 'l' : res = islower(c); break;
        case 'p' : res = ispunct(c); break;
        case 's' : res = isspace(c); break;
        case 'u' : res = isupper(c); break;
        case 'w' : res = isalnum(c); break;
        case 'x' : res = isxdigit(c); break;
        case 'z' : res = (c == 0); break;  /* deprecated option */
        default: return (cl == c);
    }
    return (islower(cl) ? res : !res);
}

/*
 * Test character `c` against a bracket set "[...]" or "[^...]".
 *
 * c:  the source character, as an unsigned char value.
 * p:  points at the opening '[' of the set.
 * ec: points at the closing ']' of the set.
 *
 * Returns non-zero when `c` is accepted by the set (taking a leading '^'
 * complement into account), zero otherwise.
 */
static int matchbracketclass (int c, const char *p, const char *ec) {
    int sig = 1;
    if (*(p+1) == '^') {
        sig = 0;
        p++;  /* skip the `^' */
    }
    while (++p < ec) {
        if (*p == L_ESC) {
            p++;  /* step onto the class letter that follows the escape */
            if (match_class(c, uchar(*p)))
                return sig;
        }
        else if ((*(p+1) == '-') && (p+2 < ec)) {
            p += 2;  /* move to the upper bound; lower bound is now p-2 */
            if (uchar(*(p-2)) <= c && c <= uchar(*p))
                return sig;
        }
        else if (uchar(*p) == c) return sig;
    }
    return !sig;
}

/*
 * Match a back-reference "%N": the source at `s` must repeat the text
 * recorded by an earlier capture.
 *
 * ms: current match state.
 * s:  source position to compare at.
 * l:  the digit character `N` from the pattern.
 *
 * Returns the position just after the repeated text, or NULL when the text
 * differs, the remaining source is too short, or `N` does not name a
 * closed capture with a real length.
 */
static const char *match_capture (MatchState *ms, const char *s, int l) {
    size_t len;
    int idx = check_capture(ms, l);  /* prints a diagnostic for a bad index */
    /* check_capture signals errors only by printing, so confirm that the
       value it returned really is the requested, usable capture. A negative
       len (unfinished or position capture) can never match either. */
    if (idx != l - '1' || idx < 0 || idx >= ms->level ||
        ms->capture[idx].len < 0)
        return NULL;
    len = (size_t)ms->capture[idx].len;
    if ((size_t)(ms->src_end-s) >= len &&
        memcmp(ms->capture[idx].init, s, len) == 0)
        return s+len;
    return NULL;
}

/*
 * Test whether the single source character at `s` matches the single
 * pattern item that starts at `p`.
 *
 * ms: current match state (supplies the end of the source).
 * s:  source position to test.
 * p:  start of the pattern item.
 * ep: end of the pattern item, as returned by classend().
 *
 * Returns non-zero on a match; zero on a mismatch or when `s` is at or
 * beyond the end of the source.
 */
static int singlematch (MatchState *ms, const char *s, const char *p,
                        const char *ep) {
    if (s >= ms->src_end)
        return 0;
    else {
        int c = uchar(*s);
        switch (*p) {
            case '.': return 1;  /* matches any char */
            case L_ESC: return match_class(c, uchar(*(p+1)));
            case '[': return matchbracketclass(c, p, ep-1);
            default:  return (uchar(*p) == c);
        }
    }
}

/*
 * Greedy repetition ('*' and '+'): consume as many characters matching the
 * item [p, ep) as possible starting at `s`, then give them back one at a
 * time until the rest of the pattern (after the suffix at `ep`) matches.
 *
 * Returns the end of the overall match, or NULL if no repetition count
 * lets the rest of the pattern match.
 */
static const char *max_expand (MatchState *ms, const char *s,
                               const char *p, const char *ep) {
    ptrdiff_t i = 0;  /* counts maximum expand for item */
    while (singlematch(ms, s + i, p, ep))
        i++;
    /* keeps trying to match with the maximum repetitions */
    while (i>=0) {
        const char *res = match(ms, (s+i), ep+1);
        if (res) return res;
        i--;  /* else didn't match; reduce 1 repetition to try again */
    }
    return NULL;
}

/*
 * Lazy repetition ('-'): try to match the rest of the pattern (after the
 * suffix at `ep`) first, and consume one more character matching the item
 * [p, ep) only when that fails.
 *
 * Returns the end of the overall match, or NULL once the item no longer
 * matches and the rest of the pattern has not matched either.
 */
static const char *min_expand (MatchState *ms, const char *s,
                               const char *p, const char *ep) {
    for (;;) {
        const char *res = match(ms, s, ep+1);
        if (res != NULL)
            return res;
        else if (singlematch(ms, s, p, ep))
            s++;  /* try with one more repetition */
        else return NULL;
    }
}

/*
 * Core matcher: try to match the pattern starting at `p` against the source
 * starting at `s`.
 *
 * ms: current match state; ms->matchdepth is decremented on entry and
 *     restored on exit so it bounds the recursion depth.
 * s:  source position where the match must begin.
 * p:  pattern position to match from.
 *
 * Returns a pointer to the end of the matched text, or NULL when the
 * pattern does not match here, the recursion budget is exhausted, or the
 * pattern is malformed.
 */
static const char *match (MatchState *ms, const char *s, const char *p) {
    if (ms->matchdepth-- == 0) {
        printf("pattern too complex");
        ms->matchdepth++;  /* keep the budget balanced for the callers */
        return NULL;
    }
init: /* using goto's to optimize tail recursion */
    if (p != ms->p_end) {  /* end of pattern? */
        switch (*p) {
            case '(': {  /* start capture */
                if (*(p + 1) == ')')  /* position capture? */
                    s = start_capture(ms, s, p + 2, CAP_POSITION);
                else
                    s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
                break;
            }
            case ')': {  /* end capture */
                s = end_capture(ms, s, p + 1);
                break;
            }
            case '$': {
                if ((p + 1) != ms->p_end)  /* is the `$' the last char in pattern? */
                    goto dflt;  /* no; go to default */
                s = (s == ms->src_end) ? s : NULL;  /* check end of string */
                break;
            }
            case L_ESC: {  /* escaped sequences not in the format class[*+?-]? */
                switch (*(p + 1)) {
                    case 'b': {  /* balanced string? */
                        s = matchbalance(ms, s, p + 2);
                        if (s != NULL) {
                            p += 4; goto init;  /* return match(ms, s, p + 4); */
                        }  /* else fail (s == NULL) */
                        break;
                    }
                    case 'f': {  /* frontier? */
                        const char *ep; char previous;
                        p += 2;
                        if (*p != '[') {
                            printf( "missing [ after in pattern" );
                            s = NULL;  /* malformed pattern: fail the match */
                            break;
                        }
                        ep = classend(ms, p);  /* points to what is next */
                        previous = (s == ms->src_init) ? '\0' : *(s - 1);
                        if (!matchbracketclass(uchar(previous), p, ep - 1) &&
                            matchbracketclass(uchar(*s), p, ep - 1)) {
                            p = ep; goto init;  /* return match(ms, s, ep); */
                        }
                        s = NULL;  /* match failed */
                        break;
                    }
                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7':
                    case '8': case '9': {  /* capture results (%0-%9)? */
                        s = match_capture(ms, s, uchar(*(p + 1)));
                        if (s != NULL) {
                            p += 2; goto init;  /* return match(ms, s, p + 2) */
                        }
                        break;
                    }
                    default: goto dflt;
                }
                break;
            }
            default: dflt: {  /* pattern class plus optional suffix */
                const char *ep = classend(ms, p);  /* points to optional suffix */
                /* does not match at least once? */
                if (!singlematch(ms, s, p, ep)) {
                    if (*ep == '*' || *ep == '?' || *ep == '-') {  /* accept empty? */
                        p = ep + 1; goto init;  /* return match(ms, s, ep + 1); */
                    }
                    else  /* '+' or no suffix */
                        s = NULL;  /* fail */
                }
                else {  /* matched once */
                    switch (*ep) {  /* handle optional suffix */
                        case '?': {  /* optional */
                            const char *res;
                            if ((res = match(ms, s + 1, ep + 1)) != NULL)
                                s = res;
                            else {
                                p = ep + 1; goto init;  /* else return match(ms, s, ep + 1); */
                            }
                            break;
                        }
                        case '+':  /* 1 or more repetitions */
                            s++;  /* 1 match already done */
                            /* go through */
                        case '*':  /* 0 or more repetitions */
                            s = max_expand(ms, s, p, ep);
                            break;
                        case '-':  /* 0 or more repetitions (minimum) */
                            s = min_expand(ms, s, p, ep);
                            break;
                        default:  /* no suffix */
                            s++; p = ep; goto init;  /* return match(ms, s + 1, ep); */
                    }
                }
                break;
            }
        }
    }
    ms->matchdepth++;  /* leaving this level: give the depth budget back */
    return s;
}

/*
 * Translate a relative string position into an absolute one.
 *
 * pos: a position; negative values count back from the end of the string
 *      (-1 is the last character).
 * len: length of the string.
 *
 * Returns `pos` unchanged when it is non-negative, the corresponding
 * position counted from the start when it is negative, and 0 when a
 * negative `pos` reaches before the start of the string.
 */
static size_t posrelat (ptrdiff_t pos, size_t len) {
    if (pos >= 0) return (size_t)pos;
    else {
        /* Negate in unsigned arithmetic so the most negative value does
           not overflow. */
        size_t back = (size_t)0 - (size_t)pos;
        if (back > len) return 0;
        return len - back + 1;
    }
}

bool str_gmatch( const char* s, const char* p )
    MatchState ms;
    size_t ls, lp;
    //const char *s = "this is a test";
    //const char *p = "%a+";
    ls = strlen( s );
    lp = strlen( p );
    ms.matchdepth = MAXCCALLS;
    ms.src_init = s;
    ms.src_end = s+ls;
    ms.p_end = p + lp;
    const char* src = s;
    for ( ;src <= ms.src_end; src++ )
        const char *e;
        ms.level = 0;
        //lua_assert(ms.matchdepth == MAXCCALLS);
        if ((e = match(&ms, src, p)) != NULL) {
            int i = 0;
            size_t newstart = e-s;
            if (e == src)
            //nlen = newstart;
            src = e;
            //lua_pushinteger(L, newstart);
            //lua_replace(L, lua_upvalueindex(3));
            //return push_captures(&ms, src, e);
    return false;

bool str_match( const char* s, const char* p )
    size_t ls, lp;
    //const char *s = "this is a test";
    //const char *p = "%a+";
    ls = strlen( s );
    lp = strlen( p );
    size_t init = posrelat( 1, ls );
    MatchState ms;
    const char *s1 = s + init - 1;
    int anchor = (*p == '^');
    if (anchor) {
        p++; lp--;
    ms.matchdepth = MAXCCALLS;
    ms.src_init = s;
    ms.src_end = s + ls;
    ms.p_end = p + lp;
    do {
        const char *res;
        ms.level = 0;
        if ((res=match(&ms, s1, p)) != NULL)
            size_t i = res - s;
            i = s1 - s + 1;
            //if (find) {
            //    lua_pushinteger(L, s1 - s + 1);
            //    lua_pushinteger(L, res - s);
            //    return push_captures(&ms, NULL, 0) + 2;
            //    return push_captures(&ms, s1, res);
    } while (s1++ < ms.src_end && !anchor);
    return false;

/*
 * Placeholder for a substitution function; not implemented.
 * Always returns false.
 */
bool str_gsub() {
    return false;
}

/*
 * Lua-facing global substitution, written against the Lua C API.
 *
 * Reads the subject string (argument 1), the pattern (argument 2), the
 * replacement value (argument 3: number, string, function or table) and an
 * optional maximum number of substitutions (argument 4, default
 * length + 1). Pushes the resulting string and the number of substitutions
 * made, and returns 2.
 *
 * NOTE(review): this block uses lua_State, luaL_Buffer, add_value and an
 * `L` member of MatchState, none of which are defined in the code visible
 * here, and its name is the same as the `bool str_gsub()` defined just
 * above it. It looks like the original Lua routine kept for reference;
 * confirm whether it is meant to be compiled in this file.
 */
static int str_gsub (lua_State *L) {
    size_t srcl, lp;
    const char *src = luaL_checklstring(L, 1, &srcl);
    const char *p = luaL_checklstring(L, 2, &lp);
    int tr = lua_type(L, 3);
    size_t max_s = luaL_optinteger(L, 4, srcl+1);
    int anchor = (*p == '^');
    size_t n = 0;  /* number of substitutions performed so far */
    MatchState ms;
    luaL_Buffer b;
    luaL_argcheck(L, tr == LUA_TNUMBER || tr == LUA_TSTRING ||
                  tr == LUA_TFUNCTION || tr == LUA_TTABLE, 3,
                  "string/function/table expected");
    luaL_buffinit(L, &b);
    if (anchor) {
        p++; lp--;  /* skip the anchor character */
    }
    ms.L = L;
    ms.matchdepth = MAXCCALLS;
    ms.src_init = src;
    ms.src_end = src+srcl;
    ms.p_end = p + lp;
    while (n < max_s) {
        const char *e;
        ms.level = 0;
        lua_assert(ms.matchdepth == MAXCCALLS);
        e = match(&ms, src, p);
        if (e) {
            n++;
            add_value(&ms, &b, src, e, tr);
        }
        if (e && e>src)  /* non-empty match: continue after it */
            src = e;
        else if (src < ms.src_end)  /* no match or empty match: copy one char */
            luaL_addchar(&b, *src++);
        else break;
        if (anchor) break;
    }
    luaL_addlstring(&b, src, ms.src_end-src);  /* copy the unmatched tail */
    luaL_pushresult(&b);  /* first result: the rebuilt string */
    lua_pushinteger(L, n);  /* second result: number of substitutions */
    return 2;
}

  • 0
  • 0
    觉得还不错? 一键收藏
  • 3


  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
评论 3




当前余额3.43前往充值 >
领取后你会自动成为博主和红包主的粉丝 规则
钱包余额 0


