[ngx_lua] ngx.re.split implementation

Jader H. Silva jaderhs5 at gmail.com
Fri Jul 13 21:19:11 UTC 2012


The patch was removed in the previous message :(
So here it is (for real).

>From 9091c40e22f6fd0ca2173ecbeb1f932502cc8ac6 Mon Sep 17 00:00:00 2001
From: "Jader H. Silva" <jaderhs5 at gmail.com>
Date: Fri, 13 Jul 2012 18:06:32 -0300
Subject: [PATCH] Add ngx.re.split function

---
 src/ngx_http_lua_regex.c |  443 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 443 insertions(+)

diff --git a/src/ngx_http_lua_regex.c b/src/ngx_http_lua_regex.c
index 108070c..aa5d445 100644
--- a/src/ngx_http_lua_regex.c
+++ b/src/ngx_http_lua_regex.c
@@ -74,6 +74,7 @@ static int ngx_http_lua_ngx_re_match(lua_State *L);
 static int ngx_http_lua_ngx_re_gmatch(lua_State *L);
 static int ngx_http_lua_ngx_re_sub(lua_State *L);
 static int ngx_http_lua_ngx_re_gsub(lua_State *L);
+static int ngx_http_lua_ngx_re_split(lua_State *L);
 static void ngx_http_lua_regex_free_study_data(ngx_pool_t *pool,
     pcre_extra *sd);
 static ngx_int_t ngx_lua_regex_compile(ngx_lua_regex_compile_t *rc);
@@ -1611,6 +1612,445 @@ error:
     return luaL_error(L, msg);
 }

+/*
+ * Lua API: ngx.re.split(subject, regex, options?, limit?)
+ *
+ * Splits "subject" at every match of "regex" and returns a single Lua
+ * table whose array part holds the pieces found between the matches.
+ *
+ *   - options: the option string handed to
+ *     ngx_http_lua_ngx_re_parse_opts(); defaults to "".
+ *   - limit: maximum number of splits to perform.  0 yields a table that
+ *     contains only the unsplit subject; a negative value (the default,
+ *     -1) means "no limit".
+ *
+ * A piece is kept even when it is empty, except that nothing is appended
+ * after a separator that ends exactly at the end of the subject.  When
+ * the regex never matches, the table contains just the subject.
+ *
+ * Zero-width matches (e.g. an empty pattern) split between bytes and
+ * never yield an empty piece; the search then resumes one byte further
+ * on so that the loop always makes progress.
+ *
+ * Raises a Lua error on a wrong argument count, a missing request
+ * object, a regex that fails to compile, an allocation failure, or a
+ * failing regex execution.
+ */
+static int
+ngx_http_lua_ngx_re_split(lua_State *L)
+{
+    ngx_http_lua_regex_t        *re;
+    ngx_http_request_t          *r;
+    ngx_str_t                    subj;
+    ngx_str_t                    pat;
+    ngx_str_t                    opts;
+    ngx_http_lua_main_conf_t    *lmcf = NULL;
+    ngx_pool_t                  *pool, *old_pool;
+    ngx_lua_regex_compile_t      re_comp;
+    const char                  *msg;
+    ngx_int_t                    rc;
+    int                          nargs;
+    int                         *cap = NULL;
+    int                          ovecsize;
+    int                          offset;   /* where the next search begins */
+    int                          start;    /* where the current piece begins */
+    ngx_int_t                    count;    /* number of splits done so far */
+    ngx_int_t                    flags;
+    ngx_int_t                    limit = -1;
+    u_char                       errstr[NGX_MAX_CONF_ERRSTR + 1];
+    pcre_extra                  *sd = NULL;
+
+    nargs = lua_gettop(L);
+
+    if (nargs != 2 && nargs != 3 && nargs != 4) {
+        return luaL_error(L, "expecting two or three or four arguments, "
+                "but got %d", nargs);
+    }
+
+    lua_pushlightuserdata(L, &ngx_http_lua_request_key);
+    lua_rawget(L, LUA_GLOBALSINDEX);
+    r = lua_touserdata(L, -1);
+    lua_pop(L, 1);
+
+    if (r == NULL) {
+        return luaL_error(L, "no request object found");
+    }
+
+    subj.data = (u_char *) luaL_checklstring(L, 1, &subj.len);
+    pat.data = (u_char *) luaL_checklstring(L, 2, &pat.len);
+
+    if (nargs >= 3) {
+        opts.data = (u_char *) luaL_checklstring(L, 3, &opts.len);
+
+        if (nargs == 4) {
+            limit = (ngx_int_t) luaL_checkinteger(L, 4);
+        }
+
+    } else { /* nargs == 2 */
+        opts.data = (u_char *) "";
+        opts.len = 0;
+    }
+
+    ngx_memzero(&re_comp, sizeof(ngx_lua_regex_compile_t));
+
+    /* stack: subj regex [opts [limit]] */
+
+    re_comp.options = 0;
+
+    /*
+     * The option string is argument #3 of ngx.re.split, so that is the
+     * argument index passed along with it.
+     * NOTE(review): assumes the last parameter is the Lua argument index
+     * used for error reporting -- confirm against the helper.
+     */
+    flags = ngx_http_lua_ngx_re_parse_opts(L, &re_comp, &opts, 3);
+
+    if (flags & NGX_LUA_RE_COMPILE_ONCE) {
+        lmcf = ngx_http_get_module_main_conf(r, ngx_http_lua_module);
+        pool = lmcf->pool;
+
+        dd("server pool %p", lmcf->pool);
+
+        lua_pushlightuserdata(L, &ngx_http_lua_regex_cache_key);
+        lua_rawget(L, LUA_REGISTRYINDEX); /* table */
+
+        /*
+         * Cache key: "split:" .. pattern .. raw bytes of the compile
+         * options.  The dedicated prefix is meant to keep split entries
+         * (which carry no replacement template) apart from entries saved
+         * by the other ngx.re.* functions.
+         * NOTE(review): confirm no other function uses this prefix.
+         */
+        lua_pushliteral(L, "split:");
+        lua_pushvalue(L, 2);
+
+        dd("options size: %d", (int) sizeof(re_comp.options));
+
+        lua_pushlstring(L, (char *) &re_comp.options,
+                sizeof(re_comp.options));
+
+        lua_concat(L, 3); /* table key */
+
+        lua_pushvalue(L, -1); /* table key key */
+
+        lua_rawget(L, -3); /* table key re */
+        re = lua_touserdata(L, -1);
+
+        lua_pop(L, 1); /* table key */
+
+        if (re) {
+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                    "lua regex cache hit for split regex \"%s\" with "
+                    "options \"%s\"", pat.data, opts.data);
+
+            lua_pop(L, 2);
+
+            dd("restoring regex %p, ncaptures %d,  captures %p",
+                    re->regex, re->ncaptures, re->captures);
+
+            re_comp.regex = re->regex;
+            sd = re->regex_sd;
+            re_comp.captures = re->ncaptures;
+            cap = re->captures;
+
+            if (flags & NGX_LUA_RE_MODE_DFA) {
+                ovecsize = 2;
+
+            } else {
+                ovecsize = (re->ncaptures + 1) * 3;
+            }
+
+            goto exec;
+        }
+
+        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                "lua regex cache miss for split regex \"%s\" with options "
+                "\"%s\"",
+                pat.data, opts.data);
+
+        if (lmcf->regex_cache_entries >= lmcf->regex_cache_max_entries) {
+
+            /* warn only once, the first time the cache is found full */
+            if (lmcf->regex_cache_entries
+                == lmcf->regex_cache_max_entries)
+            {
+                ngx_log_error(NGX_LOG_WARN, r->connection->log, 0,
+                        "lua exceeding regex cache max entries (%i)",
+                        lmcf->regex_cache_max_entries);
+
+                lmcf->regex_cache_entries++;
+            }
+
+            /* cache is full: compile per request and do not store */
+            lua_pop(L, 2); /* drop the cache table and the key */
+
+            pool = r->pool;
+            flags &= ~NGX_LUA_RE_COMPILE_ONCE;
+        }
+
+    } else {
+        pool = r->pool;
+    }
+
+    re_comp.pattern = pat;
+    re_comp.err.len = NGX_MAX_CONF_ERRSTR;
+    re_comp.err.data = errstr;
+    re_comp.pool = pool;
+
+    dd("compiling regex");
+
+    ngx_log_debug5(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+            "lua compiling split regex \"%s\" with options \"%s\" "
+            "(compile once: %d) (dfa mode: %d) (jit mode: %d)",
+            pat.data, opts.data,
+            (flags & NGX_LUA_RE_COMPILE_ONCE) != 0,
+            (flags & NGX_LUA_RE_MODE_DFA) != 0,
+            (flags & NGX_LUA_RE_MODE_JIT) != 0);
+
+    old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+    rc = ngx_lua_regex_compile(&re_comp);
+
+    ngx_http_lua_pcre_malloc_done(old_pool);
+
+    if (rc != NGX_OK) {
+        dd("compile failed");
+
+        re_comp.err.data[re_comp.err.len] = '\0';
+        msg = lua_pushfstring(L, "failed to compile regex \"%s\": %s",
+                pat.data, re_comp.err.data);
+
+        return luaL_argerror(L, 2, msg);
+    }
+
+#if LUA_HAVE_PCRE_JIT
+
+    if (flags & NGX_LUA_RE_MODE_JIT) {
+
+        old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+        sd = pcre_study(re_comp.regex, PCRE_STUDY_JIT_COMPILE, &msg);
+
+        ngx_http_lua_pcre_malloc_done(old_pool);
+
+#   if (NGX_DEBUG)
+        dd("sd = %p", sd);
+
+        if (msg != NULL) {
+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                "pcre study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",
+                msg, sd);
+        }
+
+        if (sd != NULL) {
+            int         jitted;
+
+            old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+            pcre_fullinfo(re_comp.regex, sd, PCRE_INFO_JIT, &jitted);
+
+            ngx_http_lua_pcre_malloc_done(old_pool);
+
+            ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                "pcre JIT compiling result: %d", jitted);
+        }
+#   endif /* NGX_DEBUG */
+
+    } else {
+
+        old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+        sd = pcre_study(re_comp.regex, 0, &msg);
+
+        ngx_http_lua_pcre_malloc_done(old_pool);
+
+#   if (NGX_DEBUG)
+        dd("sd = %p", sd);
+
+        if (msg != NULL) {
+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                "pcre study failed: %s (%p)",
+                msg, sd);
+        }
+#   endif /* NGX_DEBUG */
+    }
+
+#else  /* LUA_HAVE_PCRE_JIT */
+
+    if (flags & NGX_LUA_RE_MODE_JIT) {
+        ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                "your pcre build does not have JIT support and "
+                "the \"j\" regex option is ignored");
+    }
+
+#endif /* LUA_HAVE_PCRE_JIT */
+
+    dd("compile done, captures %d", re_comp.captures);
+
+    if (flags & NGX_LUA_RE_MODE_DFA) {
+        ovecsize = 2;
+
+    } else {
+        ovecsize = (re_comp.captures + 1) * 3;
+    }
+
+    cap = ngx_palloc(pool, ovecsize * sizeof(int));
+    if (cap == NULL) {
+        /* clear the flag so the error path releases regex and sd */
+        flags &= ~NGX_LUA_RE_COMPILE_ONCE;
+        msg = "out of memory";
+        goto error;
+    }
+
+    if (flags & NGX_LUA_RE_COMPILE_ONCE) {
+
+        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+                "lua saving compiled split regex (%d captures) into the "
+                "cache (entries %i)", re_comp.captures,
+                lmcf ? lmcf->regex_cache_entries : 0);
+
+        re = ngx_palloc(pool, sizeof(ngx_http_lua_regex_t));
+        if (re == NULL) {
+            /* nothing was cached: let the error path free everything */
+            flags &= ~NGX_LUA_RE_COMPILE_ONCE;
+            msg = "out of memory";
+            goto error;
+        }
+
+        dd("saving regex %p, ncaptures %d,  captures %p", re_comp.regex,
+                re_comp.captures, cap);
+
+        re->regex = re_comp.regex;
+        re->regex_sd = sd;
+        re->ncaptures = re_comp.captures;
+        re->captures = cap;
+        re->replace = NULL; /* split has no replacement template */
+
+        lua_pushlightuserdata(L, re); /* table key value */
+        lua_rawset(L, -3); /* table */
+        lua_pop(L, 1);
+
+        if (lmcf) {
+            lmcf->regex_cache_entries++;
+        }
+    }
+
+exec:
+    count = 0;
+    offset = 0;
+    start = 0;
+
+    lua_newtable(L);
+
+    for (;;) {
+        /* a negative limit means "unlimited" */
+        if (subj.len == 0 || (limit >= 0 && count >= limit)) {
+            break;
+        }
+
+        if (flags & NGX_LUA_RE_MODE_DFA) {
+
+#if LUA_HAVE_PCRE_DFA
+
+            int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];
+
+            rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,
+                    offset, cap, ovecsize, ws,
+                    NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT);
+
+#else /* LUA_HAVE_PCRE_DFA */
+
+            msg = "at least pcre 6.0 is required for the DFA mode";
+            goto error;
+
+#endif /* LUA_HAVE_PCRE_DFA */
+
+        } else {
+            rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, offset,
+                    cap, ovecsize);
+        }
+
+        if (rc == NGX_REGEX_NO_MATCHED) {
+            break;
+        }
+
+        if (rc < 0) {
+            msg = lua_pushfstring(L, ngx_regex_exec_n " failed: %d on "
+                    "\"%s\" using \"%s\"", (int) rc, subj.data, pat.data);
+            goto error;
+        }
+
+        if (rc == 0) {
+            if (flags & NGX_LUA_RE_MODE_DFA) {
+                rc = 1;
+
+            } else {
+                msg = "capture size too small";
+                goto error;
+            }
+        }
+
+        dd("rc = %d", (int) rc);
+
+        if (cap[0] == cap[1]) {
+            /*
+             * Zero-width match: resume one byte further on, otherwise
+             * the same empty match would be found forever.
+             * NOTE(review): this steps by one byte, not one character;
+             * confirm the behaviour when the UTF-8 option is in use.
+             */
+            offset = cap[1] + 1;
+
+            if (cap[0] == start) {
+                /* would only produce an empty piece: skip this match */
+                if (offset > (int) subj.len) {
+                    break;
+                }
+
+                continue;
+            }
+
+        } else {
+            offset = cap[1];
+        }
+
+        count++;
+
+        /* the piece runs from the end of the previous separator to the
+         * beginning of this one */
+        lua_pushlstring(L, (char *) &subj.data[start], cap[0] - start);
+        lua_rawseti(L, -2, (int) count);
+
+        start = cap[1];
+
+        if (offset > (int) subj.len) {
+            break;
+        }
+    }
+
+    if (count == 0) {
+        dd("no match, just the original subject");
+
+        lua_pushvalue(L, 1);
+        lua_rawseti(L, -2, 1);
+
+    } else if (start != (int) subj.len) {
+        dd("adding trailer: %s (len %d)", &subj.data[start],
+                (int) (subj.len - start));
+
+        lua_pushlstring(L, (char *) &subj.data[start], subj.len - start);
+        lua_rawseti(L, -2, (int) (count + 1));
+    }
+
+    /* per-request compilations are released right away; cached ones
+     * stay alive in the cache */
+    if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {
+        if (sd) {
+            ngx_http_lua_regex_free_study_data(pool, sd);
+        }
+
+        if (re_comp.regex) {
+            ngx_pfree(pool, re_comp.regex);
+        }
+
+        if (cap) {
+            ngx_pfree(pool, cap);
+        }
+    }
+
+    return 1;
+
+error:
+    if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {
+        if (sd) {
+            ngx_http_lua_regex_free_study_data(pool, sd);
+        }
+
+        if (re_comp.regex) {
+            ngx_pfree(pool, re_comp.regex);
+        }
+
+        if (cap) {
+            ngx_pfree(pool, cap);
+        }
+    }
+
+    /* msg may embed the subject and the pattern: never use it as the
+     * format string itself */
+    return luaL_error(L, "%s", msg);
+}

 void
 ngx_http_lua_inject_regex_api(lua_State *L)
@@ -1631,6 +2071,9 @@ ngx_http_lua_inject_regex_api(lua_State *L)
     lua_pushcfunction(L, ngx_http_lua_ngx_re_gsub);
     lua_setfield(L, -2, "gsub");

+    lua_pushcfunction(L, ngx_http_lua_ngx_re_split);
+    lua_setfield(L, -2, "split");
+
     lua_setfield(L, -2, "re");
 }

-- 
1.7.9.5


2012/7/13 Jader H. Silva <jaderhs5 at gmail.com>

> So, here it is :)
>
> ngx.re.split(*subject, regex, options?*, limit?)
>
> This function is based on ngx_re_sub.
>
> It will split subject on regex matches and return a table of strings.
> Limit is the max number of splits (0 will return a table containing the
> subject string).
>
> Let me know if there are bugs, indentation issues or anything I need to fix.
>
> Jader H. Silva
>
>
> 2012/7/11 agentzh <agentzh at gmail.com>
>
>> Hello!
>>
>> On Wed, Jul 11, 2012 at 12:54 PM, Jader Henrique da Silva
>> <cad_jsilva at uolinc.com> wrote:
>> > I was checking HttpLuaModule docs and saw "ngx.re.split" implementation
>> in
>> > the TODO section.
>> >
>> > Is it already implemented?
>>
>> Nope, otherwise I would update the TODO section accordingly :)
>>
>> > Are there any details about this implementation (e.g. parameters,
>> returned
>> > data)?
>> >
>>
>> Not yet. But I think the behavior will be similar to Perl 5's split
>> builtin function.
>>
>> I'm always open to patches for this feature :)
>>
>> Best regards,
>> -agentzh
>>
>> _______________________________________________
>> nginx mailing list
>> nginx at nginx.org
>> http://mailman.nginx.org/mailman/listinfo/nginx
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx/attachments/20120713/18b127ed/attachment-0001.html>


More information about the nginx mailing list