The patch was removed in the previous message :(<br>So here it is (for real).<br><br>From 9091c40e22f6fd0ca2173ecbeb1f932502cc8ac6 Mon Sep 17 00:00:00 2001<br>From: "Jader H. Silva" <<a href="mailto:jaderhs5@gmail.com">jaderhs5@gmail.com</a>><br>
Date: Fri, 13 Jul 2012 18:06:32 -0300<br>Subject: [PATCH] Add ngx.re.split function<br><br>---<br> src/ngx_http_lua_regex.c |  443 ++++++++++++++++++++++++++++++++++++++++++++++<br> 1 file changed, 443 insertions(+)<br><br>
diff --git a/src/ngx_http_lua_regex.c b/src/ngx_http_lua_regex.c<br>index 108070c..aa5d445 100644<br>--- a/src/ngx_http_lua_regex.c<br>+++ b/src/ngx_http_lua_regex.c<br>@@ -74,6 +74,7 @@ static int ngx_http_lua_ngx_re_match(lua_State *L);<br>
 static int ngx_http_lua_ngx_re_gmatch(lua_State *L);<br> static int ngx_http_lua_ngx_re_sub(lua_State *L);<br> static int ngx_http_lua_ngx_re_gsub(lua_State *L);<br>+static int ngx_http_lua_ngx_re_split(lua_State *L);<br>
 static void ngx_http_lua_regex_free_study_data(ngx_pool_t *pool,<br>     pcre_extra *sd);<br> static ngx_int_t ngx_lua_regex_compile(ngx_lua_regex_compile_t *rc);<br>@@ -1611,6 +1612,445 @@ error:<br>     return luaL_error(L, msg);<br>
 }<br> <br>+static int<br>+ngx_http_lua_ngx_re_split(lua_State *L)<br>+{<br>+    ngx_http_lua_regex_t        *re;<br>+    ngx_http_request_t          *r;<br>+    ngx_str_t                    subj;<br>+    ngx_str_t                    pat;<br>
+    ngx_str_t                    opts;<br>+    ngx_str_t                    tpl;<br>+    ngx_http_lua_main_conf_t    *lmcf = NULL;<br>+    ngx_pool_t                  *pool, *old_pool;<br>+    ngx_lua_regex_compile_t      re_comp;<br>
+    const char                  *msg;<br>+    ngx_int_t                    rc;<br>+    ngx_uint_t                   n;<br>+    ngx_int_t                    i;<br>+    int                          nargs;<br>+    int                         *cap = NULL;<br>
+    int                          ovecsize;<br>+    int                          type;<br>+    unsigned                     func;<br>+    int                          offset;<br>+    size_t                       count;<br>
+    luaL_Buffer                  luabuf;<br>+    ngx_int_t                    flags;<br>+    ngx_int_t                    limit = -1;<br>+    u_char                      *p;<br>+    u_char                       errstr[NGX_MAX_CONF_ERRSTR + 1];<br>
+    pcre_extra                  *sd = NULL;<br>+<br>+    ngx_http_lua_complex_value_t              *ctpl = NULL;<br>+    ngx_http_lua_compile_complex_value_t       ccv;<br>+<br>+    nargs = lua_gettop(L);<br>+<br>+    if (nargs != 2 && nargs != 3 && nargs != 4) {<br>
+        return luaL_error(L, "expecting two or three or four arguments, but got %d",<br>+                nargs);<br>+    }<br>+<br>+    lua_pushlightuserdata(L, &ngx_http_lua_request_key);<br>+    lua_rawget(L, LUA_GLOBALSINDEX);<br>
+    r = lua_touserdata(L, -1);<br>+    lua_pop(L, 1);<br>+<br>+    if (r == NULL) {<br>+        return luaL_error(L, "no request object found");<br>+    }<br>+<br>+    subj.data = (u_char *) luaL_checklstring(L, 1, &subj.len);<br>
+    pat.data = (u_char *) luaL_checklstring(L, 2, &pat.len);<br>+<br>+    if (nargs >= 3) {<br>+        opts.data = (u_char *) luaL_checklstring(L, 3, &opts.len);<br>+<br>+        if (nargs == 4) {<br>+            limit = luaL_checkinteger(L, 4);<br>
+            lua_pop(L, 1);<br>+<br>+        } else {/* nargs == 3 */<br>+            limit = -1;<br>+        }<br>+<br>+    } else { /* nargs == 2 */<br>+        opts.data = (u_char *) "";<br>+        opts.len = 0;<br>
+    }<br>+<br>+    ngx_memzero(&re_comp, sizeof(ngx_lua_regex_compile_t));<br>+<br>+    /* stack: subj regex repl */<br>+<br>+    re_comp.options = 0;<br>+<br>+    flags = ngx_http_lua_ngx_re_parse_opts(L, &re_comp, &opts, 4);<br>
+<br>+    if (flags & NGX_LUA_RE_COMPILE_ONCE) {<br>+        lmcf = ngx_http_get_module_main_conf(r, ngx_http_lua_module);<br>+        pool = lmcf->pool;<br>+<br>+        dd("server pool %p", lmcf->pool);<br>
+<br>+        lua_pushlightuserdata(L, &ngx_http_lua_regex_cache_key);<br>+        lua_rawget(L, LUA_REGISTRYINDEX); /* table */<br>+<br>+        lua_pushliteral(L, "s");<br>+        lua_pushinteger(L, tpl.len);<br>
+        lua_pushliteral(L, ":");<br>+        lua_pushvalue(L, 2);<br>+<br>+        if (tpl.len != 0) {<br>+            lua_pushvalue(L, 3);<br>+        }<br>+<br>+        dd("options size: %d", (int) sizeof(re_comp.options));<br>
+<br>+        lua_pushlstring(L, (char *) &re_comp.options, sizeof(re_comp.options));<br>+                /* table regex opts */<br>+<br>+        if (tpl.len == 0) {<br>+            lua_concat(L, 5); /* table key */<br>
+<br>+        } else {<br>+            lua_concat(L, 6); /* table key */<br>+        }<br>+<br>+        lua_pushvalue(L, -1); /* table key key */<br>+<br>+        dd("regex cache key: %.*s", (int) (pat.len + sizeof(re_comp.options)),<br>
+                lua_tostring(L, -1));<br>+<br>+        lua_rawget(L, -3); /* table key re */<br>+        re = lua_touserdata(L, -1);<br>+<br>+        lua_pop(L, 1); /* table key */<br>+<br>+        if (re) {<br>+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+                    "lua regex cache hit for split regex \"%s\" with options "<br>+                    "\"%s\"", pat.data, opts.data);<br>+<br>+            lua_pop(L, 2);<br>+<br>+            dd("restoring regex %p, ncaptures %d,  captures %p", re->regex,<br>
+                    re->ncaptures, re->captures);<br>+<br>+            re_comp.regex = re->regex;<br>+            sd = re->regex_sd;<br>+            re_comp.captures = re->ncaptures;<br>+            cap = re->captures;<br>
+            ctpl = re->replace;<br>+<br>+            if (flags & NGX_LUA_RE_MODE_DFA) {<br>+                ovecsize = 2;<br>+<br>+            } else {<br>+                ovecsize = (re->ncaptures + 1) * 3;<br>
+            }<br>+<br>+            goto exec;<br>+        }<br>+<br>+        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+                "lua regex cache miss for split regex \"%s\" with options "<br>
+                "\"%s\"",<br>+                pat.data, opts.data);<br>+<br>+        if (lmcf->regex_cache_entries >= lmcf->regex_cache_max_entries) {<br>+<br>+            if (lmcf->regex_cache_entries == lmcf->regex_cache_max_entries) {<br>
+                ngx_log_error(NGX_LOG_WARN, r->connection->log, 0,<br>+                        "lua exceeding regex cache max entries (%i)",<br>+                        lmcf->regex_cache_max_entries);<br>
+<br>+                lmcf->regex_cache_entries++;<br>+            }<br>+<br>+            pool = r->pool;<br>+            flags &= ~NGX_LUA_RE_COMPILE_ONCE;<br>+        }<br>+<br>+    } else {<br>+        pool = r->pool;<br>
+    }<br>+<br>+    re_comp.pattern = pat;<br>+    re_comp.err.len = NGX_MAX_CONF_ERRSTR;<br>+    re_comp.err.data = errstr;<br>+    re_comp.pool = pool;<br>+<br>+    dd("compiling regex");<br>+<br>+    ngx_log_debug5(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+            "lua compiling split regex \"%s\" with options \"%s\" "<br>+            "(compile once: %d) (dfa mode: %d) (jit mode: %d)",<br>+            pat.data, opts.data,<br>+            (flags & NGX_LUA_RE_COMPILE_ONCE) != 0,<br>
+            (flags & NGX_LUA_RE_MODE_DFA) != 0,<br>+            (flags & NGX_LUA_RE_MODE_JIT) != 0);<br>+<br>+    old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>+<br>+    rc = ngx_lua_regex_compile(&re_comp);<br>
+<br>+    ngx_http_lua_pcre_malloc_done(old_pool);<br>+<br>+    if (rc != NGX_OK) {<br>+        dd("compile failed");<br>+<br>+        re_comp.err.data[re_comp.err.len] = '\0';<br>+        msg = lua_pushfstring(L, "failed to compile regex \"%s\": %s",<br>
+                pat.data, re_comp.err.data);<br>+<br>+        return luaL_argerror(L, 2, msg);<br>+    }<br>+<br>+#if LUA_HAVE_PCRE_JIT<br>+<br>+    if (flags & NGX_LUA_RE_MODE_JIT) {<br>+<br>+        old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>
+<br>+        sd = pcre_study(re_comp.regex, PCRE_STUDY_JIT_COMPILE, &msg);<br>+<br>+        ngx_http_lua_pcre_malloc_done(old_pool);<br>+<br>+#   if (NGX_DEBUG)<br>+        dd("sd = %p", sd);<br>+<br>+        if (msg != NULL) {<br>
+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+                "pcre study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",<br>+                msg, sd);<br>+        }<br>+<br>+        if (sd != NULL) {<br>
+            int         jitted;<br>+<br>+            old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>+<br>+            pcre_fullinfo(re_comp.regex, sd, PCRE_INFO_JIT, &jitted);<br>+<br>+            ngx_http_lua_pcre_malloc_done(old_pool);<br>
+<br>+            ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+                "pcre JIT compiling result: %d", jitted);<br>+        }<br>+#   endif /* NGX_DEBUG */<br>+<br>+    } else {<br>
+<br>+        old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>+<br>+        sd = pcre_study(re_comp.regex, 0, &msg);<br>+<br>+        ngx_http_lua_pcre_malloc_done(old_pool);<br>+<br>+#   if (NGX_DEBUG)<br>+        dd("sd = %p", sd);<br>
+<br>+        if (msg != NULL) {<br>+            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+                "pcre_study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",<br>+                msg, sd);<br>
+        }<br>+#   endif /* NGX_DEBUG */<br>+    }<br>+<br>+#else  /* LUA_HAVE_PCRE_JIT */<br>+<br>+    if (flags & NGX_LUA_RE_MODE_JIT) {<br>+        ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+                "your pcre build does not have JIT support and "<br>+                "the \"j\" regex option is ignored");<br>+    }<br>+<br>+#endif /* LUA_HAVE_PCRE_JIT */<br>+<br>+    dd("compile done, captures %d", re_comp.captures);<br>
+<br>+    if (flags & NGX_LUA_RE_MODE_DFA) {<br>+        ovecsize = 2;<br>+<br>+    } else {<br>+        ovecsize = (re_comp.captures + 1) * 3;<br>+    }<br>+<br>+    cap = ngx_palloc(pool, ovecsize * sizeof(int));<br>
+    if (cap == NULL) {<br>+        flags &= ~NGX_LUA_RE_COMPILE_ONCE;<br>+        msg = "out of memory";<br>+        goto error;<br>+    }<br>+<br>+    if (flags & NGX_LUA_RE_COMPILE_ONCE) {<br>+<br>+        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+                "lua saving compiled sub regex (%d captures) into the cache "<br>+                "(entries %i)", re_comp.captures,<br>+                lmcf ? lmcf->regex_cache_entries : 0);<br>+<br>
+        re = ngx_palloc(pool, sizeof(ngx_http_lua_regex_t));<br>+        if (re == NULL) {<br>+            return luaL_error(L, "out of memory");<br>+        }<br>+<br>+        dd("saving regex %p, ncaptures %d,  captures %p", re_comp.regex,<br>
+                re_comp.captures, cap);<br>+<br>+        re->regex = re_comp.regex;<br>+        re->regex_sd = sd;<br>+        re->ncaptures = re_comp.captures;<br>+        re->captures = cap;<br>+        re->replace = ctpl;<br>
+<br>+        lua_pushlightuserdata(L, re); /* table key value */<br>+        lua_rawset(L, -3); /* table */<br>+        lua_pop(L, 1);<br>+<br>+        if (lmcf) {<br>+            lmcf->regex_cache_entries++;<br>+        }<br>
+    }<br>+<br>+exec:<br>+    count = 0;<br>+    offset = 0;<br>+<br>+    lua_newtable(L);<br>+<br>+    for (;;) {<br>+        if (subj.len == 0 || count == limit) {<br>+            break;<br>+        }<br>+<br>+        if (flags & NGX_LUA_RE_MODE_DFA) {<br>
+<br>+#if LUA_HAVE_PCRE_DFA<br>+<br>+            int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];<br>+            rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,<br>+                offset, cap, ovecsize, ws, NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT);<br>
+<br>+#else /* LUA_HAVE_PCRE_DFA */<br>+<br>+            msg = "at least pcre 6.0 is required for the DFA mode";<br>+            goto error;<br>+<br>+#endif /* LUA_HAVE_PCRE_DFA */<br>+<br>+        } else {<br>+            rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, offset, cap,<br>
+                    ovecsize);<br>+        }<br>+<br>+        if (rc == NGX_REGEX_NO_MATCHED) {<br>+            break;<br>+        }<br>+<br>+        if (rc < 0) {<br>+            msg = lua_pushfstring(L, ngx_regex_exec_n " failed: %d on \"%s\" "<br>
+                "using \"%s\"", (int) rc, subj.data, pat.data);<br>+            goto error;<br>+        }<br>+<br>+        if (rc == 0) {<br>+            if (flags & NGX_LUA_RE_MODE_DFA) {<br>+                rc = 1;<br>
+<br>+            } else {<br>+                msg = "capture size too small";<br>+                goto error;<br>+            }<br>+        }<br>+<br>+        dd("rc = %d", (int) rc);<br>+<br>+        count++;<br>
+<br>+        luaL_buffinit(L, &luabuf);<br>+<br>+        luaL_addlstring(&luabuf, (char *) &subj.data[offset],<br>+                    cap[0] - offset);<br>+<br>+        lua_pushnumber(L, count);<br>+        luaL_pushresult(&luabuf);<br>
+        lua_settable(L, -3);<br>+<br>+        offset = cap[1];<br>+<br>+    }<br>+<br>+    if (count == 0) {<br>+        dd("no match, just the original subject");<br>+<br>+        lua_pushnumber(L, count+1);<br>
+        lua_pushvalue(L, 1);<br>+        lua_settable(L, -3);<br>+<br>+    } else {<br>+        if (offset != (int) subj.len) {<br>+            dd("adding trailer: %s (len %d)", &subj.data[offset],<br>+                    (int) (subj.len - offset));<br>
+<br>+            luaL_buffinit(L, &luabuf);<br>+<br>+            luaL_addlstring(&luabuf, (char *) &subj.data[offset],<br>+                    subj.len - offset);<br>+<br>+            lua_pushnumber(L, count+1);<br>
+            luaL_pushresult(&luabuf);<br>+            lua_settable(L, -3);<br>+<br>+        }<br>+<br>+        dd("the dst string: %s", lua_tostring(L, -1));<br>+    }<br>+<br>+    if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {<br>
+        if (sd) {<br>+            ngx_http_lua_regex_free_study_data(pool, sd);<br>+        }<br>+<br>+        if (re_comp.regex) {<br>+            ngx_pfree(pool, re_comp.regex);<br>+        }<br>+<br>+        if (ctpl) {<br>
+            ngx_pfree(pool, ctpl);<br>+        }<br>+<br>+        if (cap) {<br>+            ngx_pfree(pool, cap);<br>+        }<br>+    }<br>+<br>+    return 1;<br>+<br>+error:<br>+    if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {<br>
+        if (sd) {<br>+            ngx_http_lua_regex_free_study_data(pool, sd);<br>+        }<br>+<br>+        if (re_comp.regex) {<br>+            ngx_pfree(pool, re_comp.regex);<br>+        }<br>+<br>+        if (ctpl) {<br>
+            ngx_pfree(pool, ctpl);<br>+        }<br>+<br>+        if (cap) {<br>+            ngx_pfree(pool, cap);<br>+        }<br>+    }<br>+<br>+    return luaL_error(L, msg);<br>+}<br> <br> void<br> ngx_http_lua_inject_regex_api(lua_State *L)<br>
@@ -1631,6 +2071,9 @@ ngx_http_lua_inject_regex_api(lua_State *L)<br>     lua_pushcfunction(L, ngx_http_lua_ngx_re_gsub);<br>     lua_setfield(L, -2, "gsub");<br> <br>+    lua_pushcfunction(L, ngx_http_lua_ngx_re_split);<br>
+    lua_setfield(L, -2, "split");<br>+<br>     lua_setfield(L, -2, "re");<br> }<br> <br>-- <br>1.7.9.5<br><br><br><div class="gmail_quote">2012/7/13 Jader H. Silva <span dir="ltr"><<a href="mailto:jaderhs5@gmail.com" target="_blank">jaderhs5@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">So, here it is :)<br><br>ngx.re.split(<i>subject, regex, options?</i>, limit?)<br><br>This function is based on ngx_re_sub.<br>
<br>It will split subject on regex matches and return a table of strings. Limit is the maximum number of splits (a limit of 0 returns a table containing only the subject string).<br>
<br>Let me know if there are bugs, indentation issues or anything I need to fix.<span class="HOEnZb"><font color="#888888"><br><br>Jader H. Silva</font></span><div class="HOEnZb"><div class="h5"><br><br><div class="gmail_quote">
2012/7/11 agentzh <span dir="ltr"><<a href="mailto:agentzh@gmail.com" target="_blank">agentzh@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hello!<br>
<br>
On Wed, Jul 11, 2012 at 12:54 PM, Jader Henrique da Silva<br>
<<a href="mailto:cad_jsilva@uolinc.com" target="_blank">cad_jsilva@uolinc.com</a>> wrote:<br>
> I was checking HttpLuaModule docs and saw "ngx.re.split" implementation in<br>
> the TODO section.<br>
><br>
> Is it already implemented?<br>
<br>
Nope, otherwise I would update the TODO section accordingly :)<br>
<br>
> Are there any details about this implementation (e.g. parameters, returned<br>
> data)?<br>
><br>
<br>
Not yet. But I think the behavior will be similar to Perl 5's split<br>
builtin function.<br>
<br>
I'm always open to patches for this feature :)<br>
<br>
Best regards,<br>
-agentzh<br>
<br>
_______________________________________________<br>
nginx mailing list<br>
<a href="mailto:nginx@nginx.org" target="_blank">nginx@nginx.org</a><br>
<a href="http://mailman.nginx.org/mailman/listinfo/nginx" target="_blank">http://mailman.nginx.org/mailman/listinfo/nginx</a><br>
</blockquote></div><br>
</div></div></blockquote></div><br>