The patch was removed in the previous message :(<br>So here it is (for real).<br><br>From 9091c40e22f6fd0ca2173ecbeb1f932502cc8ac6 Mon Sep 17 00:00:00 2001<br>From: "Jader H. Silva" <<a href="mailto:jaderhs5@gmail.com">jaderhs5@gmail.com</a>><br>
Date: Fri, 13 Jul 2012 18:06:32 -0300<br>Subject: [PATCH] Add ngx.re.split function<br><br>---<br> src/ngx_http_lua_regex.c | 443 ++++++++++++++++++++++++++++++++++++++++++++++<br> 1 file changed, 443 insertions(+)<br><br>
diff --git a/src/ngx_http_lua_regex.c b/src/ngx_http_lua_regex.c<br>index 108070c..aa5d445 100644<br>--- a/src/ngx_http_lua_regex.c<br>+++ b/src/ngx_http_lua_regex.c<br>@@ -74,6 +74,7 @@ static int ngx_http_lua_ngx_re_match(lua_State *L);<br>
static int ngx_http_lua_ngx_re_gmatch(lua_State *L);<br> static int ngx_http_lua_ngx_re_sub(lua_State *L);<br> static int ngx_http_lua_ngx_re_gsub(lua_State *L);<br>+static int ngx_http_lua_ngx_re_split(lua_State *L);<br>
static void ngx_http_lua_regex_free_study_data(ngx_pool_t *pool,<br> pcre_extra *sd);<br> static ngx_int_t ngx_lua_regex_compile(ngx_lua_regex_compile_t *rc);<br>@@ -1611,6 +1612,445 @@ error:<br> return luaL_error(L, msg);<br>
}<br> <br>+static int<br>+ngx_http_lua_ngx_re_split(lua_State *L)<br>+{<br>+ ngx_http_lua_regex_t *re;<br>+ ngx_http_request_t *r;<br>+ ngx_str_t subj;<br>+ ngx_str_t pat;<br>
+ ngx_str_t opts;<br>+ ngx_str_t tpl;<br>+ ngx_http_lua_main_conf_t *lmcf = NULL;<br>+ ngx_pool_t *pool, *old_pool;<br>+ ngx_lua_regex_compile_t re_comp;<br>
+ const char *msg;<br>+ ngx_int_t rc;<br>+ ngx_uint_t n;<br>+ ngx_int_t i;<br>+ int nargs;<br>+ int *cap = NULL;<br>
+ int ovecsize;<br>+ int type;<br>+ unsigned func;<br>+ int offset;<br>+ size_t count;<br>
+ luaL_Buffer luabuf;<br>+ ngx_int_t flags;<br>+ ngx_int_t limit = -1;<br>+ u_char *p;<br>+ u_char errstr[NGX_MAX_CONF_ERRSTR + 1];<br>
+ pcre_extra *sd = NULL;<br>+<br>+ ngx_http_lua_complex_value_t *ctpl = NULL;<br>+ ngx_http_lua_compile_complex_value_t ccv;<br>+<br>+ nargs = lua_gettop(L);<br>+<br>+ if (nargs != 2 && nargs != 3 && nargs != 4) {<br>
+ return luaL_error(L, "expecting two or three or four arguments, but got %d",<br>+ nargs);<br>+ }<br>+<br>+ lua_pushlightuserdata(L, &ngx_http_lua_request_key);<br>+ lua_rawget(L, LUA_GLOBALSINDEX);<br>
+ r = lua_touserdata(L, -1);<br>+ lua_pop(L, 1);<br>+<br>+ if (r == NULL) {<br>+ return luaL_error(L, "no request object found");<br>+ }<br>+<br>+ subj.data = (u_char *) luaL_checklstring(L, 1, &subj.len);<br>
+ pat.data = (u_char *) luaL_checklstring(L, 2, &pat.len);<br>+<br>+ if (nargs >= 3) {<br>+ opts.data = (u_char *) luaL_checklstring(L, 3, &opts.len);<br>+<br>+ if (nargs == 4) {<br>+ limit = luaL_checkinteger(L, 4);<br>
+ lua_pop(L, 1);<br>+<br>+ } else {/* nargs == 3 */<br>+ limit = -1;<br>+ }<br>+<br>+ } else { /* nargs == 2 */<br>+ opts.data = (u_char *) "";<br>+ opts.len = 0;<br>
+ }<br>+<br>+ ngx_memzero(&re_comp, sizeof(ngx_lua_regex_compile_t));<br>+<br>+ /* stack: subj regex repl */<br>+<br>+ re_comp.options = 0;<br>+<br>+ flags = ngx_http_lua_ngx_re_parse_opts(L, &re_comp, &opts, 4);<br>
+<br>+ if (flags & NGX_LUA_RE_COMPILE_ONCE) {<br>+ lmcf = ngx_http_get_module_main_conf(r, ngx_http_lua_module);<br>+ pool = lmcf->pool;<br>+<br>+ dd("server pool %p", lmcf->pool);<br>
+<br>+ lua_pushlightuserdata(L, &ngx_http_lua_regex_cache_key);<br>+ lua_rawget(L, LUA_REGISTRYINDEX); /* table */<br>+<br>+ lua_pushliteral(L, "s");<br>+ lua_pushinteger(L, tpl.len);<br>
+ lua_pushliteral(L, ":");<br>+ lua_pushvalue(L, 2);<br>+<br>+ if (tpl.len != 0) {<br>+ lua_pushvalue(L, 3);<br>+ }<br>+<br>+ dd("options size: %d", (int) sizeof(re_comp.options));<br>
+<br>+ lua_pushlstring(L, (char *) &re_comp.options, sizeof(re_comp.options));<br>+ /* table regex opts */<br>+<br>+ if (tpl.len == 0) {<br>+ lua_concat(L, 5); /* table key */<br>
+<br>+ } else {<br>+ lua_concat(L, 6); /* table key */<br>+ }<br>+<br>+ lua_pushvalue(L, -1); /* table key key */<br>+<br>+ dd("regex cache key: %.*s", (int) (pat.len + sizeof(re_comp.options)),<br>
+ lua_tostring(L, -1));<br>+<br>+ lua_rawget(L, -3); /* table key re */<br>+ re = lua_touserdata(L, -1);<br>+<br>+ lua_pop(L, 1); /* table key */<br>+<br>+ if (re) {<br>+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+ "lua regex cache hit for split regex \"%s\" with options "<br>+ "\"%s\"", pat.data, opts.data);<br>+<br>+ lua_pop(L, 2);<br>+<br>+ dd("restoring regex %p, ncaptures %d, captures %p", re->regex,<br>
+ re->ncaptures, re->captures);<br>+<br>+ re_comp.regex = re->regex;<br>+ sd = re->regex_sd;<br>+ re_comp.captures = re->ncaptures;<br>+ cap = re->captures;<br>
+ ctpl = re->replace;<br>+<br>+ if (flags & NGX_LUA_RE_MODE_DFA) {<br>+ ovecsize = 2;<br>+<br>+ } else {<br>+ ovecsize = (re->ncaptures + 1) * 3;<br>
+ }<br>+<br>+ goto exec;<br>+ }<br>+<br>+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+ "lua regex cache miss for split regex \"%s\" with options "<br>
+ "\"%s\"",<br>+ pat.data, opts.data);<br>+<br>+ if (lmcf->regex_cache_entries >= lmcf->regex_cache_max_entries) {<br>+<br>+ if (lmcf->regex_cache_entries == lmcf->regex_cache_max_entries) {<br>
+ ngx_log_error(NGX_LOG_WARN, r->connection->log, 0,<br>+ "lua exceeding regex cache max entries (%i)",<br>+ lmcf->regex_cache_max_entries);<br>
+<br>+ lmcf->regex_cache_entries++;<br>+ }<br>+<br>+ pool = r->pool;<br>+ flags &= ~NGX_LUA_RE_COMPILE_ONCE;<br>+ }<br>+<br>+ } else {<br>+ pool = r->pool;<br>
+ }<br>+<br>+ re_comp.pattern = pat;<br>+ re_comp.err.len = NGX_MAX_CONF_ERRSTR;<br>+ re_comp.err.data = errstr;<br>+ re_comp.pool = pool;<br>+<br>+ dd("compiling regex");<br>+<br>+ ngx_log_debug5(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+ "lua compiling split regex \"%s\" with options \"%s\" "<br>+ "(compile once: %d) (dfa mode: %d) (jit mode: %d)",<br>+ pat.data, opts.data,<br>+ (flags & NGX_LUA_RE_COMPILE_ONCE) != 0,<br>
+ (flags & NGX_LUA_RE_MODE_DFA) != 0,<br>+ (flags & NGX_LUA_RE_MODE_JIT) != 0);<br>+<br>+ old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>+<br>+ rc = ngx_lua_regex_compile(&re_comp);<br>
+<br>+ ngx_http_lua_pcre_malloc_done(old_pool);<br>+<br>+ if (rc != NGX_OK) {<br>+ dd("compile failed");<br>+<br>+ re_comp.err.data[re_comp.err.len] = '\0';<br>+ msg = lua_pushfstring(L, "failed to compile regex \"%s\": %s",<br>
+ pat.data, re_comp.err.data);<br>+<br>+ return luaL_argerror(L, 2, msg);<br>+ }<br>+<br>+#if LUA_HAVE_PCRE_JIT<br>+<br>+ if (flags & NGX_LUA_RE_MODE_JIT) {<br>+<br>+ old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>
+<br>+ sd = pcre_study(re_comp.regex, PCRE_STUDY_JIT_COMPILE, &msg);<br>+<br>+ ngx_http_lua_pcre_malloc_done(old_pool);<br>+<br>+# if (NGX_DEBUG)<br>+ dd("sd = %p", sd);<br>+<br>+ if (msg != NULL) {<br>
+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+ "pcre study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",<br>+ msg, sd);<br>+ }<br>+<br>+ if (sd != NULL) {<br>
+ int jitted;<br>+<br>+ old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>+<br>+ pcre_fullinfo(re_comp.regex, sd, PCRE_INFO_JIT, &jitted);<br>+<br>+ ngx_http_lua_pcre_malloc_done(old_pool);<br>
+<br>+ ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+ "pcre JIT compiling result: %d", jitted);<br>+ }<br>+# endif /* NGX_DEBUG */<br>+<br>+ } else {<br>
+<br>+ old_pool = ngx_http_lua_pcre_malloc_init(pool);<br>+<br>+ sd = pcre_study(re_comp.regex, 0, &msg);<br>+<br>+ ngx_http_lua_pcre_malloc_done(old_pool);<br>+<br>+# if (NGX_DEBUG)<br>+ dd("sd = %p", sd);<br>
+<br>+ if (msg != NULL) {<br>+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>+ "pcre_study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",<br>+ msg, sd);<br>
+ }<br>+# endif /* NGX_DEBUG */<br>+ }<br>+<br>+#else /* LUA_HAVE_PCRE_JIT */<br>+<br>+ if (flags & NGX_LUA_RE_MODE_JIT) {<br>+ ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+ "your pcre build does not have JIT support and "<br>+ "the \"j\" regex option is ignored");<br>+ }<br>+<br>+#endif /* LUA_HAVE_PCRE_JIT */<br>+<br>+ dd("compile done, captures %d", re_comp.captures);<br>
+<br>+ if (flags & NGX_LUA_RE_MODE_DFA) {<br>+ ovecsize = 2;<br>+<br>+ } else {<br>+ ovecsize = (re_comp.captures + 1) * 3;<br>+ }<br>+<br>+ cap = ngx_palloc(pool, ovecsize * sizeof(int));<br>
+ if (cap == NULL) {<br>+ flags &= ~NGX_LUA_RE_COMPILE_ONCE;<br>+ msg = "out of memory";<br>+ goto error;<br>+ }<br>+<br>+ if (flags & NGX_LUA_RE_COMPILE_ONCE) {<br>+<br>+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,<br>
+ "lua saving compiled sub regex (%d captures) into the cache "<br>+ "(entries %i)", re_comp.captures,<br>+ lmcf ? lmcf->regex_cache_entries : 0);<br>+<br>
+ re = ngx_palloc(pool, sizeof(ngx_http_lua_regex_t));<br>+ if (re == NULL) {<br>+ return luaL_error(L, "out of memory");<br>+ }<br>+<br>+ dd("saving regex %p, ncaptures %d, captures %p", re_comp.regex,<br>
+ re_comp.captures, cap);<br>+<br>+ re->regex = re_comp.regex;<br>+ re->regex_sd = sd;<br>+ re->ncaptures = re_comp.captures;<br>+ re->captures = cap;<br>+ re->replace = ctpl;<br>
+<br>+ lua_pushlightuserdata(L, re); /* table key value */<br>+ lua_rawset(L, -3); /* table */<br>+ lua_pop(L, 1);<br>+<br>+ if (lmcf) {<br>+ lmcf->regex_cache_entries++;<br>+ }<br>
+ }<br>+<br>+exec:<br>+ count = 0;<br>+ offset = 0;<br>+<br>+ lua_newtable(L);<br>+<br>+ for (;;) {<br>+ if (subj.len == 0 || count == limit) {<br>+ break;<br>+ }<br>+<br>+ if (flags & NGX_LUA_RE_MODE_DFA) {<br>
+<br>+#if LUA_HAVE_PCRE_DFA<br>+<br>+ int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];<br>+ rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,<br>+ offset, cap, ovecsize, ws, NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT);<br>
+<br>+#else /* LUA_HAVE_PCRE_DFA */<br>+<br>+ msg = "at least pcre 6.0 is required for the DFA mode";<br>+ goto error;<br>+<br>+#endif /* LUA_HAVE_PCRE_DFA */<br>+<br>+ } else {<br>+ rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, offset, cap,<br>
+ ovecsize);<br>+ }<br>+<br>+ if (rc == NGX_REGEX_NO_MATCHED) {<br>+ break;<br>+ }<br>+<br>+ if (rc < 0) {<br>+ msg = lua_pushfstring(L, ngx_regex_exec_n " failed: %d on \"%s\" "<br>
+ "using \"%s\"", (int) rc, subj.data, pat.data);<br>+ goto error;<br>+ }<br>+<br>+ if (rc == 0) {<br>+ if (flags & NGX_LUA_RE_MODE_DFA) {<br>+ rc = 1;<br>
+<br>+ } else {<br>+ msg = "capture size too small";<br>+ goto error;<br>+ }<br>+ }<br>+<br>+ dd("rc = %d", (int) rc);<br>+<br>+ count++;<br>
+<br>+ luaL_buffinit(L, &luabuf);<br>+<br>+ luaL_addlstring(&luabuf, (char *) &subj.data[offset],<br>+ cap[0] - offset);<br>+<br>+ lua_pushnumber(L, count);<br>+ luaL_pushresult(&luabuf);<br>
+ lua_settable(L, -3);<br>+<br>+ offset = cap[1];<br>+<br>+ }<br>+<br>+ if (count == 0) {<br>+ dd("no match, just the original subject");<br>+<br>+ lua_pushnumber(L, count+1);<br>
+ lua_pushvalue(L, 1);<br>+ lua_settable(L, -3);<br>+<br>+ } else {<br>+ if (offset != (int) subj.len) {<br>+ dd("adding trailer: %s (len %d)", &subj.data[offset],<br>+ (int) (subj.len - offset));<br>
+<br>+ luaL_buffinit(L, &luabuf);<br>+<br>+ luaL_addlstring(&luabuf, (char *) &subj.data[offset],<br>+ subj.len - offset);<br>+<br>+ lua_pushnumber(L, count+1);<br>
+ luaL_pushresult(&luabuf);<br>+ lua_settable(L, -3);<br>+<br>+ }<br>+<br>+ dd("the dst string: %s", lua_tostring(L, -1));<br>+ }<br>+<br>+ if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {<br>
+ if (sd) {<br>+ ngx_http_lua_regex_free_study_data(pool, sd);<br>+ }<br>+<br>+ if (re_comp.regex) {<br>+ ngx_pfree(pool, re_comp.regex);<br>+ }<br>+<br>+ if (ctpl) {<br>
+ ngx_pfree(pool, ctpl);<br>+ }<br>+<br>+ if (cap) {<br>+ ngx_pfree(pool, cap);<br>+ }<br>+ }<br>+<br>+ return 1;<br>+<br>+error:<br>+ if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) {<br>
+ if (sd) {<br>+ ngx_http_lua_regex_free_study_data(pool, sd);<br>+ }<br>+<br>+ if (re_comp.regex) {<br>+ ngx_pfree(pool, re_comp.regex);<br>+ }<br>+<br>+ if (ctpl) {<br>
+ ngx_pfree(pool, ctpl);<br>+ }<br>+<br>+ if (cap) {<br>+ ngx_pfree(pool, cap);<br>+ }<br>+ }<br>+<br>+ return luaL_error(L, msg);<br>+}<br> <br> void<br> ngx_http_lua_inject_regex_api(lua_State *L)<br>
@@ -1631,6 +2071,9 @@ ngx_http_lua_inject_regex_api(lua_State *L)<br> lua_pushcfunction(L, ngx_http_lua_ngx_re_gsub);<br> lua_setfield(L, -2, "gsub");<br> <br>+ lua_pushcfunction(L, ngx_http_lua_ngx_re_split);<br>
+ lua_setfield(L, -2, "split");<br>+<br> lua_setfield(L, -2, "re");<br> }<br> <br>-- <br>1.7.9.5<br><br><br><div class="gmail_quote">2012/7/13 Jader H. Silva <span dir="ltr"><<a href="mailto:jaderhs5@gmail.com" target="_blank">jaderhs5@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">So, here it is :)<br><br>ngx.re.split(<i>subject, regex, options?</i>, limit?)<br><br>This function is based on ngx_re_sub.<br>
<br>It will split subject on regex matches and return a table of strings. Limit is the max number of splits (0 will return a table containing the subject string).<br>
<br>Let me know if there are bugs, identation issues or anything I need to fix.<span class="HOEnZb"><font color="#888888"><br><br>Jader H. Silva</font></span><div class="HOEnZb"><div class="h5"><br><br><div class="gmail_quote">
2012/7/11 agentzh <span dir="ltr"><<a href="mailto:agentzh@gmail.com" target="_blank">agentzh@gmail.com</a>></span><br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hello!<br>
<br>
On Wed, Jul 11, 2012 at 12:54 PM, Jader Henrique da Silva<br>
<<a href="mailto:cad_jsilva@uolinc.com" target="_blank">cad_jsilva@uolinc.com</a>> wrote:<br>
> I was checking HttpLuaModule docs and saw "ngx.re.split" implementation in<br>
> the TODO section.<br>
><br>
> Is it already implemented?<br>
<br>
Nope, otherwise I would update the TODO section accordingly :)<br>
<br>
> Are there any details about this implementation (e.g. parameters, returned<br>
> data)?<br>
><br>
<br>
Not yet. But I think the behavior will be similar to Perl 5's split<br>
builtin function.<br>
<br>
I'm always open to patches for this feature :)<br>
<br>
Best regards,<br>
-agentzh<br>
<br>
_______________________________________________<br>
nginx mailing list<br>
<a href="mailto:nginx@nginx.org" target="_blank">nginx@nginx.org</a><br>
<a href="http://mailman.nginx.org/mailman/listinfo/nginx" target="_blank">http://mailman.nginx.org/mailman/listinfo/nginx</a><br>
</blockquote></div><br>
</div></div></blockquote></div><br>