[ngx_lua] ngx.re.split implementation
Jader H. Silva
jaderhs5 at gmail.com
Fri Jul 13 21:19:11 UTC 2012
The patch was removed in the previous message :(
So here it is (for real).
>From 9091c40e22f6fd0ca2173ecbeb1f932502cc8ac6 Mon Sep 17 00:00:00 2001
From: "Jader H. Silva" <jaderhs5 at gmail.com>
Date: Fri, 13 Jul 2012 18:06:32 -0300
Subject: [PATCH] Add ngx.re.split function
---
src/ngx_http_lua_regex.c | 443
++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 443 insertions(+)
diff --git a/src/ngx_http_lua_regex.c b/src/ngx_http_lua_regex.c
index 108070c..aa5d445 100644
--- a/src/ngx_http_lua_regex.c
+++ b/src/ngx_http_lua_regex.c
@@ -74,6 +74,7 @@ static int ngx_http_lua_ngx_re_match(lua_State *L);
static int ngx_http_lua_ngx_re_gmatch(lua_State *L);
static int ngx_http_lua_ngx_re_sub(lua_State *L);
static int ngx_http_lua_ngx_re_gsub(lua_State *L);
+static int ngx_http_lua_ngx_re_split(lua_State *L);
static void ngx_http_lua_regex_free_study_data(ngx_pool_t *pool,
pcre_extra *sd);
static ngx_int_t ngx_lua_regex_compile(ngx_lua_regex_compile_t *rc);
@@ -1611,6 +1612,445 @@ error:
return luaL_error(L, msg);
}
+static int
+ngx_http_lua_ngx_re_split(lua_State *L)
+{ /*
+ * Lua C function registered under the "split" field of the "re" table:
+ * split(subject, regex, options?, limit?).
+ *
+ * Accepts two to four Lua arguments (anything else raises a Lua error).
+ * The pattern is compiled with the parsed options, or fetched from the
+ * registry regex cache when NGX_LUA_RE_COMPILE_ONCE is set in flags.
+ * The regex is then executed repeatedly against the subject, starting at
+ * an advancing offset. For each match, the text between the previous
+ * offset and cap[0] is stored in a new table at index count; whatever
+ * remains after the last match is stored at count + 1. When nothing was
+ * stored (no match, empty subject, or limit == 0) the table holds only
+ * the original subject at index 1.
+ *
+ * Returns 1: the result table on top of the Lua stack. Failures are
+ * raised through luaL_error()/luaL_argerror().
+ *
+ * NOTE(review): tpl is declared but never assigned in this function, yet
+ * tpl.len is read while building the cache key -- looks like a leftover
+ * from the sub implementation; confirm.
+ * NOTE(review): when a match is empty (cap[1] equal to the current
+ * offset) the offset does not advance, so the loop appears to rely on
+ * limit alone to terminate; confirm for patterns that can match "".
+ * NOTE(review): n, i, type, func, p and ccv are not referenced below.
+ */
+ ngx_http_lua_regex_t *re;
+ ngx_http_request_t *r;
+ ngx_str_t subj;
+ ngx_str_t pat;
+ ngx_str_t opts;
+ ngx_str_t tpl;
+ ngx_http_lua_main_conf_t *lmcf = NULL;
+ ngx_pool_t *pool, *old_pool;
+ ngx_lua_regex_compile_t re_comp;
+ const char *msg;
+ ngx_int_t rc;
+ ngx_uint_t n;
+ ngx_int_t i;
+ int nargs;
+ int *cap = NULL;
+ int ovecsize;
+ int type;
+ unsigned func;
+ int offset;
+ size_t count;
+ luaL_Buffer luabuf;
+ ngx_int_t flags;
+ ngx_int_t limit = -1;
+ u_char *p;
+ u_char errstr[NGX_MAX_CONF_ERRSTR + 1];
+ pcre_extra *sd = NULL;
+
+ ngx_http_lua_complex_value_t *ctpl = NULL;
+ ngx_http_lua_compile_complex_value_t ccv;
+
+ nargs = lua_gettop(L);
+
+ if (nargs != 2 && nargs != 3 && nargs != 4) {
+ return luaL_error(L, "expecting two or three or four arguments,
but got %d",
+ nargs);
+ }
+
+ lua_pushlightuserdata(L, &ngx_http_lua_request_key);
+ lua_rawget(L, LUA_GLOBALSINDEX);
+ r = lua_touserdata(L, -1);
+ lua_pop(L, 1);
+
+ if (r == NULL) {
+ return luaL_error(L, "no request object found");
+ }
+
+ subj.data = (u_char *) luaL_checklstring(L, 1, &subj.len);
+ pat.data = (u_char *) luaL_checklstring(L, 2, &pat.len);
+
+ if (nargs >= 3) {
+ opts.data = (u_char *) luaL_checklstring(L, 3, &opts.len);
+
+ if (nargs == 4) {
+ limit = luaL_checkinteger(L, 4);
+ lua_pop(L, 1); /* drop limit so the stack is: subj regex opts */
+
+ } else {/* nargs == 3 */
+ limit = -1;
+ }
+
+ } else { /* nargs == 2 */
+ opts.data = (u_char *) "";
+ opts.len = 0;
+ }
+
+ ngx_memzero(&re_comp, sizeof(ngx_lua_regex_compile_t));
+
+ /* stack: subj regex [opts] */
+
+ re_comp.options = 0;
+
+ flags = ngx_http_lua_ngx_re_parse_opts(L, &re_comp, &opts, 4);
+
+ if (flags & NGX_LUA_RE_COMPILE_ONCE) {
+ lmcf = ngx_http_get_module_main_conf(r, ngx_http_lua_module);
+ pool = lmcf->pool;
+
+ dd("server pool %p", lmcf->pool);
+
+ lua_pushlightuserdata(L, &ngx_http_lua_regex_cache_key);
+ lua_rawget(L, LUA_REGISTRYINDEX); /* table */
+
+ lua_pushliteral(L, "s");
+ lua_pushinteger(L, tpl.len); /* NOTE(review): tpl is never assigned in this function */
+ lua_pushliteral(L, ":");
+ lua_pushvalue(L, 2);
+
+ if (tpl.len != 0) {
+ lua_pushvalue(L, 3);
+ }
+
+ dd("options size: %d", (int) sizeof(re_comp.options));
+
+ lua_pushlstring(L, (char *) &re_comp.options,
sizeof(re_comp.options));
+ /* table regex opts */
+
+ if (tpl.len == 0) {
+ lua_concat(L, 5); /* table key */
+
+ } else {
+ lua_concat(L, 6); /* table key */
+ }
+
+ lua_pushvalue(L, -1); /* table key key */
+
+ dd("regex cache key: %.*s", (int) (pat.len +
sizeof(re_comp.options)),
+ lua_tostring(L, -1));
+
+ lua_rawget(L, -3); /* table key re */
+ re = lua_touserdata(L, -1);
+
+ lua_pop(L, 1); /* table key */
+
+ if (re) {
+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "lua regex cache hit for split regex \"%s\" with
options "
+ "\"%s\"", pat.data, opts.data);
+
+ lua_pop(L, 2); /* drop the cache table and the key */
+
+ dd("restoring regex %p, ncaptures %d, captures %p", re->regex,
+ re->ncaptures, re->captures);
+
+ re_comp.regex = re->regex;
+ sd = re->regex_sd;
+ re_comp.captures = re->ncaptures;
+ cap = re->captures;
+ ctpl = re->replace;
+
+ if (flags & NGX_LUA_RE_MODE_DFA) {
+ ovecsize = 2;
+
+ } else {
+ ovecsize = (re->ncaptures + 1) * 3;
+ }
+
+ goto exec;
+ }
+
+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "lua regex cache miss for split regex \"%s\" with options "
+ "\"%s\"",
+ pat.data, opts.data);
+
+ if (lmcf->regex_cache_entries >= lmcf->regex_cache_max_entries) {
+
+ if (lmcf->regex_cache_entries ==
lmcf->regex_cache_max_entries) {
+ ngx_log_error(NGX_LOG_WARN, r->connection->log, 0,
+ "lua exceeding regex cache max entries (%i)",
+ lmcf->regex_cache_max_entries);
+
+ lmcf->regex_cache_entries++; /* moves the counter past the max so the warning above is not repeated */
+ }
+
+ pool = r->pool;
+ flags &= ~NGX_LUA_RE_COMPILE_ONCE; /* cache is full: use the request pool and skip caching */
+ }
+
+ } else {
+ pool = r->pool;
+ }
+
+ re_comp.pattern = pat;
+ re_comp.err.len = NGX_MAX_CONF_ERRSTR;
+ re_comp.err.data = errstr;
+ re_comp.pool = pool;
+
+ dd("compiling regex");
+
+ ngx_log_debug5(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "lua compiling split regex \"%s\" with options \"%s\" "
+ "(compile once: %d) (dfa mode: %d) (jit mode: %d)",
+ pat.data, opts.data,
+ (flags & NGX_LUA_RE_COMPILE_ONCE) != 0,
+ (flags & NGX_LUA_RE_MODE_DFA) != 0,
+ (flags & NGX_LUA_RE_MODE_JIT) != 0);
+
+ old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+ rc = ngx_lua_regex_compile(&re_comp);
+
+ ngx_http_lua_pcre_malloc_done(old_pool);
+
+ if (rc != NGX_OK) {
+ dd("compile failed");
+
+ re_comp.err.data[re_comp.err.len] = '\0';
+ msg = lua_pushfstring(L, "failed to compile regex \"%s\": %s",
+ pat.data, re_comp.err.data);
+
+ return luaL_argerror(L, 2, msg);
+ }
+
+#if LUA_HAVE_PCRE_JIT
+
+ if (flags & NGX_LUA_RE_MODE_JIT) {
+
+ old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+ sd = pcre_study(re_comp.regex, PCRE_STUDY_JIT_COMPILE, &msg);
+
+ ngx_http_lua_pcre_malloc_done(old_pool);
+
+# if (NGX_DEBUG)
+ dd("sd = %p", sd);
+
+ if (msg != NULL) {
+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "pcre study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",
+ msg, sd);
+ }
+
+ if (sd != NULL) {
+ int jitted;
+
+ old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+ pcre_fullinfo(re_comp.regex, sd, PCRE_INFO_JIT, &jitted);
+
+ ngx_http_lua_pcre_malloc_done(old_pool);
+
+ ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "pcre JIT compiling result: %d", jitted);
+ }
+# endif /* NGX_DEBUG */
+
+ } else {
+
+ old_pool = ngx_http_lua_pcre_malloc_init(pool);
+
+ sd = pcre_study(re_comp.regex, 0, &msg);
+
+ ngx_http_lua_pcre_malloc_done(old_pool);
+
+# if (NGX_DEBUG)
+ dd("sd = %p", sd);
+
+ if (msg != NULL) {
+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "pcre_study failed with PCRE_STUDY_JIT_COMPILE: %s (%p)",
+ msg, sd);
+ }
+# endif /* NGX_DEBUG */
+ }
+
+#else /* LUA_HAVE_PCRE_JIT */
+
+ if (flags & NGX_LUA_RE_MODE_JIT) {
+ ngx_log_debug0(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "your pcre build does not have JIT support and "
+ "the \"j\" regex option is ignored");
+ }
+
+#endif /* LUA_HAVE_PCRE_JIT */
+
+ dd("compile done, captures %d", re_comp.captures);
+
+ if (flags & NGX_LUA_RE_MODE_DFA) {
+ ovecsize = 2;
+
+ } else {
+ ovecsize = (re_comp.captures + 1) * 3;
+ }
+
+ cap = ngx_palloc(pool, ovecsize * sizeof(int));
+ if (cap == NULL) {
+ flags &= ~NGX_LUA_RE_COMPILE_ONCE; /* so the error path frees sd and the compiled regex */
+ msg = "out of memory";
+ goto error;
+ }
+
+ if (flags & NGX_LUA_RE_COMPILE_ONCE) {
+
+ ngx_log_debug2(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
+ "lua saving compiled sub regex (%d captures) into the
cache "
+ "(entries %i)", re_comp.captures,
+ lmcf ? lmcf->regex_cache_entries : 0);
+
+ re = ngx_palloc(pool, sizeof(ngx_http_lua_regex_t));
+ if (re == NULL) {
+ return luaL_error(L, "out of memory"); /* NOTE(review): returns without the cleanup done under error: */
+ }
+
+ dd("saving regex %p, ncaptures %d, captures %p", re_comp.regex,
+ re_comp.captures, cap);
+
+ re->regex = re_comp.regex;
+ re->regex_sd = sd;
+ re->ncaptures = re_comp.captures;
+ re->captures = cap;
+ re->replace = ctpl;
+
+ lua_pushlightuserdata(L, re); /* table key value */
+ lua_rawset(L, -3); /* table */
+ lua_pop(L, 1);
+
+ if (lmcf) {
+ lmcf->regex_cache_entries++;
+ }
+ }
+
+exec:
+ count = 0;
+ offset = 0;
+
+ lua_newtable(L); /* result table, returned to Lua */
+
+ for (;;) {
+ if (subj.len == 0 || count == limit) { /* empty subject, or limit pieces already stored */
+ break;
+ }
+
+ if (flags & NGX_LUA_RE_MODE_DFA) {
+
+#if LUA_HAVE_PCRE_DFA
+
+ int ws[NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT];
+ rc = ngx_http_lua_regex_dfa_exec(re_comp.regex, sd, &subj,
+ offset, cap, ovecsize, ws,
NGX_LUA_RE_DFA_MODE_WORKSPACE_COUNT);
+
+#else /* LUA_HAVE_PCRE_DFA */
+
+ msg = "at least pcre 6.0 is required for the DFA mode";
+ goto error;
+
+#endif /* LUA_HAVE_PCRE_DFA */
+
+ } else {
+ rc = ngx_http_lua_regex_exec(re_comp.regex, sd, &subj, offset,
cap,
+ ovecsize);
+ }
+
+ if (rc == NGX_REGEX_NO_MATCHED) {
+ break;
+ }
+
+ if (rc < 0) {
+ msg = lua_pushfstring(L, ngx_regex_exec_n " failed: %d on
\"%s\" "
+ "using \"%s\"", (int) rc, subj.data, pat.data);
+ goto error;
+ }
+
+ if (rc == 0) {
+ if (flags & NGX_LUA_RE_MODE_DFA) {
+ rc = 1;
+
+ } else {
+ msg = "capture size too small";
+ goto error;
+ }
+ }
+
+ dd("rc = %d", (int) rc);
+
+ count++;
+
+ luaL_buffinit(L, &luabuf);
+
+ luaL_addlstring(&luabuf, (char *) &subj.data[offset],
+ cap[0] - offset); /* text from offset up to cap[0]; presumably the match start (PCRE ovector) -- confirm */
+
+ lua_pushnumber(L, count);
+ luaL_pushresult(&luabuf);
+ lua_settable(L, -3); /* result[count] = piece */
+
+ offset = cap[1]; /* presumably the end of the match (PCRE ovector) -- confirm */
+
+ }
+
+ if (count == 0) {
+ dd("no match, just the original subject");
+
+ lua_pushnumber(L, count+1);
+ lua_pushvalue(L, 1);
+ lua_settable(L, -3); /* result[1] = subject */
+
+ } else {
+ if (offset != (int) subj.len) {
+ dd("adding trailer: %s (len %d)", &subj.data[offset],
+ (int) (subj.len - offset));
+
+ luaL_buffinit(L, &luabuf);
+
+ luaL_addlstring(&luabuf, (char *) &subj.data[offset],
+ subj.len - offset);
+
+ lua_pushnumber(L, count+1);
+ luaL_pushresult(&luabuf);
+ lua_settable(L, -3); /* result[count + 1] = rest of the subject */
+
+ }
+
+ dd("the dst string: %s", lua_tostring(L, -1));
+ }
+
+ if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) { /* nothing was cached: release the per-call allocations */
+ if (sd) {
+ ngx_http_lua_regex_free_study_data(pool, sd);
+ }
+
+ if (re_comp.regex) {
+ ngx_pfree(pool, re_comp.regex);
+ }
+
+ if (ctpl) {
+ ngx_pfree(pool, ctpl);
+ }
+
+ if (cap) {
+ ngx_pfree(pool, cap);
+ }
+ }
+
+ return 1;
+
+error:
+ if (!(flags & NGX_LUA_RE_COMPILE_ONCE)) { /* same cleanup as the success path, then raise msg */
+ if (sd) {
+ ngx_http_lua_regex_free_study_data(pool, sd);
+ }
+
+ if (re_comp.regex) {
+ ngx_pfree(pool, re_comp.regex);
+ }
+
+ if (ctpl) {
+ ngx_pfree(pool, ctpl);
+ }
+
+ if (cap) {
+ ngx_pfree(pool, cap);
+ }
+ }
+
+ return luaL_error(L, msg);
+}
void
ngx_http_lua_inject_regex_api(lua_State *L)
@@ -1631,6 +2071,9 @@ ngx_http_lua_inject_regex_api(lua_State *L)
lua_pushcfunction(L, ngx_http_lua_ngx_re_gsub);
lua_setfield(L, -2, "gsub");
+ lua_pushcfunction(L, ngx_http_lua_ngx_re_split);
+ lua_setfield(L, -2, "split");
+
lua_setfield(L, -2, "re");
}
--
1.7.9.5
2012/7/13 Jader H. Silva <jaderhs5 at gmail.com>
> So, here it is :)
>
> ngx.re.split(subject, regex, options?, limit?)
>
> This function is based on ngx_re_sub.
>
> It splits the subject at each regex match and returns a table of strings.
> Limit is the maximum number of splits (a limit of 0 returns a table
> containing only the subject string).
>
> Let me know if there are bugs, indentation issues or anything I need to fix.
>
> Jader H. Silva
>
>
> 2012/7/11 agentzh <agentzh at gmail.com>
>
>> Hello!
>>
>> On Wed, Jul 11, 2012 at 12:54 PM, Jader Henrique da Silva
>> <cad_jsilva at uolinc.com> wrote:
>> > I was checking HttpLuaModule docs and saw "ngx.re.split" implementation
>> in
>> > the TODO section.
>> >
>> > Is it already implemented?
>>
>> Nope, otherwise I would update the TODO section accordingly :)
>>
>> > Are there any details about this implementation (e.g. parameters,
>> returned
>> > data)?
>> >
>>
>> Not yet. But I think the behavior will be similar to Perl 5's split
>> builtin function.
>>
>> I'm always open to patches for this feature :)
>>
>> Best regards,
>> -agentzh
>>
>> _______________________________________________
>> nginx mailing list
>> nginx at nginx.org
>> http://mailman.nginx.org/mailman/listinfo/nginx
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx/attachments/20120713/18b127ed/attachment-0001.html>
More information about the nginx
mailing list