[njs] Improved surrogate pairs support for PCRE2 backend.
Dmitry Volyntsev
xeioex at nginx.com
Fri May 6 03:27:15 UTC 2022
details: https://hg.nginx.org/njs/rev/ded5304adaf0
branches:
changeset: 1850:ded5304adaf0
user: Dmitry Volyntsev <xeioex at nginx.com>
date: Thu May 05 20:25:05 2022 -0700
description:
Improved surrogate pairs support for PCRE2 backend.
In collaboration with Javier Evans.
diffstat:
external/njs_regex.c | 20 +++++++++++++++++++-
src/test/njs_unit_test.c | 5 +++++
2 files changed, 24 insertions(+), 1 deletions(-)
diffs (46 lines):
diff -r 80ed74a0e205 -r ded5304adaf0 external/njs_regex.c
--- a/external/njs_regex.c Wed May 04 16:44:48 2022 -0700
+++ b/external/njs_regex.c Thu May 05 20:25:05 2022 -0700
@@ -60,8 +60,26 @@ njs_regex_compile_ctx_t *
njs_regex_compile_ctx_create(njs_regex_generic_ctx_t *ctx)
{
#ifdef NJS_HAVE_PCRE2
+ pcre2_compile_context *cc;
- return pcre2_compile_context_create(ctx);
+ cc = pcre2_compile_context_create(ctx);
+
+#ifdef PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+ if (njs_fast_path(cc != NULL)) {
+ /* Workaround for surrogate pairs in regular expressions.
+ *
+ * This option is needed because njs, unlike standard ECMAScript
+ * (which uses UTF-16 strings), stores and processes strings in UTF-8.
+ * By default, PCRE2 does not accept surrogate code points when it
+ * is compiled for UTF-8-only strings, but many polyfills and
+ * transpilers emit regular expressions that use surrogate pairs.
+ */
+ pcre2_set_compile_extra_options(cc,
+ PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES);
+ }
+#endif
+
+ return cc;
#else
diff -r 80ed74a0e205 -r ded5304adaf0 src/test/njs_unit_test.c
--- a/src/test/njs_unit_test.c Wed May 04 16:44:48 2022 -0700
+++ b/src/test/njs_unit_test.c Thu May 05 20:25:05 2022 -0700
@@ -10841,6 +10841,11 @@ static njs_unit_test_t njs_test[] =
njs_str("true") },
#endif
+#ifdef PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
+ { njs_str("/\\u200d\\ud800-/"),
+ njs_str("/\\u200d\\ud800-/") },
+#endif
+
{ njs_str("/(\\.(?!com|org)|\\/)/.test('ah.info')"),
njs_str("true") },
More information about the nginx-devel mailing list