[njs] Improved readability of surrogate pairs handling.

Dmitry Volyntsev xeioex at nginx.com
Mon Jul 27 14:36:22 UTC 2020


details:   https://hg.nginx.org/njs/rev/d4c69313ac6c
branches:  
changeset: 1482:d4c69313ac6c
user:      Dmitry Volyntsev <xeioex at nginx.com>
date:      Mon Jul 27 14:34:35 2020 +0000
description:
Improved readability of surrogate pairs handling.

diffstat:

 src/njs_json.c    |   2 +-
 src/njs_parser.c  |   4 ++--
 src/njs_string.c  |   4 ++--
 src/njs_string.h  |  10 ----------
 src/njs_unicode.h |  12 ++++++++++++
 src/njs_utf16.c   |  11 ++++-------
 6 files changed, 21 insertions(+), 22 deletions(-)

diffs (123 lines):

diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_json.c
--- a/src/njs_json.c	Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_json.c	Mon Jul 27 14:34:35 2020 +0000
@@ -738,7 +738,7 @@ njs_json_parse_string(njs_json_parse_ctx
                 p += 4;
 
                 if (njs_fast_path(njs_surrogate_trailing(utf_low))) {
-                    utf = njs_string_surrogate_pair(utf, utf_low);
+                    utf = njs_surrogate_pair(utf, utf_low);
 
                 } else if (njs_surrogate_leading(utf_low)) {
                     utf = NJS_UNICODE_REPLACEMENT;
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_parser.c
--- a/src/njs_parser.c	Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_parser.c	Mon Jul 27 14:34:35 2020 +0000
@@ -8088,7 +8088,7 @@ njs_parser_escape_string_create(njs_pars
 
         if (cp_pair != 0) {
             if (njs_fast_path(njs_surrogate_trailing(cp))) {
-                cp = njs_string_surrogate_pair(cp_pair, cp);
+                cp = njs_surrogate_pair(cp_pair, cp);
 
             } else if (njs_slow_path(njs_surrogate_leading(cp))) {
                 cp = NJS_UNICODE_REPLACEMENT;
@@ -8238,7 +8238,7 @@ njs_parser_escape_string_calc_length(njs
 
         if (cp_pair != 0) {
             if (njs_fast_path(njs_surrogate_trailing(cp))) {
-                cp = njs_string_surrogate_pair(cp_pair, cp);
+                cp = njs_surrogate_pair(cp_pair, cp);
 
             } else if (njs_slow_path(njs_surrogate_leading(cp))) {
                 cp = NJS_UNICODE_REPLACEMENT;
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_string.c
--- a/src/njs_string.c	Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_string.c	Mon Jul 27 14:34:35 2020 +0000
@@ -4272,7 +4272,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_
                         goto uri_error;
                     }
 
-                    cp = njs_string_surrogate_pair(cp, cp_low);
+                    cp = njs_surrogate_pair(cp, cp_low);
                     size += njs_utf8_size(cp) * 3;
                     continue;
                 }
@@ -4312,7 +4312,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_
 
         if (njs_slow_path(njs_surrogate_leading(cp))) {
             cp_low = njs_utf8_decode(&ctx, &src, end);
-            cp = njs_string_surrogate_pair(cp, cp_low);
+            cp = njs_surrogate_pair(cp, cp_low);
         }
 
         njs_utf8_encode(encode, cp);
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_string.h
--- a/src/njs_string.h	Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_string.h	Mon Jul 27 14:34:35 2020 +0000
@@ -26,16 +26,6 @@
 /* The maximum signed int32_t. */
 #define NJS_STRING_MAX_LENGTH  0x7fffffff
 
-#define njs_surrogate_leading(cp)    ((cp) >= 0xd800 && (cp) <= 0xdbff)
-
-#define njs_surrogate_trailing(cp)   ((cp) >= 0xdc00 && (cp) <= 0xdfff)
-
-#define njs_surrogate_any(cp)        ((cp) >= 0xd800 && (cp) <= 0xdfff)
-
-/* Converting surrogate pair to code point.  */
-#define njs_string_surrogate_pair(high, low)                                  \
-    (0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00))
-
 /*
  * NJS_STRING_MAP_STRIDE should be power of two to use shift and binary
  * AND operations instead of division and remainder operations but no
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_unicode.h
--- a/src/njs_unicode.h	Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_unicode.h	Mon Jul 27 14:34:35 2020 +0000
@@ -23,5 +23,17 @@ typedef struct {
     u_char    upper;
 } njs_unicode_decode_t;
 
+#define njs_surrogate_leading(cp)                                             \
+    (((unsigned) (cp) - 0xd800) <= 0xdbff - 0xd800)
+
+#define njs_surrogate_trailing(cp)                                            \
+    (((unsigned) (cp) - 0xdc00) <= 0xdfff - 0xdc00)
+
+#define njs_surrogate_any(cp)                                                 \
+    (((unsigned) (cp) - 0xd800) <= 0xdfff - 0xd800)
+
+#define njs_surrogate_pair(high, low)                                         \
+    (0x10000 + (((high) - 0xd800) << 10) + ((low) - 0xdc00))
+
 
 #endif /* _NJS_UNICODE_H_INCLUDED_ */
diff -r 6078d0c735b4 -r d4c69313ac6c src/njs_utf16.c
--- a/src/njs_utf16.c	Mon Jul 27 14:18:15 2020 +0000
+++ b/src/njs_utf16.c	Mon Jul 27 14:34:35 2020 +0000
@@ -79,9 +79,8 @@ lead_state:
 #endif
 
     if (ctx->codepoint != 0x00) {
-        if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
-            unit = 0x10000 + ((ctx->codepoint - 0xD800) << 10)
-                   + (unit - 0xDC00);
+        if (njs_surrogate_trailing(unit)) {
+            unit = njs_surrogate_pair(ctx->codepoint, unit);
 
             ctx->codepoint = 0x00;
 
@@ -96,10 +95,8 @@ lead_state:
         return NJS_UNICODE_ERROR;
     }
 
-    /* Surrogate pair. */
-
-    if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
-        if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
+    if (njs_surrogate_any(unit)) {
+        if (njs_surrogate_trailing(unit)) {
             return NJS_UNICODE_ERROR;
         }
 


More information about the nginx-devel mailing list