[PATCH 1 of 2] Rewritten host header validation to follow generic parsing rules

Sergey Kandaurov pluknet at nginx.com
Mon May 27 10:21:43 UTC 2024


# HG changeset patch
# User Sergey Kandaurov <pluknet at nginx.com>
# Date 1716805272 -14400
#      Mon May 27 14:21:12 2024 +0400
# Node ID e82a7318ed48fdbc1273771bc96357e9dc232975
# Parent  f58b6f6362387eeace46043a6fc0bceb56a6786a
Rewritten host header validation to follow generic parsing rules.

It now uses a generic state machine model, with stricter parsing
rules borrowed from ngx_http_validate_host(), plus additional checks
for double dots and stripping of the port subcomponent.

Notably, the port subcomponent of the Host header is now restricted
to digits, and underscores in domain name labels are prohibited.

diff --git a/src/http/ngx_http_request.c b/src/http/ngx_http_request.c
--- a/src/http/ngx_http_request.c
+++ b/src/http/ngx_http_request.c
@@ -2148,9 +2148,11 @@ ngx_http_validate_host(ngx_str_t *host, 
     size_t   i, dot_pos, host_len;
 
     enum {
-        sw_usual = 0,
-        sw_literal,
-        sw_rest
+        sw_host_start = 0,
+        sw_host,
+        sw_host_end,
+        sw_host_ip_literal,
+        sw_port,
     } state;
 
     dot_pos = host->len;
@@ -2158,55 +2160,122 @@ ngx_http_validate_host(ngx_str_t *host, 
 
     h = host->data;
 
-    state = sw_usual;
+    state = sw_host_start;
 
     for (i = 0; i < host->len; i++) {
         ch = h[i];
 
-        switch (ch) {
-
-        case '.':
-            if (dot_pos == i - 1) {
-                return NGX_DECLINED;
+        switch (state) {
+
+        case sw_host_start:
+
+            if (ch == '[') {
+                state = sw_host_ip_literal;
+                break;
+            }
+
+            state = sw_host;
+
+            /* fall through */
+
+        case sw_host:
+
+            if (ch >= 'A' && ch <= 'Z') {
+                alloc = 1;
+                break;
+            }
+
+            if (ch >= 'a' && ch <= 'z') {
+                break;
             }
-            dot_pos = i;
-            break;
-
-        case ':':
-            if (state == sw_usual) {
+
+            if ((ch >= '0' && ch <= '9') || ch == '-') {
+                break;
+            }
+
+            if (ch == '.') {
+                if (dot_pos == i - 1) {
+                    return NGX_DECLINED;
+                }
+
+                dot_pos = i;
+                break;
+            }
+
+            /* fall through */
+
+        case sw_host_end:
+
+            switch (ch) {
+            case ':':
                 host_len = i;
-                state = sw_rest;
+                state = sw_port;
+                break;
+            default:
+                return NGX_DECLINED;
             }
             break;
 
-        case '[':
-            if (i == 0) {
-                state = sw_literal;
-            }
-            break;
-
-        case ']':
-            if (state == sw_literal) {
-                host_len = i + 1;
-                state = sw_rest;
-            }
-            break;
-
-        default:
-
-            if (ngx_path_separator(ch)) {
-                return NGX_DECLINED;
-            }
-
-            if (ch <= 0x20 || ch == 0x7f) {
-                return NGX_DECLINED;
-            }
+        case sw_host_ip_literal:
 
             if (ch >= 'A' && ch <= 'Z') {
                 alloc = 1;
+                break;
             }
 
+            if (ch >= 'a' && ch <= 'z') {
+                break;
+            }
+
+            if (ch >= '0' && ch <= '9') {
+                break;
+            }
+
+            if (ch == '.') {
+                if (dot_pos == i - 1) {
+                    return NGX_DECLINED;
+                }
+
+                dot_pos = i;
+                break;
+            }
+
+            switch (ch) {
+            case ':':
+                break;
+            case ']':
+                host_len = i + 1;
+                state = sw_host_end;
+                break;
+            case '-':
+            case '_':
+            case '~':
+                /* unreserved */
+                break;
+            case '!':
+            case '$':
+            case '&':
+            case '\'':
+            case '(':
+            case ')':
+            case '*':
+            case '+':
+            case ',':
+            case ';':
+            case '=':
+                /* sub-delims */
+                break;
+            default:
+                return NGX_DECLINED;
+            }
             break;
+
+        case sw_port:
+            if (ch >= '0' && ch <= '9') {
+                break;
+            }
+
+            return NGX_DECLINED;
         }
     }
 
diff --git a/src/stream/ngx_stream_core_module.c b/src/stream/ngx_stream_core_module.c
--- a/src/stream/ngx_stream_core_module.c
+++ b/src/stream/ngx_stream_core_module.c
@@ -471,9 +471,11 @@ ngx_stream_validate_host(ngx_str_t *host
     size_t   i, dot_pos, host_len;
 
     enum {
-        sw_usual = 0,
-        sw_literal,
-        sw_rest
+        sw_host_start = 0,
+        sw_host,
+        sw_host_end,
+        sw_host_ip_literal,
+        sw_port,
     } state;
 
     dot_pos = host->len;
@@ -481,55 +483,122 @@ ngx_stream_validate_host(ngx_str_t *host
 
     h = host->data;
 
-    state = sw_usual;
+    state = sw_host_start;
 
     for (i = 0; i < host->len; i++) {
         ch = h[i];
 
-        switch (ch) {
+        switch (state) {
+
+        case sw_host_start:
+
+            if (ch == '[') {
+                state = sw_host_ip_literal;
+                break;
+            }
+
+            state = sw_host;
 
-        case '.':
-            if (dot_pos == i - 1) {
-                return NGX_DECLINED;
+            /* fall through */
+
+        case sw_host:
+
+            if (ch >= 'A' && ch <= 'Z') {
+                alloc = 1;
+                break;
             }
-            dot_pos = i;
-            break;
+
+            if (ch >= 'a' && ch <= 'z') {
+                break;
+            }
 
-        case ':':
-            if (state == sw_usual) {
-                host_len = i;
-                state = sw_rest;
+            if ((ch >= '0' && ch <= '9') || ch == '-') {
+                break;
+            }
+
+            if (ch == '.') {
+                if (dot_pos == i - 1) {
+                    return NGX_DECLINED;
+                }
+
+                dot_pos = i;
+                break;
             }
-            break;
+
+            /* fall through */
+
+        case sw_host_end:
 
-        case '[':
-            if (i == 0) {
-                state = sw_literal;
+            switch (ch) {
+            case ':':
+                host_len = i;
+                state = sw_port;
+                break;
+            default:
+                return NGX_DECLINED;
             }
             break;
 
-        case ']':
-            if (state == sw_literal) {
+        case sw_host_ip_literal:
+
+            if (ch >= 'A' && ch <= 'Z') {
+                alloc = 1;
+                break;
+            }
+
+            if (ch >= 'a' && ch <= 'z') {
+                break;
+            }
+
+            if (ch >= '0' && ch <= '9') {
+                break;
+            }
+
+            if (ch == '.') {
+                if (dot_pos == i - 1) {
+                    return NGX_DECLINED;
+                }
+
+                dot_pos = i;
+                break;
+            }
+
+            switch (ch) {
+            case ':':
+                break;
+            case ']':
                 host_len = i + 1;
-                state = sw_rest;
+                state = sw_host_end;
+                break;
+            case '-':
+            case '_':
+            case '~':
+                /* unreserved */
+                break;
+            case '!':
+            case '$':
+            case '&':
+            case '\'':
+            case '(':
+            case ')':
+            case '*':
+            case '+':
+            case ',':
+            case ';':
+            case '=':
+                /* sub-delims */
+                break;
+            default:
+                return NGX_DECLINED;
             }
             break;
 
-        default:
-
-            if (ngx_path_separator(ch)) {
-                return NGX_DECLINED;
+        case sw_port:
+            if (ch >= '0' && ch <= '9') {
+                break;
             }
 
-            if (ch <= 0x20 || ch == 0x7f) {
-                return NGX_DECLINED;
-            }
-
-            if (ch >= 'A' && ch <= 'Z') {
-                alloc = 1;
-            }
-
-            break;
+            return NGX_DECLINED;
         }
     }
 


More information about the nginx-devel mailing list