[PATCH 1 of 2] Rewritten host header validation to follow generic parsing rules
Sergey Kandaurov
pluknet at nginx.com
Mon May 27 10:21:43 UTC 2024
# HG changeset patch
# User Sergey Kandaurov <pluknet at nginx.com>
# Date 1716805272 -14400
# Mon May 27 14:21:12 2024 +0400
# Node ID e82a7318ed48fdbc1273771bc96357e9dc232975
# Parent f58b6f6362387eeace46043a6fc0bceb56a6786a
Rewritten host header validation to follow generic parsing rules.
It now uses a generic state machine model, with stricter
parsing rules borrowed from ngx_http_validate_host(), plus additional
checks for double dots and stripping of the port subcomponent.
Notably, the port subcomponent of the Host header is now restricted
to digits, and underscores in domain name labels are prohibited.
diff --git a/src/http/ngx_http_request.c b/src/http/ngx_http_request.c
--- a/src/http/ngx_http_request.c
+++ b/src/http/ngx_http_request.c
@@ -2148,9 +2148,11 @@ ngx_http_validate_host(ngx_str_t *host,
size_t i, dot_pos, host_len;
enum {
- sw_usual = 0,
- sw_literal,
- sw_rest
+ sw_host_start = 0,
+ sw_host,
+ sw_host_end,
+ sw_host_ip_literal,
+ sw_port,
} state;
dot_pos = host->len;
@@ -2158,55 +2160,122 @@ ngx_http_validate_host(ngx_str_t *host,
h = host->data;
- state = sw_usual;
+ state = sw_host_start;
for (i = 0; i < host->len; i++) {
ch = h[i];
- switch (ch) {
-
- case '.':
- if (dot_pos == i - 1) {
- return NGX_DECLINED;
+ switch (state) {
+
+ case sw_host_start:
+
+ if (ch == '[') {
+ state = sw_host_ip_literal;
+ break;
+ }
+
+ state = sw_host;
+
+ /* fall through */
+
+ case sw_host:
+
+ if (ch >= 'A' && ch <= 'Z') {
+ alloc = 1;
+ break;
+ }
+
+ if (ch >= 'a' && ch <= 'z') {
+ break;
}
- dot_pos = i;
- break;
-
- case ':':
- if (state == sw_usual) {
+
+ if ((ch >= '0' && ch <= '9') || ch == '-') {
+ break;
+ }
+
+ if (ch == '.') {
+ if (dot_pos == i - 1) {
+ return NGX_DECLINED;
+ }
+
+ dot_pos = i;
+ break;
+ }
+
+ /* fall through */
+
+ case sw_host_end:
+
+ switch (ch) {
+ case ':':
host_len = i;
- state = sw_rest;
+ state = sw_port;
+ break;
+ default:
+ return NGX_DECLINED;
}
break;
- case '[':
- if (i == 0) {
- state = sw_literal;
- }
- break;
-
- case ']':
- if (state == sw_literal) {
- host_len = i + 1;
- state = sw_rest;
- }
- break;
-
- default:
-
- if (ngx_path_separator(ch)) {
- return NGX_DECLINED;
- }
-
- if (ch <= 0x20 || ch == 0x7f) {
- return NGX_DECLINED;
- }
+ case sw_host_ip_literal:
if (ch >= 'A' && ch <= 'Z') {
alloc = 1;
+ break;
}
+ if (ch >= 'a' && ch <= 'z') {
+ break;
+ }
+
+ if (ch >= '0' && ch <= '9') {
+ break;
+ }
+
+ if (ch == '.') {
+ if (dot_pos == i - 1) {
+ return NGX_DECLINED;
+ }
+
+ dot_pos = i;
+ break;
+ }
+
+ switch (ch) {
+ case ':':
+ break;
+ case ']':
+ host_len = i + 1;
+ state = sw_host_end;
+ break;
+ case '-':
+ case '_':
+ case '~':
+ /* unreserved */
+ break;
+ case '!':
+ case '$':
+ case '&':
+ case '\'':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case ',':
+ case ';':
+ case '=':
+ /* sub-delims */
+ break;
+ default:
+ return NGX_DECLINED;
+ }
break;
+
+ case sw_port:
+ if (ch >= '0' && ch <= '9') {
+ break;
+ }
+
+ return NGX_DECLINED;
}
}
diff --git a/src/stream/ngx_stream_core_module.c b/src/stream/ngx_stream_core_module.c
--- a/src/stream/ngx_stream_core_module.c
+++ b/src/stream/ngx_stream_core_module.c
@@ -471,9 +471,11 @@ ngx_stream_validate_host(ngx_str_t *host
size_t i, dot_pos, host_len;
enum {
- sw_usual = 0,
- sw_literal,
- sw_rest
+ sw_host_start = 0,
+ sw_host,
+ sw_host_end,
+ sw_host_ip_literal,
+ sw_port,
} state;
dot_pos = host->len;
@@ -481,55 +483,122 @@ ngx_stream_validate_host(ngx_str_t *host
h = host->data;
- state = sw_usual;
+ state = sw_host_start;
for (i = 0; i < host->len; i++) {
ch = h[i];
- switch (ch) {
+ switch (state) {
+
+ case sw_host_start:
+
+ if (ch == '[') {
+ state = sw_host_ip_literal;
+ break;
+ }
+
+ state = sw_host;
- case '.':
- if (dot_pos == i - 1) {
- return NGX_DECLINED;
+ /* fall through */
+
+ case sw_host:
+
+ if (ch >= 'A' && ch <= 'Z') {
+ alloc = 1;
+ break;
}
- dot_pos = i;
- break;
+
+ if (ch >= 'a' && ch <= 'z') {
+ break;
+ }
- case ':':
- if (state == sw_usual) {
- host_len = i;
- state = sw_rest;
+ if ((ch >= '0' && ch <= '9') || ch == '-') {
+ break;
+ }
+
+ if (ch == '.') {
+ if (dot_pos == i - 1) {
+ return NGX_DECLINED;
+ }
+
+ dot_pos = i;
+ break;
}
- break;
+
+ /* fall through */
+
+ case sw_host_end:
- case '[':
- if (i == 0) {
- state = sw_literal;
+ switch (ch) {
+ case ':':
+ host_len = i;
+ state = sw_port;
+ break;
+ default:
+ return NGX_DECLINED;
}
break;
- case ']':
- if (state == sw_literal) {
+ case sw_host_ip_literal:
+
+ if (ch >= 'A' && ch <= 'Z') {
+ alloc = 1;
+ break;
+ }
+
+ if (ch >= 'a' && ch <= 'z') {
+ break;
+ }
+
+ if (ch >= '0' && ch <= '9') {
+ break;
+ }
+
+ if (ch == '.') {
+ if (dot_pos == i - 1) {
+ return NGX_DECLINED;
+ }
+
+ dot_pos = i;
+ break;
+ }
+
+ switch (ch) {
+ case ':':
+ break;
+ case ']':
host_len = i + 1;
- state = sw_rest;
+ state = sw_host_end;
+ break;
+ case '-':
+ case '_':
+ case '~':
+ /* unreserved */
+ break;
+ case '!':
+ case '$':
+ case '&':
+ case '\'':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case ',':
+ case ';':
+ case '=':
+ /* sub-delims */
+ break;
+ default:
+ return NGX_DECLINED;
}
break;
- default:
-
- if (ngx_path_separator(ch)) {
- return NGX_DECLINED;
+ case sw_port:
+ if (ch >= '0' && ch <= '9') {
+ break;
}
- if (ch <= 0x20 || ch == 0x7f) {
- return NGX_DECLINED;
- }
-
- if (ch >= 'A' && ch <= 'Z') {
- alloc = 1;
- }
-
- break;
+ return NGX_DECLINED;
}
}
More information about the nginx-devel
mailing list