Michael Kourlas via nginx-devel
February 15, 2023 11:52AM
# HG changeset patch
# User Michael Kourlas <michael.kourlas@solace.com>
# Date 1676408746 18000
# Tue Feb 14 16:05:46 2023 -0500
# Node ID 129437ade41b14a584fb4b7558accc1b8dee7f45
# Parent cffaf3f2eec8fd33605c2a37814f5ffc30371989
HTTP: Add new uri_normalization_percent_decode option

This patch addresses ticket #2225 by adding a new
uri_normalization_percent_decode configuration option that controls which
characters are percent-decoded by nginx as part of its URI normalization.

The option has two values: "all" and "all-except-reserved". "all" is the
default value and preserves the current behaviour. When the option is set to
"all-except-reserved", nginx percent-decodes all characters except those in the
reserved set defined by RFC 3986, section 2.2:

reserved = gen-delims / sub-delims

gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"

sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
/ "*" / "+" / "," / ";" / "="

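In addition, when "all-except-reserved" is used and nginx re-escapes the
normalized URI (for example, when passing it to a proxied server or building a
redirect), it will not re-encode a "%" that begins a percent-encoded reserved
character. Without this, a preserved "%2F" would be sent on as "%252F".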

When nginx percent-decodes reserved characters, this can often change the
request URI's semantics, making it impossible to use a normalized URI for
certain use cases. "uri_normalization_percent_decode" gives the configuration
author the freedom to determine which reserved characters are semantically
relevant and which are not.

For example, consider the following location block, which handles part of a
hypothetical API:

location ~ ^/api/objects/[^/]+/subobjects(/.*)?$ {
...
}

Because nginx always normalizes "%2F" to "/", this location block will not
match a path of /api/objects/sample%2Fname/subobjects, even if the API permits
"/" to appear percent-encoded in the URI as part of object names. nginx will
instead interpret this as /api/objects/sample/name/subobjects, a completely
different path. Setting "uri_normalization_percent_decode" to
"all-except-reserved" will leave "%2F" encoded, resulting in the expected
behaviour.

diff -r cffaf3f2eec8 -r 129437ade41b src/core/ngx_string.c
--- a/src/core/ngx_string.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/core/ngx_string.c Tue Feb 14 16:05:46 2023 -0500
@@ -1487,7 +1487,8 @@


uintptr_t
-ngx_escape_uri(u_char *dst, u_char *src, size_t size, ngx_uint_t type)
+ngx_escape_uri(u_char *dst, u_char *src, size_t size, ngx_uint_t type,
+ ngx_uint_t skip_preencoded_type)
{
ngx_uint_t n;
uint32_t *escape;
@@ -1641,7 +1642,11 @@
n = 0;

while (size) {
- if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+ if ((escape[*src >> 5] & (1U << (*src & 0x1f)))
+ && !(*src == '%' && size >= 3
+ && ngx_escape_uri_skip_preencoded_character(
+ src + 1, skip_preencoded_type)))
+ {
n++;
}
src++;
@@ -1652,7 +1657,11 @@
}

while (size) {
- if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
+ if ((escape[*src >> 5] & (1U << (*src & 0x1f)))
+ && !(*src == '%' && size >= 3
+ && ngx_escape_uri_skip_preencoded_character(
+ src + 1, skip_preencoded_type)))
+ {
*dst++ = '%';
*dst++ = hex[*src >> 4];
*dst++ = hex[*src & 0xf];
@@ -1668,6 +1677,87 @@
}


+ngx_uint_t
+ngx_escape_uri_skip_preencoded_character(u_char *hex_component, /* the two bytes after a '%'; ngx_escape_uri passes src + 1 */
+ ngx_uint_t skip_preencoded_type) /* index into skip_map below: 0 selects "none", 1 selects "reserved_only" */
+{ /* returns 1 if the two hex digits decode to a character flagged in the selected table; 0 otherwise, including when either byte is not a hex digit */
+ u_char ch, decoded_ch;
+ uint32_t *skip;
+
+ static uint32_t none[] = { /* no bits set: this table never reports a skip */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ 0x00000000 /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ };
+
+ static uint32_t reserved_only[] = { /* bits set for ! # $ & ' ( ) * + , / : ; = ? @ [ ] */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0xac009fda, /* 1010 1100 0000 0000 1001 1111 1101 1010 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0x28000001, /* 0010 1000 0000 0000 0000 0000 0000 0001 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ 0x00000000, /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ 0x00000000 /* 0000 0000 0000 0000 0000 0000 0000 0000 */
+ };
+
+ static uint32_t *skip_map[] = { none, reserved_only }; /* order follows NGX_ESCAPE_SKIP_PREENCODED_NONE (0) and NGX_ESCAPE_SKIP_PREENCODED_RESERVED (1) */
+
+ skip = skip_map[skip_preencoded_type]; /* NOTE(review): skip_preencoded_type is not range-checked before indexing */
+
+ ch = *hex_component; /* first hex digit, becomes the high nibble */
+ if (ch >= '0' && ch <= '9') {
+ decoded_ch = (u_char) (ch - '0');
+ } else {
+ ch = (u_char) (ch | 0x20); /* maps 'A'..'F' onto 'a'..'f' so one range test covers both cases */
+ if (ch >= 'a' && ch <= 'f') {
+ decoded_ch = (u_char) (ch - 'a' + 10);
+ } else {
+ /* not part of a percent-encoded character */
+ return 0;
+ }
+ }
+
+ ch = *(hex_component + 1); /* second hex digit, becomes the low nibble */
+ if (ch >= '0' && ch <= '9') {
+ decoded_ch = (u_char) ((decoded_ch << 4) + (ch - '0'));
+ } else {
+ ch = (u_char) (ch | 0x20);
+ if (ch >= 'a' && ch <= 'f') {
+ decoded_ch = (u_char) ((decoded_ch << 4) + (ch - 'a') + 10);
+ } else {
+ /* not part of a percent-encoded character */
+ return 0;
+ }
+ }
+
+ if (skip[decoded_ch >> 5] & (1U << (decoded_ch & 0x1f))) { /* word = decoded_ch / 32, bit = decoded_ch % 32 */
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+
void
ngx_unescape_uri(u_char **dst, u_char **src, size_t size, ngx_uint_t type)
{
diff -r cffaf3f2eec8 -r 129437ade41b src/core/ngx_string.h
--- a/src/core/ngx_string.h Thu Feb 02 23:38:48 2023 +0300
+++ b/src/core/ngx_string.h Tue Feb 14 16:05:46 2023 -0500
@@ -204,11 +204,20 @@
#define NGX_ESCAPE_MEMCACHED 5
#define NGX_ESCAPE_MAIL_AUTH 6

+/*
+ * these enumeration values must correspond to the enumeration values for
+ * NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE
+ */
+#define NGX_ESCAPE_SKIP_PREENCODED_NONE 0
+#define NGX_ESCAPE_SKIP_PREENCODED_RESERVED 1
+
#define NGX_UNESCAPE_URI 1
#define NGX_UNESCAPE_REDIRECT 2

uintptr_t ngx_escape_uri(u_char *dst, u_char *src, size_t size,
- ngx_uint_t type);
+ ngx_uint_t type, ngx_uint_t skip_preencoded_type);
+ngx_uint_t ngx_escape_uri_skip_preencoded_character(u_char *seq,
+ ngx_uint_t skip_preencoded_type);
void ngx_unescape_uri(u_char **dst, u_char **src, size_t size, ngx_uint_t type);
uintptr_t ngx_escape_html(u_char *dst, u_char *src, size_t size);
uintptr_t ngx_escape_json(u_char *dst, u_char *src, size_t size);
diff -r cffaf3f2eec8 -r 129437ade41b src/event/ngx_event_openssl.c
--- a/src/event/ngx_event_openssl.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/event/ngx_event_openssl.c Tue Feb 14 16:05:46 2023 -0500
@@ -5366,7 +5366,8 @@
return NGX_OK;
}

- n = ngx_escape_uri(NULL, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT);
+ n = ngx_escape_uri(NULL, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

s->len = cert.len + n * 2;
s->data = ngx_pnalloc(pool, s->len);
@@ -5374,7 +5375,8 @@
return NGX_ERROR;
}

- ngx_escape_uri(s->data, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT);
+ ngx_escape_uri(s->data, cert.data, cert.len, NGX_ESCAPE_URI_COMPONENT,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

return NGX_OK;
}
diff -r cffaf3f2eec8 -r 129437ade41b src/event/ngx_event_openssl_stapling.c
--- a/src/event/ngx_event_openssl_stapling.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/event/ngx_event_openssl_stapling.c Tue Feb 14 16:05:46 2023 -0500
@@ -1747,7 +1747,8 @@
ngx_encode_base64(&base64, &binary);

escape = ngx_escape_uri(NULL, base64.data, base64.len,
- NGX_ESCAPE_URI_COMPONENT);
+ NGX_ESCAPE_URI_COMPONENT,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

ngx_log_debug2(NGX_LOG_DEBUG_EVENT, ctx->log, 0,
"ssl ocsp request length %z, escape %d",
@@ -1777,7 +1778,8 @@

} else {
p = (u_char *) ngx_escape_uri(p, base64.data, base64.len,
- NGX_ESCAPE_URI_COMPONENT);
+ NGX_ESCAPE_URI_COMPONENT,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);
}

p = ngx_cpymem(p, " HTTP/1.0" CRLF, sizeof(" HTTP/1.0" CRLF) - 1);
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_autoindex_module.c
--- a/src/http/modules/ngx_http_autoindex_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_autoindex_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -487,7 +487,8 @@
for (i = 0; i < entries->nelts; i++) {
entry[i].escape = 2 * ngx_escape_uri(NULL, entry[i].name.data,
entry[i].name.len,
- NGX_ESCAPE_URI_COMPONENT);
+ NGX_ESCAPE_URI_COMPONENT,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

entry[i].escape_html = ngx_escape_html(NULL, entry[i].name.data,
entry[i].name.len);
@@ -549,7 +550,8 @@

if (entry[i].escape) {
ngx_escape_uri(b->last, entry[i].name.data, entry[i].name.len,
- NGX_ESCAPE_URI_COMPONENT);
+ NGX_ESCAPE_URI_COMPONENT,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

b->last += entry[i].name.len + entry[i].escape;

diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_dav_module.c
--- a/src/http/modules/ngx_http_dav_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_dav_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -1072,9 +1072,12 @@
static ngx_int_t
ngx_http_dav_location(ngx_http_request_t *r)
{
- u_char *p;
- size_t len;
- uintptr_t escape;
+ u_char *p;
+ size_t len;
+ uintptr_t escape;
+ ngx_http_core_srv_conf_t *cscf;
+
+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);

r->headers_out.location = ngx_list_push(&r->headers_out.headers);
if (r->headers_out.location == NULL) {
@@ -1085,7 +1088,8 @@
r->headers_out.location->next = NULL;
ngx_str_set(&r->headers_out.location->key, "Location");

- escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len, NGX_ESCAPE_URI);
+ escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len, NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);

if (escape) {
len = r->uri.len + escape;
@@ -1099,7 +1103,8 @@
r->headers_out.location->value.len = len;
r->headers_out.location->value.data = p;

- ngx_escape_uri(p, r->uri.data, r->uri.len, NGX_ESCAPE_URI);
+ ngx_escape_uri(p, r->uri.data, r->uri.len, NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);

} else {
r->headers_out.location->value = r->uri;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_grpc_module.c
--- a/src/http/modules/ngx_http_grpc_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_grpc_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -720,12 +720,15 @@
ngx_http_upstream_t *u;
ngx_http_grpc_frame_t *f;
ngx_http_script_code_pt code;
+ ngx_http_core_srv_conf_t *cscf;
ngx_http_grpc_loc_conf_t *glcf;
ngx_http_script_engine_t e, le;
ngx_http_script_len_code_pt lcode;

u = r->upstream;

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
glcf = ngx_http_get_module_loc_conf(r, ngx_http_grpc_module);

ctx = ngx_http_get_module_ctx(r, ngx_http_grpc_module);
@@ -756,7 +759,8 @@

} else {
escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len,
- NGX_ESCAPE_URI);
+ NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);
uri_len = r->uri.len + escape + sizeof("?") - 1 + r->args.len;
}

@@ -950,7 +954,7 @@

if (escape) {
p = (u_char *) ngx_escape_uri(p, r->uri.data, r->uri.len,
- NGX_ESCAPE_URI);
+ NGX_ESCAPE_URI, cscf->uri_normalization_percent_decode);

} else {
p = ngx_copy(p, r->uri.data, r->uri.len);
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_memcached_module.c
--- a/src/http/modules/ngx_http_memcached_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_memcached_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -255,7 +255,8 @@
return NGX_ERROR;
}

- escape = 2 * ngx_escape_uri(NULL, vv->data, vv->len, NGX_ESCAPE_MEMCACHED);
+ escape = 2 * ngx_escape_uri(NULL, vv->data, vv->len, NGX_ESCAPE_MEMCACHED,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

len = sizeof("get ") - 1 + vv->len + escape + sizeof(CRLF) - 1;

@@ -285,7 +286,7 @@

} else {
b->last = (u_char *) ngx_escape_uri(b->last, vv->data, vv->len,
- NGX_ESCAPE_MEMCACHED);
+ NGX_ESCAPE_MEMCACHED, NGX_ESCAPE_SKIP_PREENCODED_NONE);
}

ctx->key.len = b->last - ctx->key.data;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_proxy_module.c
--- a/src/http/modules/ngx_http_proxy_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_proxy_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -1143,10 +1143,13 @@
ngx_str_t *key;
ngx_http_upstream_t *u;
ngx_http_proxy_ctx_t *ctx;
+ ngx_http_core_srv_conf_t *cscf;
ngx_http_proxy_loc_conf_t *plcf;

u = r->upstream;

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
plcf = ngx_http_get_module_loc_conf(r, ngx_http_proxy_module);

ctx = ngx_http_get_module_ctx(r, ngx_http_proxy_module);
@@ -1190,7 +1193,8 @@

if (r->quoted_uri || r->internal) {
escape = 2 * ngx_escape_uri(NULL, r->uri.data + loc_len,
- r->uri.len - loc_len, NGX_ESCAPE_URI);
+ r->uri.len - loc_len, NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);
} else {
escape = 0;
}
@@ -1211,7 +1215,8 @@

if (escape) {
ngx_escape_uri(p, r->uri.data + loc_len,
- r->uri.len - loc_len, NGX_ESCAPE_URI);
+ r->uri.len - loc_len, NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);
p += r->uri.len - loc_len + escape;

} else {
@@ -1249,11 +1254,14 @@
ngx_http_script_code_pt code;
ngx_http_proxy_headers_t *headers;
ngx_http_script_engine_t e, le;
+ ngx_http_core_srv_conf_t *cscf;
ngx_http_proxy_loc_conf_t *plcf;
ngx_http_script_len_code_pt lcode;

u = r->upstream;

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
plcf = ngx_http_get_module_loc_conf(r, ngx_http_proxy_module);

#if (NGX_HTTP_CACHE)
@@ -1303,7 +1311,7 @@

if (r->quoted_uri || r->internal) {
escape = 2 * ngx_escape_uri(NULL, r->uri.data + loc_len,
- r->uri.len - loc_len, NGX_ESCAPE_URI);
+ r->uri.len - loc_len, NGX_ESCAPE_URI, cscf->uri_normalization_percent_decode);
}

uri_len = ctx->vars.uri.len + r->uri.len - loc_len + escape
@@ -1428,7 +1436,8 @@

if (escape) {
ngx_escape_uri(b->last, r->uri.data + loc_len,
- r->uri.len - loc_len, NGX_ESCAPE_URI);
+ r->uri.len - loc_len, NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);
b->last += r->uri.len - loc_len + escape;

} else {
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_ssi_filter_module.c
--- a/src/http/modules/ngx_http_ssi_filter_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_ssi_filter_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -2348,7 +2348,8 @@

case NGX_HTTP_SSI_URL_ENCODING:
len = 2 * ngx_escape_uri(NULL, value->data, value->len,
- NGX_ESCAPE_HTML);
+ NGX_ESCAPE_HTML,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

if (len) {
p = ngx_pnalloc(r->pool, value->len + len);
@@ -2356,7 +2357,8 @@
return NGX_HTTP_SSI_ERROR;
}

- (void) ngx_escape_uri(p, value->data, value->len, NGX_ESCAPE_HTML);
+ (void) ngx_escape_uri(p, value->data, value->len, NGX_ESCAPE_HTML,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);
}

len += value->len;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/modules/ngx_http_static_module.c
--- a/src/http/modules/ngx_http_static_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/modules/ngx_http_static_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -58,6 +58,7 @@
ngx_buf_t *b;
ngx_chain_t out;
ngx_open_file_info_t of;
+ ngx_http_core_srv_conf_t *cscf;
ngx_http_core_loc_conf_t *clcf;

if (!(r->method & (NGX_HTTP_GET|NGX_HTTP_HEAD|NGX_HTTP_POST))) {
@@ -85,6 +86,8 @@
ngx_log_debug1(NGX_LOG_DEBUG_HTTP, log, 0,
"http filename: \"%s\"", path.data);

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
clcf = ngx_http_get_module_loc_conf(r, ngx_http_core_module);

ngx_memzero(&of, sizeof(ngx_open_file_info_t));
@@ -157,7 +160,8 @@
}

escape = 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len,
- NGX_ESCAPE_URI);
+ NGX_ESCAPE_URI,
+ cscf->uri_normalization_percent_decode);

if (!clcf->alias && r->args.len == 0 && escape == 0) {
len = r->uri.len + 1;
@@ -180,7 +184,7 @@

if (escape) {
last = (u_char *) ngx_escape_uri(location, r->uri.data,
- r->uri.len, NGX_ESCAPE_URI);
+ r->uri.len, NGX_ESCAPE_URI, cscf->uri_normalization_percent_decode);

} else {
last = ngx_copy(location, r->uri.data, r->uri.len);
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http.c
--- a/src/http/ngx_http.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http.c Tue Feb 14 16:05:46 2023 -0500
@@ -900,7 +900,8 @@
uintptr_t escape;

escape = 2 * ngx_escape_uri(NULL, clcf->name.data, clcf->name.len,
- NGX_ESCAPE_URI);
+ NGX_ESCAPE_URI,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

if (escape) {
len = clcf->name.len + escape;
@@ -913,7 +914,8 @@
clcf->escaped_name.len = len;
clcf->escaped_name.data = p;

- ngx_escape_uri(p, clcf->name.data, clcf->name.len, NGX_ESCAPE_URI);
+ ngx_escape_uri(p, clcf->name.data, clcf->name.len, NGX_ESCAPE_URI,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

} else {
clcf->escaped_name = clcf->name;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http.h
--- a/src/http/ngx_http.h Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http.h Tue Feb 14 16:05:46 2023 -0500
@@ -96,7 +96,7 @@
ngx_int_t ngx_http_parse_request_line(ngx_http_request_t *r, ngx_buf_t *b);
ngx_int_t ngx_http_parse_uri(ngx_http_request_t *r);
ngx_int_t ngx_http_parse_complex_uri(ngx_http_request_t *r,
- ngx_uint_t merge_slashes);
+ ngx_uint_t merge_slashes, ngx_uint_t uri_normalization_percent_decode);
ngx_int_t ngx_http_parse_status_line(ngx_http_request_t *r, ngx_buf_t *b,
ngx_http_status_t *status);
ngx_int_t ngx_http_parse_unsafe_uri(ngx_http_request_t *r, ngx_str_t *uri,
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_core_module.c
--- a/src/http/ngx_http_core_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_core_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -180,6 +180,14 @@
#endif


+static ngx_conf_enum_t ngx_http_core_uri_normalization_percent_decode[] = { /* names accepted by the "uri_normalization_percent_decode" directive and the values they map to */
+ { ngx_string("all"), NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL },
+ { ngx_string("all-except-reserved"),
+ NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL_EXCEPT_RESERVED },
+ { ngx_null_string, 0 }
+};
+
+
static ngx_command_t ngx_http_core_commands[] = {

{ ngx_string("variables_hash_max_size"),
@@ -778,6 +786,13 @@

#endif

+ { ngx_string("uri_normalization_percent_decode"),
+ NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_CONF_TAKE1,
+ ngx_conf_set_enum_slot,
+ NGX_HTTP_SRV_CONF_OFFSET,
+ offsetof(ngx_http_core_srv_conf_t, uri_normalization_percent_decode),
+ &ngx_http_core_uri_normalization_percent_decode },
+
ngx_null_command
};

@@ -3462,6 +3477,7 @@
cscf->ignore_invalid_headers = NGX_CONF_UNSET;
cscf->merge_slashes = NGX_CONF_UNSET;
cscf->underscores_in_headers = NGX_CONF_UNSET;
+ cscf->uri_normalization_percent_decode = NGX_CONF_UNSET;

cscf->file_name = cf->conf_file->file.name.data;
cscf->line = cf->conf_file->line;
@@ -3539,6 +3555,10 @@
return NGX_CONF_ERROR;
}

+ ngx_conf_merge_uint_value(conf->uri_normalization_percent_decode,
+ prev->uri_normalization_percent_decode,
+ NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL);
+
return NGX_CONF_OK;
}

diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_core_module.h
--- a/src/http/ngx_http_core_module.h Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_core_module.h Tue Feb 14 16:05:46 2023 -0500
@@ -60,6 +60,14 @@
#define NGX_HTTP_SERVER_TOKENS_BUILD 2


+/*
+ * these enumeration values must correspond to the enumeration values for
+ * NGX_ESCAPE_SKIP_PREENCODED
+ */
+#define NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL 0
+#define NGX_HTTP_URI_NORMALIZATION_PERCENT_DECODE_ALL_EXCEPT_RESERVED 1
+
+
typedef struct ngx_http_location_tree_node_s ngx_http_location_tree_node_t;
typedef struct ngx_http_core_loc_conf_s ngx_http_core_loc_conf_t;

@@ -200,6 +208,8 @@
ngx_flag_t merge_slashes;
ngx_flag_t underscores_in_headers;

+ ngx_uint_t uri_normalization_percent_decode;
+
unsigned listen:1;
#if (NGX_PCRE)
unsigned captures:1;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_parse.c
--- a/src/http/ngx_http_parse.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_parse.c Tue Feb 14 16:05:46 2023 -0500
@@ -1245,9 +1245,11 @@


ngx_int_t
-ngx_http_parse_complex_uri(ngx_http_request_t *r, ngx_uint_t merge_slashes)
+ngx_http_parse_complex_uri(ngx_http_request_t *r, ngx_uint_t merge_slashes,
+ ngx_uint_t uri_normalization_percent_decode)
{
u_char c, ch, decoded, *p, *u;
+ uint32_t* decode;
enum {
sw_usual = 0,
sw_slash,
@@ -1257,6 +1259,44 @@
sw_quoted_second
} state, quoted_state;

+ static uint32_t all[] = {
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ };
+
+ static uint32_t all_except_reserved[] = {
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0x53ff6025, /* 0101 0011 1111 1111 0110 0000 0010 0101 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0xd7fffffe, /* 1101 0111 1111 1111 1111 1111 1111 1110 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ };
+
+ static uint32_t *decode_map[] = { all, all_except_reserved };
+
#if (NGX_SUPPRESS_WARN)
decoded = '\0';
quoted_state = sw_usual;
@@ -1267,6 +1307,7 @@
u = r->uri.data;
r->uri_ext = NULL;
r->args_start = NULL;
+ decode = decode_map[uri_normalization_percent_decode];

if (r->empty_path_in_uri) {
*u++ = '/';
@@ -1520,6 +1561,14 @@
if (ch >= '0' && ch <= '9') {
ch = (u_char) ((decoded << 4) + (ch - '0'));

+ if (!(decode[ch >> 5] & (1U << (ch & 0x1f)))) {
+ state = sw_usual;
+ ngx_memcpy(u, p - 3, 3);
+ u += 3;
+ ch = *p++;
+ break;
+ }
+
if (ch == '%' || ch == '#') {
state = sw_usual;
*u++ = ch;
@@ -1538,6 +1587,14 @@
if (c >= 'a' && c <= 'f') {
ch = (u_char) ((decoded << 4) + (c - 'a') + 10);

+ if (!(decode[ch >> 5] & (1U << (ch & 0x1f)))) {
+ state = sw_usual;
+ ngx_memcpy(u, p - 3, 3);
+ u += 3;
+ ch = *p++;
+ break;
+ }
+
if (ch == '?') {
state = sw_usual;
*u++ = ch;
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_request.c
--- a/src/http/ngx_http_request.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_request.c Tue Feb 14 16:05:46 2023 -0500
@@ -1234,7 +1234,9 @@

cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);

- if (ngx_http_parse_complex_uri(r, cscf->merge_slashes) != NGX_OK) {
+ if (ngx_http_parse_complex_uri(r, cscf->merge_slashes,
+ cscf->uri_normalization_percent_decode) != NGX_OK)
+ {
r->uri.len = 0;

ngx_log_error(NGX_LOG_INFO, r->connection->log, 0,
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_script.c
--- a/src/http/ngx_http_script.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_script.c Tue Feb 14 16:05:46 2023 -0500
@@ -1044,11 +1044,14 @@
ngx_http_script_engine_t le;
ngx_http_script_len_code_pt lcode;
ngx_http_script_regex_code_t *code;
+ ngx_http_core_srv_conf_t *cscf;

code = (ngx_http_script_regex_code_t *) e->ip;

r = e->request;

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
ngx_log_debug1(NGX_LOG_DEBUG_HTTP, r->connection->log, 0,
"http script regex: \"%V\"", &code->name);

@@ -1146,7 +1149,7 @@
if (code->uri) {
if (r->ncaptures && (r->quoted_uri || r->plus_in_uri)) {
e->buf.len += 2 * ngx_escape_uri(NULL, r->uri.data, r->uri.len,
- NGX_ESCAPE_ARGS);
+ NGX_ESCAPE_ARGS, cscf->uri_normalization_percent_decode);
}
}

@@ -1339,9 +1342,12 @@
ngx_uint_t n;
ngx_http_request_t *r;
ngx_http_script_copy_capture_code_t *code;
+ ngx_http_core_srv_conf_t *cscf;

r = e->request;

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
code = (ngx_http_script_copy_capture_code_t *) e->ip;

e->ip += sizeof(ngx_http_script_copy_capture_code_t);
@@ -1359,7 +1365,7 @@

return cap[n + 1] - cap[n]
+ 2 * ngx_escape_uri(NULL, &p[cap[n]], cap[n + 1] - cap[n],
- NGX_ESCAPE_ARGS);
+ NGX_ESCAPE_ARGS, cscf->uri_normalization_percent_decode);
} else {
return cap[n + 1] - cap[n];
}
@@ -1377,9 +1383,12 @@
ngx_uint_t n;
ngx_http_request_t *r;
ngx_http_script_copy_capture_code_t *code;
+ ngx_http_core_srv_conf_t *cscf;

r = e->request;

+ cscf = ngx_http_get_module_srv_conf(r, ngx_http_core_module);
+
code = (ngx_http_script_copy_capture_code_t *) e->ip;

e->ip += sizeof(ngx_http_script_copy_capture_code_t);
@@ -1397,8 +1406,7 @@
&& (e->request->quoted_uri || e->request->plus_in_uri))
{
e->pos = (u_char *) ngx_escape_uri(pos, &p[cap[n]],
- cap[n + 1] - cap[n],
- NGX_ESCAPE_ARGS);
+ cap[n + 1] - cap[n], NGX_ESCAPE_ARGS, cscf->uri_normalization_percent_decode);
} else {
e->pos = ngx_copy(pos, &p[cap[n]], cap[n + 1] - cap[n]);
}
diff -r cffaf3f2eec8 -r 129437ade41b src/http/ngx_http_special_response.c
--- a/src/http/ngx_http_special_response.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/http/ngx_http_special_response.c Tue Feb 14 16:05:46 2023 -0500
@@ -797,7 +797,8 @@
len = r->headers_out.location->value.len;
location = r->headers_out.location->value.data;

- escape = 2 * ngx_escape_uri(NULL, location, len, NGX_ESCAPE_REFRESH);
+ escape = 2 * ngx_escape_uri(NULL, location, len, NGX_ESCAPE_REFRESH,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

size = sizeof(ngx_http_msie_refresh_head) - 1
+ escape + len
@@ -841,7 +842,8 @@
p = ngx_cpymem(p, location, len);

} else {
- p = (u_char *) ngx_escape_uri(p, location, len, NGX_ESCAPE_REFRESH);
+ p = (u_char *) ngx_escape_uri(p, location, len, NGX_ESCAPE_REFRESH,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);
}

b->last = ngx_cpymem(p, ngx_http_msie_refresh_tail,
diff -r cffaf3f2eec8 -r 129437ade41b src/mail/ngx_mail_auth_http_module.c
--- a/src/mail/ngx_mail_auth_http_module.c Thu Feb 02 23:38:48 2023 +0300
+++ b/src/mail/ngx_mail_auth_http_module.c Tue Feb 14 16:05:46 2023 -0500
@@ -1478,7 +1478,8 @@
u_char *p;
uintptr_t n;

- n = ngx_escape_uri(NULL, text->data, text->len, NGX_ESCAPE_MAIL_AUTH);
+ n = ngx_escape_uri(NULL, text->data, text->len, NGX_ESCAPE_MAIL_AUTH,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

if (n == 0) {
*escaped = *text;
@@ -1492,7 +1493,8 @@
return NGX_ERROR;
}

- (void) ngx_escape_uri(p, text->data, text->len, NGX_ESCAPE_MAIL_AUTH);
+ (void) ngx_escape_uri(p, text->data, text->len, NGX_ESCAPE_MAIL_AUTH,
+ NGX_ESCAPE_SKIP_PREENCODED_NONE);

escaped->data = p;

________________________________
Confidentiality notice

This e-mail message and any attachment hereto contain confidential information which may be privileged and which is intended for the exclusive use of its addressee(s). If you receive this message in error, please inform sender immediately and destroy any copy thereof. Furthermore, any disclosure, distribution or copying of this message and/or any attachment hereto without the consent of the sender is strictly prohibited. Thank you.
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
https://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[PATCH] HTTP: Add new uri_normalization_percent_decode option

Michael Kourlas via nginx-devel 639 February 15, 2023 11:52AM

Re: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Maxim Dounin 129 February 17, 2023 08:00AM

Re: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Michael Kourlas via nginx-devel 97 March 27, 2023 12:20PM

Re: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Maxim Dounin 102 March 28, 2023 10:10AM

RE: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Michael Kourlas via nginx-devel 115 March 30, 2023 01:20PM

Re: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Maxim Dounin 110 April 01, 2023 04:12PM

RE: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Michael Kourlas via nginx-devel 95 April 03, 2023 02:34PM

Re: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Maxim Dounin 111 April 03, 2023 11:28PM

RE: [PATCH] HTTP: Add new uri_normalization_percent_decode option

Michael Kourlas via nginx-devel 117 April 06, 2023 10:28AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 285
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready