[ANNOUNCE] gunzip filter module 0.3

Maxim Dounin mdounin at mdounin.ru
Thu Apr 22 07:19:40 MSD 2010


Hello!

On Tue, Apr 20, 2010 at 07:00:49PM -0400, theromis1 wrote:

> ok,
> 
> I've created patch which must feet nginx rules and my needs, 
> please find it below.
> when I've checked sources of zlib.h I found this "windowBits can 
> also be greater than 15 for optional zip decoding. Add   32 to 
> windowBits to enable zlib and gzip decoding with automatic 
> header detection, or add 16 to decode only the gzip format (the 
> zlib format will return a Z_DATA_ERROR)."
> As I can understand if pass here +16 it will work only with 
> gzip, +32 will take both.

I perfectly understand what +32 does.  I just don't think that it 
should be done.  Headers indicate gzip, and data we see isn't 
gzip.  So it's an error and it should be reported.

> We have huge traffic, and throw this module goes a lot of sites 
> which don't checking RFC browser supports both of it, I'm just 
> counting alerts in error log and with +32 this amount much 
> smaller.

Just a note: if you are trying to proxy the whole internet you 
probably choose a wrong tool.  nginx was never designed to be 
generic forward-proxy, it's reverse proxy and it expects 
controllable backends.  And it has quite a few limitations which 
render it hardly usable as forward proxy (including fully buffered 
request messages, no http/1.1 support to upstreams, no protection 
for X-Accel-* response headers and so on).

> Regarding yandex stuff, I don't know why they made it this way, 
> but they sending 3 chunks: 1st is 16 bytes weird gzip header and 
> nothing more, next chunk correct html content, and 3rd one is 
> zero size.

Yes, when we are talking about HTTP/1.1 and chunked encoding.  
Last two chunks are normal (actual content + final chunk), but the 
first one is broken.

> Idiotic, but browser can handle it.

No they can't.  They just used to ignore message body when 
handling redirects.

> May we work this way, try to 
> gunzip if we got error try to gunzip next chunk, or if we have 
> fatal error just pass whole stream unchanged, at least on first 
> chunk?

While it is technically possible to do so via postponing sending 
header until after we see some body parts (e.g. xslt filter does 
this) - it is a bit tricky and will had some bad implications with 
current nginx code, e.g. 304 responses will read body from static 
files instead of just doing stat() and so on.

Sure these all can be resolved, but I don't see any benefits for 
supported configurations here, only for unsupported forward proxy 
case.

> My patch follows:
> --- /root/work/ngx_http_gunzip_filter_module-0.3/ngx_http_gunzip_filter_module.c	2010-03-22 11:11:16.000000000 -0700
> +++ ngx_http_gunzip_filter_module.c	2010-04-20 15:59:01.000000000 -0700
> @@ -16,6 +16,9 @@
>  typedef struct {
>      ngx_flag_t           enable;
>      ngx_bufs_t           bufs;
> +    ngx_uint_t           pass;
> +    ngx_hash_t           types;
> +    ngx_array_t         *types_keys;
>  } ngx_http_gunzip_conf_t;
>  
>  
> @@ -40,6 +43,17 @@
>      ngx_http_request_t  *request;
>  } ngx_http_gunzip_ctx_t;
>  
> +#define NGX_HTTP_GUNZIP_PASS_ANY           0x02
> +#define NGX_HTTP_GUNZIP_PASS_CONTENT_TYPE  0x04
> +#define NGX_HTTP_GUNZIP_PASS_200           0x08
> +
> +static ngx_conf_bitmask_t  ngx_http_gzip_pass_mask[] = {
> +    { ngx_string("any"), NGX_HTTP_GUNZIP_PASS_ANY },
> +    { ngx_string("content_type"), NGX_HTTP_GUNZIP_PASS_CONTENT_TYPE },
> +    { ngx_string("200"), NGX_HTTP_GUNZIP_PASS_200 },
> +    { ngx_null_string, 0 }
> +};
> +
>  
>  static ngx_int_t ngx_http_gunzip_filter_inflate_start(ngx_http_request_t *r,
>      ngx_http_gunzip_ctx_t *ctx);
> @@ -78,6 +92,20 @@
>        offsetof(ngx_http_gunzip_conf_t, bufs),
>        NULL },
>  
> +    { ngx_string("gunzip_pass"),
> +      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE,
> +      ngx_conf_set_bitmask_slot,
> +      NGX_HTTP_LOC_CONF_OFFSET,
> +      offsetof(ngx_http_gunzip_conf_t, pass),
> +      &ngx_http_gzip_pass_mask },
> +    
> +    { ngx_string("gunzip_filter_types"),
> +      NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF|NGX_CONF_1MORE,
> +      ngx_http_types_slot,
> +      NGX_HTTP_LOC_CONF_OFFSET,
> +      offsetof(ngx_http_gunzip_conf_t, types_keys),
> +      &ngx_http_html_default_types[0] },
> +

I don't like naming and syntax, sorry.  Suffix "_pass" is used for 
backend modules (proxy_pass, memcached_pass, fastcgi_pass), but I 
belive it should be handled by "gunzip" directive itself anyway.

Directive "gunzip_filter_types" should be "gunzip_types", and I think it 
should be "*" by default.  I.e. gunzip all types by default, and 
make it possible to reduce the list.  This way types is completely 
orthogonal to other settings.

>        ngx_null_command
>  };
>  
> @@ -130,6 +158,10 @@
>      /* TODO ignore content encoding? */
>  
>      if (!conf->enable
> +	|| ((conf->pass & NGX_HTTP_GUNZIP_PASS_200 ) && (r->upstream)
> +	   && (r->upstream->state) && (r->upstream->state->status != 200))

I still doesn't understand why do you check 
r->upstream->state->status.  There is r->status.

And as I already said I don't like the whole idea of "gunzipping 
only 200 responses".

> +	|| ((conf->pass & NGX_HTTP_GUNZIP_PASS_CONTENT_TYPE)
> +           && ngx_http_test_content_type(r, &conf->types) == NULL)
>          || r->headers_out.content_encoding == NULL
>          || r->headers_out.content_encoding->value.len != 4
>          || ngx_strncasecmp(r->headers_out.content_encoding->value.data,
> @@ -138,26 +170,29 @@
>          return ngx_http_next_header_filter(r);
>      }
>  
> +    if (! conf->pass & NGX_HTTP_GUNZIP_PASS_ANY )
> +    {
>  #if (nginx_version >= 8025 || (nginx_version >= 7065 && nginx_version < 8000))
>  
> -    r->gzip_vary = 1;
> -
> -    if (!r->gzip_tested) {
> -        if (ngx_http_gzip_ok(r) == NGX_OK) {
> -            return ngx_http_next_header_filter(r);
> -        }
> +      r->gzip_vary = 1;
>  
> -    } else if (!r->gzip_ok) {
> -        return ngx_http_next_header_filter(r);
> -    }
> +      if (!r->gzip_tested) {
> +          if (ngx_http_gzip_ok(r) == NGX_OK) {
> +              return ngx_http_next_header_filter(r);
> +          }
> +
> +      } else if (!r->gzip_ok) {
> +          return ngx_http_next_header_filter(r);
> +      }
>  
>  #else
>  
> -    if (ngx_http_gzip_ok(r) == NGX_OK) {
> -        return ngx_http_next_header_filter(r);
> -    }
> +      if (ngx_http_gzip_ok(r) == NGX_OK) {
> +          return ngx_http_next_header_filter(r);
> +      }
>  
>  #endif
> +   }

Don't hesitate to use goto.

>  
>      ctx = ngx_pcalloc(r->pool, sizeof(ngx_http_gunzip_ctx_t));
>      if (ctx == NULL) {
> @@ -314,8 +349,8 @@
>      ctx->zstream.zfree = ngx_http_gunzip_filter_free;
>      ctx->zstream.opaque = ctx;
>  
> -    /* windowBits +16 to decode gzip, zlib 1.2.0.4+ */
> -    rc = inflateInit2(&ctx->zstream, MAX_WBITS + 16);
> +    /* windowBits +32 to decode gzip and zlib, zlib 1.2.0.4+ */
> +    rc = inflateInit2(&ctx->zstream, MAX_WBITS + 32);
>  
>      if (rc != Z_OK) {
>          ngx_log_error(NGX_LOG_ALERT, r->connection->log, 0,
> @@ -669,6 +704,24 @@
>      ngx_conf_merge_bufs_value(conf->bufs, prev->bufs,
>                                (128 * 1024) / ngx_pagesize, ngx_pagesize);
>  
> +    ngx_conf_merge_bitmask_value(conf->pass, prev->pass,
> +                              NGX_CONF_BITMASK_SET);
> +
> +#if (nginx_version < 8000)

Relevant change was introduced in 0.8.29.

> +    if (ngx_http_merge_types(cf, conf->types_keys, &conf->types,
> +                             prev->types_keys, &prev->types,
> +                             ngx_http_html_default_types)
> +        != NGX_OK)
> +#else
> +    if (ngx_http_merge_types(cf, &conf->types_keys, &conf->types,
> +                             &prev->types_keys, &prev->types,
> +                             ngx_http_html_default_types)
> +        != NGX_OK)
> +#endif
> +    {
> +        return NGX_CONF_ERROR;
> +    }
> +
>      return NGX_CONF_OK;
>  }

Maxim Dounin



More information about the nginx mailing list