[PATCH] Upstream: add consistent hash module
Steven Hartland
steven.hartland at multiplay.co.uk
Thu May 8 15:23:05 UTC 2014
Be good to see this in core, currently we use the 3rd party module to achieve this:
http://wiki.nginx.org/HttpUpstreamRequestHashModule
One question on the patch, you appear to have some commented out locking is
this an oversight?
Regards
Steve
----- Original Message -----
From: "Jianjun Zheng" <codeeply at gmail.com>
To: <nginx-devel at nginx.org>
Sent: Thursday, May 08, 2014 8:52 AM
Subject: [PATCH] Upstream: add consistent hash module
> usage example: cons_hash $request_uri;
>
> Not only consistent hash is introduced as a load balancing, but also
> increase the hits of cache in upstream servers.
>
> This modules is written according to round_robin, ip_hash
> and least_conn modules for upstream. So it inherits all the
> essential logic from them.
>
> This implementation is simple but has a high performance,
> The time comlexity remains O(log(T)) with a low constant coefficient,
> even when half of servers get down,
> regardless of the weight of servers, as analysis below.
>
> Suppose there are N upstream servers, with D down servers,
> and the weights are W1,W2,...,W(N-1).
> V is the amount of virtual nodes per unit.
> P is the probability to get an alive server from all nodes at random.
>
> Let T = (W1+W2+...+W(N-1))*V.
> then the space complexity is O(T),
> and the time comlexity is O(log(T) + (1-P)*N/(N-D)),
> 'N/(N-D)' of which is independent of W and V.
>
>
> # HG changeset patch
> # User Jianjun Zheng <codeeply at gmail.com>
> # Date 1399531525 -28800
> # Thu May 08 14:45:25 2014 +0800
> # Node ID 063f37f1654ef6cc03bd311a7fe6189b299ce2f2
> # Parent 48c97d83ab7f0a3f641987fb32ace8af7720aefc
> Upstream: add consistent hash module
>
> diff -r 48c97d83ab7f -r 063f37f1654e auto/modules
> --- a/auto/modules Tue Apr 29 22:22:38 2014 +0200
> +++ b/auto/modules Thu May 08 14:45:25 2014 +0800
> @@ -381,6 +381,11 @@
> HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_LEAST_CONN_SRCS"
> fi
>
> +if [ $HTTP_UPSTREAM_CONS_HASH = YES ]; then
> + HTTP_MODULES="$HTTP_MODULES $HTTP_UPSTREAM_CONS_HASH_MODULE"
> + HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_CONS_HASH_SRCS"
> +fi
> +
> if [ $HTTP_UPSTREAM_KEEPALIVE = YES ]; then
> HTTP_MODULES="$HTTP_MODULES $HTTP_UPSTREAM_KEEPALIVE_MODULE"
> HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_KEEPALIVE_SRCS"
> diff -r 48c97d83ab7f -r 063f37f1654e auto/options
> --- a/auto/options Tue Apr 29 22:22:38 2014 +0200
> +++ b/auto/options Thu May 08 14:45:25 2014 +0800
> @@ -100,6 +100,7 @@
> HTTP_GZIP_STATIC=NO
> HTTP_UPSTREAM_IP_HASH=YES
> HTTP_UPSTREAM_LEAST_CONN=YES
> +HTTP_UPSTREAM_CONS_HASH=YES
> HTTP_UPSTREAM_KEEPALIVE=YES
>
> # STUB
> diff -r 48c97d83ab7f -r 063f37f1654e auto/sources
> --- a/auto/sources Tue Apr 29 22:22:38 2014 +0200
> +++ b/auto/sources Thu May 08 14:45:25 2014 +0800
> @@ -504,6 +504,11 @@
> src/http/modules/ngx_http_upstream_least_conn_module.c"
>
>
> +HTTP_UPSTREAM_CONS_HASH_MODULE=ngx_http_upstream_cons_hash_module
> +HTTP_UPSTREAM_CONS_HASH_SRCS=" \
> + src/http/modules/ngx_http_upstream_cons_hash_module.c"
> +
> +
> HTTP_UPSTREAM_KEEPALIVE_MODULE=ngx_http_upstream_keepalive_module
> HTTP_UPSTREAM_KEEPALIVE_SRCS=" \
> src/http/modules/ngx_http_upstream_keepalive_module.c"
> diff -r 48c97d83ab7f -r 063f37f1654e
> src/http/modules/ngx_http_upstream_cons_hash_module.c
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/src/http/modules/ngx_http_upstream_cons_hash_module.c Thu May 08
> 14:45:25 2014 +0800
> @@ -0,0 +1,591 @@
> +#include <ngx_core.h>
> +#include <ngx_http.h>
> +#include <ngx_config.h>
> +
> +
> +#define NGX_HTTP_UPSTREAM_CH_VNODE_NUM 141
> +
> +
> +typedef struct {
> + uint32_t hash;
> +
> + ngx_uint_t index;
> +} ngx_http_upstream_cons_hash_node_t;
> +
> +
> +typedef struct {
> + ngx_array_t *values;
> + ngx_array_t *lengths;
> +
> + ngx_uint_t node_number;
> + ngx_http_upstream_cons_hash_node_t *nodes;
> +
> + ngx_uint_t *nearest;
> +
> + ngx_http_upstream_rr_peers_t *peers;
> +
> + ngx_log_t *log;
> +
> + ngx_pool_t *pool;
> +} ngx_http_upstream_cons_hash_conf_t;
> +
> +
> +typedef struct {
> + /* the round robin data must be first */
> + ngx_http_upstream_rr_peer_data_t rrp;
> +
> + ngx_uint_t found;
> +
> + ngx_http_upstream_cons_hash_conf_t *chcf;
> +
> + ngx_event_get_peer_pt get_rr_peer;
> +} ngx_http_upstream_ch_peer_data_t;
> +
> +
> +static void *ngx_http_upstream_cons_hash_create_conf(ngx_conf_t *cf);
> +static char *ngx_http_upstream_cons_hash(ngx_conf_t *cf, ngx_command_t
> *cmd,
> + void *conf);
> +static ngx_int_t ngx_http_upstream_init_cons_hash(ngx_conf_t *cf,
> + ngx_http_upstream_srv_conf_t *us);
> +
> +static ngx_int_t ngx_http_upstream_init_cons_hash_peer(ngx_http_request_t
> *r,
> + ngx_http_upstream_srv_conf_t *us);
> +static ngx_int_t ngx_http_upstream_get_cons_hash_peer(
> + ngx_peer_connection_t *pc, void *data);
> +
> +static ngx_uint_t ngx_http_upstream_find_cons_hash_peer(
> + ngx_http_upstream_cons_hash_conf_t *chcf, uint32_t hash);
> +static int ngx_http_upstream_cons_hash_cmp_dist(const void *one,
> + const void *two);
> +static int ngx_http_upstream_cons_hash_cmp_node(const void *one,
> + const void *two);
> +static ngx_int_t ngx_http_upstream_cons_hash_random(
> + ngx_http_upstream_cons_hash_conf_t *chcf, ngx_str_t value, ngx_uint_t
> id,
> + uint32_t *ret);
> +static ngx_int_t ngx_http_upstream_cons_hash_init_nearest(
> + ngx_http_upstream_cons_hash_conf_t *chcf);
> +
> +inline static ngx_int_t ngx_http_upstream_get_cons_hash_try_peer(
> + ngx_peer_connection_t *pc, void *data, ngx_uint_t index);
> +
> +
> +static ngx_command_t ngx_http_upstream_cons_hash_commands[] = {
> +
> + { ngx_string("cons_hash"),
> + NGX_HTTP_UPS_CONF | NGX_CONF_TAKE1,
> + ngx_http_upstream_cons_hash,
> + 0,
> + 0,
> + NULL },
> +
> + ngx_null_command
> +};
> +
> +
> +static ngx_http_module_t ngx_http_upstream_cons_hash_module_ctx = {
> + NULL, /* preconfiguration */
> + NULL, /* postconfiguration */
> +
> + NULL, /* create main configuration */
> + NULL, /* init main configuration */
> +
> + ngx_http_upstream_cons_hash_create_conf, /* create server
> configuration*/
> + NULL, /* merge server configuration */
> +
> + NULL, /* create location configuration
> */
> + NULL /* merge location configuration
> */
> +};
> +
> +
> +ngx_module_t ngx_http_upstream_cons_hash_module = {
> + NGX_MODULE_V1,
> + &ngx_http_upstream_cons_hash_module_ctx, /* module context */
> + ngx_http_upstream_cons_hash_commands, /* module directives */
> + NGX_HTTP_MODULE, /* module type */
> + NULL, /* init master */
> + NULL, /* init module */
> + NULL, /* init process */
> + NULL, /* init thread */
> + NULL, /* exit thread */
> + NULL, /* exit process */
> + NULL, /* exit master */
> + NGX_MODULE_V1_PADDING
> +};
> +
> +
> +static void *
> +ngx_http_upstream_cons_hash_create_conf(ngx_conf_t *cf)
> +{
> + ngx_http_upstream_cons_hash_conf_t *conf;
> +
> + conf = ngx_pcalloc(cf->pool,
> sizeof(ngx_http_upstream_cons_hash_conf_t));
> + if (conf == NULL) {
> + return NULL;
> + }
> +
> + return conf;
> +}
> +
> +
> +static ngx_int_t
> +ngx_http_upstream_init_cons_hash(ngx_conf_t *cf,
> + ngx_http_upstream_srv_conf_t *us)
> +{
> + uint32_t hash;
> + ngx_int_t rc;
> + ngx_str_t name;
> + ngx_uint_t i, j, k, n, nn, m;
> + ngx_http_upstream_rr_peers_t *peers;
> + ngx_http_upstream_cons_hash_conf_t *chcf;
> +
> + if (ngx_http_upstream_init_round_robin(cf, us) == NGX_ERROR) {
> + return NGX_ERROR;
> + }
> +
> + peers = us->peer.data;
> +
> + n = peers->number;
> + nn = 0;
> +
> + for (i = 0; i < n; ++i) {
> + nn += peers->peer[i].weight;
> + }
> +
> + nn *= (NGX_HTTP_UPSTREAM_CH_VNODE_NUM + 1);
> +
> + chcf = ngx_http_conf_upstream_srv_conf(us,
> +
> ngx_http_upstream_cons_hash_module);
> +
> + /*
> + * to guarantee nn % n == 0, there's no side effect,
> + * but much more convenient to construct the 'nearest'
> + */
> +
> + nn = (nn + n - 1) / n * n;
> + chcf->node_number = nn;
> +
> + chcf->log = cf->log;
> + chcf->pool = cf->pool;
> + chcf->peers = peers;
> +
> + chcf->nodes = ngx_pcalloc(cf->pool, nn *
> + sizeof(ngx_http_upstream_cons_hash_node_t));
> + if (chcf->nodes == NULL) {
> + return NGX_ERROR;
> + }
> +
> + for (i = 0, k = 0; i < n; ++i) {
> +
> + name = peers->peer[i].name;
> + m = peers->peer[i].weight * (1 + NGX_HTTP_UPSTREAM_CH_VNODE_NUM);
> +
> + for (j = 0; j < m; ++j, ++k) {
> +
> + chcf->nodes[k].index = i;
> +
> + rc = ngx_http_upstream_cons_hash_random(chcf, name, j, &hash);
> + if (rc == NGX_ERROR) {
> + return NGX_ERROR;
> + }
> +
> + chcf->nodes[k].hash = hash;
> + }
> + }
> +
> + for (i = 0; i < nn - k; ++i) {
> + chcf->nodes[k + i].index = chcf->nodes[0].index;
> + chcf->nodes[k + i].hash = chcf->nodes[0].hash;
> + }
> +
> + ngx_qsort(chcf->nodes, nn, sizeof(ngx_http_upstream_cons_hash_node_t),
> + ngx_http_upstream_cons_hash_cmp_node);
> +
> + rc = ngx_http_upstream_cons_hash_init_nearest(chcf);
> + if (rc == NGX_ERROR) {
> + return NGX_ERROR;
> + }
> +
> + us->peer.init = ngx_http_upstream_init_cons_hash_peer;
> +
> + return NGX_OK;
> +}
> +
> +
> +static ngx_int_t
> +ngx_http_upstream_init_cons_hash_peer(ngx_http_request_t *r,
> + ngx_http_upstream_srv_conf_t *us)
> +{
> + uint32_t hash;
> + ngx_str_t raw_value;
> + ngx_http_upstream_cons_hash_conf_t *chcf;
> + ngx_http_upstream_ch_peer_data_t *chp;
> +
> + chcf = ngx_http_conf_upstream_srv_conf(us,
> +
> ngx_http_upstream_cons_hash_module);
> + if (chcf == NULL) {
> + return NGX_ERROR;
> + }
> +
> + chp = ngx_pcalloc(r->pool, sizeof(ngx_http_upstream_ch_peer_data_t));
> + if (chp == NULL) {
> + return NGX_ERROR;
> + }
> +
> + r->upstream->peer.data = &chp->rrp;
> +
> + if (ngx_http_upstream_init_round_robin_peer(r, us) != NGX_OK) {
> + return NGX_ERROR;
> + }
> +
> + if (!chp->rrp.peers->single) {
> +
> + /* raw_value.data is allocated in ngx_http_script_run from r->pool
> */
> +
> + if (ngx_http_script_run(r, &raw_value,
> + chcf->lengths->elts, 0, chcf->values->elts) ==
> NULL) {
> + return NGX_ERROR;
> + }
> +
> + hash = ngx_murmur_hash2(raw_value.data, raw_value.len);
> +
> + ngx_pfree(r->pool, raw_value.data);
> +
> + chp->found = ngx_http_upstream_find_cons_hash_peer(chcf, hash);
> + }
> +
> + r->upstream->peer.get = ngx_http_upstream_get_cons_hash_peer;
> +
> + chp->chcf = chcf;
> + chp->get_rr_peer = ngx_http_upstream_get_round_robin_peer;
> +
> + return NGX_OK;
> +}
> +
> +
> +inline static ngx_int_t
> +ngx_http_upstream_get_cons_hash_try_peer(ngx_peer_connection_t *pc, void
> *data,
> + ngx_uint_t index)
> +{
> + ngx_http_upstream_ch_peer_data_t *chp = data;
> +
> + time_t now;
> + ngx_int_t rc;
> + ngx_uint_t n, m;
> + ngx_http_upstream_rr_peer_t *peer;
> +
> + n = index / (8 * sizeof(uintptr_t));
> + m = (uintptr_t) 1 << index % (8 * sizeof(uintptr_t));
> +
> + if (chp->rrp.tried[n] & m) {
> + return NGX_AGAIN;
> + }
> +
> + rc = NGX_AGAIN;
> +
> + now = ngx_time();
> +
> + peer = &chp->chcf->peers->peer[index];
> +
> + /* ngx_lock_mutex(chp->rrp.peers->mutex); */
> +
> + if (!peer->down) {
> + if (peer->max_fails == 0 || peer->fails < peer->max_fails) {
> + rc = NGX_OK;
> + }
> +
> + if (now - peer->checked > peer->fail_timeout) {
> + peer->checked = now;
> + rc = NGX_OK;
> + }
> + }
> +
> + if (rc == NGX_OK) {
> + chp->rrp.current = index;
> +
> + pc->sockaddr = peer->sockaddr;
> + pc->socklen = peer->socklen;
> + pc->name = &peer->name;
> + }
> +
> + /* ngx_unlock_mutex(chp->rrp.peers->mutex); */
> +
> + chp->rrp.tried[n] |= m;
> +
> + return rc;
> +}
> +
> +
> +static ngx_int_t
> +ngx_http_upstream_get_cons_hash_peer(ngx_peer_connection_t *pc, void *data)
> +{
> + ngx_http_upstream_ch_peer_data_t *chp = data;
> +
> + ngx_int_t rc;
> + ngx_uint_t i, j, n, nn;
> + ngx_uint_t *nearest;
> + ngx_http_upstream_rr_peers_t *peers;
> + ngx_http_upstream_cons_hash_node_t *nodes;
> +
> + if (chp->rrp.peers->single) {
> + return chp->get_rr_peer(pc, &chp->rrp);
> + }
> +
> + pc->cached = 0;
> + pc->connection = NULL;
> +
> + peers = chp->chcf->peers;
> + nodes = chp->chcf->nodes;
> + nearest = chp->chcf->nearest;
> +
> + n = peers->number;
> + nn = chp->chcf->node_number;
> +
> + for (i = chp->found; i % n != 0; i = (i + 1) % nn) {
> +
> + rc = ngx_http_upstream_get_cons_hash_try_peer(pc, data,
> + nodes[i].index);
> + if (rc == NGX_OK) {
> + return NGX_OK;
> + }
> + }
> +
> + for (j = (i + n) % nn; i != j; i = (i + 1) % nn) {
> +
> + rc = ngx_http_upstream_get_cons_hash_try_peer(pc, data,
> + nearest[i]);
> + if (rc == NGX_OK) {
> + return NGX_OK;
> + }
> + }
> +
> + /* all peers failed, mark them as live for quick recovery */
> +
> + for (i = 0; i < peers->number; i++) {
> + peers->peer[i].fails = 0;
> + }
> +
> + pc->name = peers->name;
> +
> + return NGX_BUSY;
> +}
> +
> +
> +static ngx_uint_t
> +ngx_http_upstream_find_cons_hash_peer(ngx_http_upstream_cons_hash_conf_t
> *chcf,
> + uint32_t hash)
> +{
> + uint32_t mid_hash;
> + ngx_int_t l, r, mid;
> +
> + l = 0;
> + r = chcf->node_number - 1;
> +
> + while (l <= r) {
> + mid = (l + r) >> 1;
> + mid_hash = chcf->nodes[mid].hash;
> +
> + if (mid_hash < hash) {
> + l = mid + 1;
> + } else {
> + r = mid - 1;
> + }
> + }
> +
> + if (l == (ngx_int_t)chcf->node_number) {
> + l = 0;
> + }
> +
> + return l;
> +}
> +
> +
> +static char *
> +ngx_http_upstream_cons_hash(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
> +{
> + ngx_str_t *value;
> + ngx_http_script_compile_t sc;
> + ngx_http_upstream_srv_conf_t *uscf;
> + ngx_http_upstream_cons_hash_conf_t *chcf;
> +
> + uscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_upstream_module);
> +
> + chcf = ngx_http_conf_upstream_srv_conf(uscf,
> +
> ngx_http_upstream_cons_hash_module);
> + if (uscf->peer.init_upstream) {
> + ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
> + "load balancing method redefined");
> + }
> + uscf->peer.init_upstream = ngx_http_upstream_init_cons_hash;
> +
> + value = cf->args->elts;
> + ngx_memzero(&sc, sizeof(ngx_http_script_compile_t));
> +
> + sc.cf = cf;
> + sc.source = &value[1];
> + sc.lengths = &chcf->lengths;;
> + sc.values = &chcf->values;
> + sc.complete_lengths = 1;
> + sc.complete_values = 1;
> +
> + if (ngx_http_script_compile(&sc) != NGX_OK) {
> + return NGX_CONF_ERROR;
> + }
> +
> + uscf->flags = NGX_HTTP_UPSTREAM_CREATE
> + |NGX_HTTP_UPSTREAM_WEIGHT
> + |NGX_HTTP_UPSTREAM_MAX_FAILS
> + |NGX_HTTP_UPSTREAM_FAIL_TIMEOUT
> + |NGX_HTTP_UPSTREAM_DOWN;
> +
> + return NGX_CONF_OK;
> +}
> +
> +
> +static ngx_int_t
> +ngx_http_upstream_cons_hash_random(ngx_http_upstream_cons_hash_conf_t
> *chcf,
> + ngx_str_t value, ngx_uint_t id, uint32_t *ret)
> +{
> + /* repeatable random with same (val, id) */
> +
> + u_char *buf, *pos;
> + ngx_uint_t total;
> +
> + total = NGX_INT_T_LEN + value.len;
> +
> + buf = ngx_calloc(total, chcf->log);
> + if (buf == NULL) {
> + return NGX_ERROR;
> + }
> +
> + pos = ngx_snprintf(buf, total, "%i-%*s", id, value.len, value.data);
> +
> + *ret = ngx_murmur_hash2(buf, pos - buf);
> +
> + ngx_free(buf);
> +
> + return NGX_OK;
> +}
> +
> +
> +static ngx_int_t
> +ngx_http_upstream_cons_hash_init_nearest(
> + ngx_http_upstream_cons_hash_conf_t *chcf)
> +{
> + ngx_int_t k;
> + ngx_uint_t i, j, n, nn;
> + ngx_uint_t *nearest, *temp;
> + ngx_http_upstream_cons_hash_node_t *nodes;
> +
> + n = chcf->peers->number;
> + nn = chcf->node_number;
> +
> + nodes = chcf->nodes;
> +
> + nearest = ngx_pcalloc(chcf->pool, nn * sizeof(ngx_uint_t));
> + if (nearest == NULL) {
> + return NGX_ERROR;
> + }
> +
> + chcf->nearest = nearest;
> +
> + temp = ngx_pcalloc(chcf->pool, n * sizeof(ngx_uint_t));
> + if (temp == NULL) {
> + return NGX_ERROR;
> + }
> +
> + for (i = 0; i < nn; ++i) {
> + nearest[i] = nn;
> + }
> +
> + for (i = 0; i < nn; i += n) {
> + for (j = 0; j < n; ++j) {
> + temp[j] = nn;
> + }
> + for (k = n - 1; k >= 0; --k) {
> + temp[nodes[i + k].index] = i + k;
> + }
> + for (j = 0; j < n; ++j) {
> + nearest[i + j] = temp[j];
> + }
> + }
> +
> + ngx_pfree(chcf->pool, temp);
> +
> + /* update the 'nearest' twice */
> +
> + for (i = 0; i < 2; ++i) {
> + for (k = nn - n; k >= 0; k -= n) {
> + for (j = 0; j < n; ++j) {
> + if (nearest[k + j] == nn) {
> + nearest[k + j] = nearest[(k + j + n) % nn];
> + }
> + }
> + }
> + }
> +
> + for (i = 0; i < nn; i += n) {
> +
> + /* there is no elt equals to nn in the 'nearest' now */
> +
> + for (j = 0; j < n; ++j) {
> + if (nearest[i + j] < i) {
> + nearest[i + j] += nn;
> + }
> + }
> +
> + ngx_qsort(nearest + i, n, sizeof(ngx_uint_t),
> + ngx_http_upstream_cons_hash_cmp_dist);
> +
> + for (j = 0; j < n; ++j) {
> + if (nearest[i + j] >= nn) {
> + nearest[i + j] -= nn;
> + }
> + }
> + }
> +
> + for (i = 0; i < nn; ++i) {
> + nearest[i] = nodes[nearest[i]].index;
> + }
> +
> + return NGX_OK;
> +}
> +
> +
> +static int
> +ngx_http_upstream_cons_hash_cmp_dist(const void *one, const void *two)
> +{
> + ngx_uint_t first, second;
> +
> + first = *(ngx_uint_t *)one;
> + second = *(ngx_uint_t *)two;
> +
> + if (first < second) {
> + return -1;
> + }
> +
> + if (first > second) {
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +
> +static int
> +ngx_http_upstream_cons_hash_cmp_node(const void *one, const void *two)
> +{
> + ngx_http_upstream_cons_hash_node_t *first, *second;
> +
> + first = (ngx_http_upstream_cons_hash_node_t *) one;
> + second = (ngx_http_upstream_cons_hash_node_t *) two;
> +
> + if (first->hash < second->hash) {
> + return -1;
> + }
> +
> + if (first->hash > second->hash) {
> + return 1;
> + }
> +
> + return 0;
> +}
>
--------------------------------------------------------------------------------
> _______________________________________________
> nginx-devel mailing list
> nginx-devel at nginx.org
> http://mailman.nginx.org/mailman/listinfo/nginx-devel
More information about the nginx-devel
mailing list