[PATCH] Upstream: add consistent hash module
Jianjun Zheng
codeeply at gmail.com
Thu May 8 07:52:24 UTC 2014
usage example: cons_hash $request_uri;
Not only consistent hash is introduced as a load balancing, but also
increase the hits of cache in upstream servers.
This modules is written according to round_robin, ip_hash
and least_conn modules for upstream. So it inherits all the
essential logic from them.
This implementation is simple but has a high performance,
The time comlexity remains O(log(T)) with a low constant coefficient,
even when half of servers get down,
regardless of the weight of servers, as analysis below.
Suppose there are N upstream servers, with D down servers,
and the weights are W1,W2,...,W(N-1).
V is the amount of virtual nodes per unit.
P is the probability to get an alive server from all nodes at random.
Let T = (W1+W2+...+W(N-1))*V.
then the space complexity is O(T),
and the time comlexity is O(log(T) + (1-P)*N/(N-D)),
'N/(N-D)' of which is independent of W and V.
# HG changeset patch
# User Jianjun Zheng <codeeply at gmail.com>
# Date 1399531525 -28800
# Thu May 08 14:45:25 2014 +0800
# Node ID 063f37f1654ef6cc03bd311a7fe6189b299ce2f2
# Parent 48c97d83ab7f0a3f641987fb32ace8af7720aefc
Upstream: add consistent hash module
diff -r 48c97d83ab7f -r 063f37f1654e auto/modules
--- a/auto/modules Tue Apr 29 22:22:38 2014 +0200
+++ b/auto/modules Thu May 08 14:45:25 2014 +0800
@@ -381,6 +381,11 @@
HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_LEAST_CONN_SRCS"
fi
+if [ $HTTP_UPSTREAM_CONS_HASH = YES ]; then
+ HTTP_MODULES="$HTTP_MODULES $HTTP_UPSTREAM_CONS_HASH_MODULE"
+ HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_CONS_HASH_SRCS"
+fi
+
if [ $HTTP_UPSTREAM_KEEPALIVE = YES ]; then
HTTP_MODULES="$HTTP_MODULES $HTTP_UPSTREAM_KEEPALIVE_MODULE"
HTTP_SRCS="$HTTP_SRCS $HTTP_UPSTREAM_KEEPALIVE_SRCS"
diff -r 48c97d83ab7f -r 063f37f1654e auto/options
--- a/auto/options Tue Apr 29 22:22:38 2014 +0200
+++ b/auto/options Thu May 08 14:45:25 2014 +0800
@@ -100,6 +100,7 @@
HTTP_GZIP_STATIC=NO
HTTP_UPSTREAM_IP_HASH=YES
HTTP_UPSTREAM_LEAST_CONN=YES
+HTTP_UPSTREAM_CONS_HASH=YES
HTTP_UPSTREAM_KEEPALIVE=YES
# STUB
diff -r 48c97d83ab7f -r 063f37f1654e auto/sources
--- a/auto/sources Tue Apr 29 22:22:38 2014 +0200
+++ b/auto/sources Thu May 08 14:45:25 2014 +0800
@@ -504,6 +504,11 @@
src/http/modules/ngx_http_upstream_least_conn_module.c"
+HTTP_UPSTREAM_CONS_HASH_MODULE=ngx_http_upstream_cons_hash_module
+HTTP_UPSTREAM_CONS_HASH_SRCS=" \
+ src/http/modules/ngx_http_upstream_cons_hash_module.c"
+
+
HTTP_UPSTREAM_KEEPALIVE_MODULE=ngx_http_upstream_keepalive_module
HTTP_UPSTREAM_KEEPALIVE_SRCS=" \
src/http/modules/ngx_http_upstream_keepalive_module.c"
diff -r 48c97d83ab7f -r 063f37f1654e
src/http/modules/ngx_http_upstream_cons_hash_module.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/http/modules/ngx_http_upstream_cons_hash_module.c Thu May 08
14:45:25 2014 +0800
@@ -0,0 +1,591 @@
+#include <ngx_core.h>
+#include <ngx_http.h>
+#include <ngx_config.h>
+
+
+#define NGX_HTTP_UPSTREAM_CH_VNODE_NUM 141
+
+
+typedef struct {
+ uint32_t hash;
+
+ ngx_uint_t index;
+} ngx_http_upstream_cons_hash_node_t;
+
+
+typedef struct {
+ ngx_array_t *values;
+ ngx_array_t *lengths;
+
+ ngx_uint_t node_number;
+ ngx_http_upstream_cons_hash_node_t *nodes;
+
+ ngx_uint_t *nearest;
+
+ ngx_http_upstream_rr_peers_t *peers;
+
+ ngx_log_t *log;
+
+ ngx_pool_t *pool;
+} ngx_http_upstream_cons_hash_conf_t;
+
+
+typedef struct {
+ /* the round robin data must be first */
+ ngx_http_upstream_rr_peer_data_t rrp;
+
+ ngx_uint_t found;
+
+ ngx_http_upstream_cons_hash_conf_t *chcf;
+
+ ngx_event_get_peer_pt get_rr_peer;
+} ngx_http_upstream_ch_peer_data_t;
+
+
+static void *ngx_http_upstream_cons_hash_create_conf(ngx_conf_t *cf);
+static char *ngx_http_upstream_cons_hash(ngx_conf_t *cf, ngx_command_t
*cmd,
+ void *conf);
+static ngx_int_t ngx_http_upstream_init_cons_hash(ngx_conf_t *cf,
+ ngx_http_upstream_srv_conf_t *us);
+
+static ngx_int_t ngx_http_upstream_init_cons_hash_peer(ngx_http_request_t
*r,
+ ngx_http_upstream_srv_conf_t *us);
+static ngx_int_t ngx_http_upstream_get_cons_hash_peer(
+ ngx_peer_connection_t *pc, void *data);
+
+static ngx_uint_t ngx_http_upstream_find_cons_hash_peer(
+ ngx_http_upstream_cons_hash_conf_t *chcf, uint32_t hash);
+static int ngx_http_upstream_cons_hash_cmp_dist(const void *one,
+ const void *two);
+static int ngx_http_upstream_cons_hash_cmp_node(const void *one,
+ const void *two);
+static ngx_int_t ngx_http_upstream_cons_hash_random(
+ ngx_http_upstream_cons_hash_conf_t *chcf, ngx_str_t value, ngx_uint_t
id,
+ uint32_t *ret);
+static ngx_int_t ngx_http_upstream_cons_hash_init_nearest(
+ ngx_http_upstream_cons_hash_conf_t *chcf);
+
+inline static ngx_int_t ngx_http_upstream_get_cons_hash_try_peer(
+ ngx_peer_connection_t *pc, void *data, ngx_uint_t index);
+
+
+static ngx_command_t ngx_http_upstream_cons_hash_commands[] = {
+
+ { ngx_string("cons_hash"),
+ NGX_HTTP_UPS_CONF | NGX_CONF_TAKE1,
+ ngx_http_upstream_cons_hash,
+ 0,
+ 0,
+ NULL },
+
+ ngx_null_command
+};
+
+
+static ngx_http_module_t ngx_http_upstream_cons_hash_module_ctx = {
+ NULL, /* preconfiguration */
+ NULL, /* postconfiguration */
+
+ NULL, /* create main configuration */
+ NULL, /* init main configuration */
+
+ ngx_http_upstream_cons_hash_create_conf, /* create server
configuration*/
+ NULL, /* merge server configuration */
+
+ NULL, /* create location configuration
*/
+ NULL /* merge location configuration
*/
+};
+
+
+ngx_module_t ngx_http_upstream_cons_hash_module = {
+ NGX_MODULE_V1,
+ &ngx_http_upstream_cons_hash_module_ctx, /* module context */
+ ngx_http_upstream_cons_hash_commands, /* module directives */
+ NGX_HTTP_MODULE, /* module type */
+ NULL, /* init master */
+ NULL, /* init module */
+ NULL, /* init process */
+ NULL, /* init thread */
+ NULL, /* exit thread */
+ NULL, /* exit process */
+ NULL, /* exit master */
+ NGX_MODULE_V1_PADDING
+};
+
+
+static void *
+ngx_http_upstream_cons_hash_create_conf(ngx_conf_t *cf)
+{
+ ngx_http_upstream_cons_hash_conf_t *conf;
+
+ conf = ngx_pcalloc(cf->pool,
sizeof(ngx_http_upstream_cons_hash_conf_t));
+ if (conf == NULL) {
+ return NULL;
+ }
+
+ return conf;
+}
+
+
+static ngx_int_t
+ngx_http_upstream_init_cons_hash(ngx_conf_t *cf,
+ ngx_http_upstream_srv_conf_t *us)
+{
+ uint32_t hash;
+ ngx_int_t rc;
+ ngx_str_t name;
+ ngx_uint_t i, j, k, n, nn, m;
+ ngx_http_upstream_rr_peers_t *peers;
+ ngx_http_upstream_cons_hash_conf_t *chcf;
+
+ if (ngx_http_upstream_init_round_robin(cf, us) == NGX_ERROR) {
+ return NGX_ERROR;
+ }
+
+ peers = us->peer.data;
+
+ n = peers->number;
+ nn = 0;
+
+ for (i = 0; i < n; ++i) {
+ nn += peers->peer[i].weight;
+ }
+
+ nn *= (NGX_HTTP_UPSTREAM_CH_VNODE_NUM + 1);
+
+ chcf = ngx_http_conf_upstream_srv_conf(us,
+
ngx_http_upstream_cons_hash_module);
+
+ /*
+ * to guarantee nn % n == 0, there's no side effect,
+ * but much more convenient to construct the 'nearest'
+ */
+
+ nn = (nn + n - 1) / n * n;
+ chcf->node_number = nn;
+
+ chcf->log = cf->log;
+ chcf->pool = cf->pool;
+ chcf->peers = peers;
+
+ chcf->nodes = ngx_pcalloc(cf->pool, nn *
+ sizeof(ngx_http_upstream_cons_hash_node_t));
+ if (chcf->nodes == NULL) {
+ return NGX_ERROR;
+ }
+
+ for (i = 0, k = 0; i < n; ++i) {
+
+ name = peers->peer[i].name;
+ m = peers->peer[i].weight * (1 + NGX_HTTP_UPSTREAM_CH_VNODE_NUM);
+
+ for (j = 0; j < m; ++j, ++k) {
+
+ chcf->nodes[k].index = i;
+
+ rc = ngx_http_upstream_cons_hash_random(chcf, name, j, &hash);
+ if (rc == NGX_ERROR) {
+ return NGX_ERROR;
+ }
+
+ chcf->nodes[k].hash = hash;
+ }
+ }
+
+ for (i = 0; i < nn - k; ++i) {
+ chcf->nodes[k + i].index = chcf->nodes[0].index;
+ chcf->nodes[k + i].hash = chcf->nodes[0].hash;
+ }
+
+ ngx_qsort(chcf->nodes, nn, sizeof(ngx_http_upstream_cons_hash_node_t),
+ ngx_http_upstream_cons_hash_cmp_node);
+
+ rc = ngx_http_upstream_cons_hash_init_nearest(chcf);
+ if (rc == NGX_ERROR) {
+ return NGX_ERROR;
+ }
+
+ us->peer.init = ngx_http_upstream_init_cons_hash_peer;
+
+ return NGX_OK;
+}
+
+
+static ngx_int_t
+ngx_http_upstream_init_cons_hash_peer(ngx_http_request_t *r,
+ ngx_http_upstream_srv_conf_t *us)
+{
+ uint32_t hash;
+ ngx_str_t raw_value;
+ ngx_http_upstream_cons_hash_conf_t *chcf;
+ ngx_http_upstream_ch_peer_data_t *chp;
+
+ chcf = ngx_http_conf_upstream_srv_conf(us,
+
ngx_http_upstream_cons_hash_module);
+ if (chcf == NULL) {
+ return NGX_ERROR;
+ }
+
+ chp = ngx_pcalloc(r->pool, sizeof(ngx_http_upstream_ch_peer_data_t));
+ if (chp == NULL) {
+ return NGX_ERROR;
+ }
+
+ r->upstream->peer.data = &chp->rrp;
+
+ if (ngx_http_upstream_init_round_robin_peer(r, us) != NGX_OK) {
+ return NGX_ERROR;
+ }
+
+ if (!chp->rrp.peers->single) {
+
+ /* raw_value.data is allocated in ngx_http_script_run from r->pool
*/
+
+ if (ngx_http_script_run(r, &raw_value,
+ chcf->lengths->elts, 0, chcf->values->elts) ==
NULL) {
+ return NGX_ERROR;
+ }
+
+ hash = ngx_murmur_hash2(raw_value.data, raw_value.len);
+
+ ngx_pfree(r->pool, raw_value.data);
+
+ chp->found = ngx_http_upstream_find_cons_hash_peer(chcf, hash);
+ }
+
+ r->upstream->peer.get = ngx_http_upstream_get_cons_hash_peer;
+
+ chp->chcf = chcf;
+ chp->get_rr_peer = ngx_http_upstream_get_round_robin_peer;
+
+ return NGX_OK;
+}
+
+
+inline static ngx_int_t
+ngx_http_upstream_get_cons_hash_try_peer(ngx_peer_connection_t *pc, void
*data,
+ ngx_uint_t index)
+{
+ ngx_http_upstream_ch_peer_data_t *chp = data;
+
+ time_t now;
+ ngx_int_t rc;
+ ngx_uint_t n, m;
+ ngx_http_upstream_rr_peer_t *peer;
+
+ n = index / (8 * sizeof(uintptr_t));
+ m = (uintptr_t) 1 << index % (8 * sizeof(uintptr_t));
+
+ if (chp->rrp.tried[n] & m) {
+ return NGX_AGAIN;
+ }
+
+ rc = NGX_AGAIN;
+
+ now = ngx_time();
+
+ peer = &chp->chcf->peers->peer[index];
+
+ /* ngx_lock_mutex(chp->rrp.peers->mutex); */
+
+ if (!peer->down) {
+ if (peer->max_fails == 0 || peer->fails < peer->max_fails) {
+ rc = NGX_OK;
+ }
+
+ if (now - peer->checked > peer->fail_timeout) {
+ peer->checked = now;
+ rc = NGX_OK;
+ }
+ }
+
+ if (rc == NGX_OK) {
+ chp->rrp.current = index;
+
+ pc->sockaddr = peer->sockaddr;
+ pc->socklen = peer->socklen;
+ pc->name = &peer->name;
+ }
+
+ /* ngx_unlock_mutex(chp->rrp.peers->mutex); */
+
+ chp->rrp.tried[n] |= m;
+
+ return rc;
+}
+
+
+static ngx_int_t
+ngx_http_upstream_get_cons_hash_peer(ngx_peer_connection_t *pc, void *data)
+{
+ ngx_http_upstream_ch_peer_data_t *chp = data;
+
+ ngx_int_t rc;
+ ngx_uint_t i, j, n, nn;
+ ngx_uint_t *nearest;
+ ngx_http_upstream_rr_peers_t *peers;
+ ngx_http_upstream_cons_hash_node_t *nodes;
+
+ if (chp->rrp.peers->single) {
+ return chp->get_rr_peer(pc, &chp->rrp);
+ }
+
+ pc->cached = 0;
+ pc->connection = NULL;
+
+ peers = chp->chcf->peers;
+ nodes = chp->chcf->nodes;
+ nearest = chp->chcf->nearest;
+
+ n = peers->number;
+ nn = chp->chcf->node_number;
+
+ for (i = chp->found; i % n != 0; i = (i + 1) % nn) {
+
+ rc = ngx_http_upstream_get_cons_hash_try_peer(pc, data,
+ nodes[i].index);
+ if (rc == NGX_OK) {
+ return NGX_OK;
+ }
+ }
+
+ for (j = (i + n) % nn; i != j; i = (i + 1) % nn) {
+
+ rc = ngx_http_upstream_get_cons_hash_try_peer(pc, data,
+ nearest[i]);
+ if (rc == NGX_OK) {
+ return NGX_OK;
+ }
+ }
+
+ /* all peers failed, mark them as live for quick recovery */
+
+ for (i = 0; i < peers->number; i++) {
+ peers->peer[i].fails = 0;
+ }
+
+ pc->name = peers->name;
+
+ return NGX_BUSY;
+}
+
+
+static ngx_uint_t
+ngx_http_upstream_find_cons_hash_peer(ngx_http_upstream_cons_hash_conf_t
*chcf,
+ uint32_t hash)
+{
+ uint32_t mid_hash;
+ ngx_int_t l, r, mid;
+
+ l = 0;
+ r = chcf->node_number - 1;
+
+ while (l <= r) {
+ mid = (l + r) >> 1;
+ mid_hash = chcf->nodes[mid].hash;
+
+ if (mid_hash < hash) {
+ l = mid + 1;
+ } else {
+ r = mid - 1;
+ }
+ }
+
+ if (l == (ngx_int_t)chcf->node_number) {
+ l = 0;
+ }
+
+ return l;
+}
+
+
+static char *
+ngx_http_upstream_cons_hash(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
+{
+ ngx_str_t *value;
+ ngx_http_script_compile_t sc;
+ ngx_http_upstream_srv_conf_t *uscf;
+ ngx_http_upstream_cons_hash_conf_t *chcf;
+
+ uscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_upstream_module);
+
+ chcf = ngx_http_conf_upstream_srv_conf(uscf,
+
ngx_http_upstream_cons_hash_module);
+ if (uscf->peer.init_upstream) {
+ ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
+ "load balancing method redefined");
+ }
+ uscf->peer.init_upstream = ngx_http_upstream_init_cons_hash;
+
+ value = cf->args->elts;
+ ngx_memzero(&sc, sizeof(ngx_http_script_compile_t));
+
+ sc.cf = cf;
+ sc.source = &value[1];
+ sc.lengths = &chcf->lengths;;
+ sc.values = &chcf->values;
+ sc.complete_lengths = 1;
+ sc.complete_values = 1;
+
+ if (ngx_http_script_compile(&sc) != NGX_OK) {
+ return NGX_CONF_ERROR;
+ }
+
+ uscf->flags = NGX_HTTP_UPSTREAM_CREATE
+ |NGX_HTTP_UPSTREAM_WEIGHT
+ |NGX_HTTP_UPSTREAM_MAX_FAILS
+ |NGX_HTTP_UPSTREAM_FAIL_TIMEOUT
+ |NGX_HTTP_UPSTREAM_DOWN;
+
+ return NGX_CONF_OK;
+}
+
+
+static ngx_int_t
+ngx_http_upstream_cons_hash_random(ngx_http_upstream_cons_hash_conf_t
*chcf,
+ ngx_str_t value, ngx_uint_t id, uint32_t *ret)
+{
+ /* repeatable random with same (val, id) */
+
+ u_char *buf, *pos;
+ ngx_uint_t total;
+
+ total = NGX_INT_T_LEN + value.len;
+
+ buf = ngx_calloc(total, chcf->log);
+ if (buf == NULL) {
+ return NGX_ERROR;
+ }
+
+ pos = ngx_snprintf(buf, total, "%i-%*s", id, value.len, value.data);
+
+ *ret = ngx_murmur_hash2(buf, pos - buf);
+
+ ngx_free(buf);
+
+ return NGX_OK;
+}
+
+
+static ngx_int_t
+ngx_http_upstream_cons_hash_init_nearest(
+ ngx_http_upstream_cons_hash_conf_t *chcf)
+{
+ ngx_int_t k;
+ ngx_uint_t i, j, n, nn;
+ ngx_uint_t *nearest, *temp;
+ ngx_http_upstream_cons_hash_node_t *nodes;
+
+ n = chcf->peers->number;
+ nn = chcf->node_number;
+
+ nodes = chcf->nodes;
+
+ nearest = ngx_pcalloc(chcf->pool, nn * sizeof(ngx_uint_t));
+ if (nearest == NULL) {
+ return NGX_ERROR;
+ }
+
+ chcf->nearest = nearest;
+
+ temp = ngx_pcalloc(chcf->pool, n * sizeof(ngx_uint_t));
+ if (temp == NULL) {
+ return NGX_ERROR;
+ }
+
+ for (i = 0; i < nn; ++i) {
+ nearest[i] = nn;
+ }
+
+ for (i = 0; i < nn; i += n) {
+ for (j = 0; j < n; ++j) {
+ temp[j] = nn;
+ }
+ for (k = n - 1; k >= 0; --k) {
+ temp[nodes[i + k].index] = i + k;
+ }
+ for (j = 0; j < n; ++j) {
+ nearest[i + j] = temp[j];
+ }
+ }
+
+ ngx_pfree(chcf->pool, temp);
+
+ /* update the 'nearest' twice */
+
+ for (i = 0; i < 2; ++i) {
+ for (k = nn - n; k >= 0; k -= n) {
+ for (j = 0; j < n; ++j) {
+ if (nearest[k + j] == nn) {
+ nearest[k + j] = nearest[(k + j + n) % nn];
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < nn; i += n) {
+
+ /* there is no elt equals to nn in the 'nearest' now */
+
+ for (j = 0; j < n; ++j) {
+ if (nearest[i + j] < i) {
+ nearest[i + j] += nn;
+ }
+ }
+
+ ngx_qsort(nearest + i, n, sizeof(ngx_uint_t),
+ ngx_http_upstream_cons_hash_cmp_dist);
+
+ for (j = 0; j < n; ++j) {
+ if (nearest[i + j] >= nn) {
+ nearest[i + j] -= nn;
+ }
+ }
+ }
+
+ for (i = 0; i < nn; ++i) {
+ nearest[i] = nodes[nearest[i]].index;
+ }
+
+ return NGX_OK;
+}
+
+
+static int
+ngx_http_upstream_cons_hash_cmp_dist(const void *one, const void *two)
+{
+ ngx_uint_t first, second;
+
+ first = *(ngx_uint_t *)one;
+ second = *(ngx_uint_t *)two;
+
+ if (first < second) {
+ return -1;
+ }
+
+ if (first > second) {
+ return 1;
+ }
+
+ return 0;
+}
+
+
+static int
+ngx_http_upstream_cons_hash_cmp_node(const void *one, const void *two)
+{
+ ngx_http_upstream_cons_hash_node_t *first, *second;
+
+ first = (ngx_http_upstream_cons_hash_node_t *) one;
+ second = (ngx_http_upstream_cons_hash_node_t *) two;
+
+ if (first->hash < second->hash) {
+ return -1;
+ }
+
+ if (first->hash > second->hash) {
+ return 1;
+ }
+
+ return 0;
+}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20140508/594c5a7c/attachment-0001.html>
More information about the nginx-devel
mailing list