Use primes for hashtable size
Andrew Borodin
borodin at octonica.com
Thu Jun 1 11:54:50 UTC 2017
Hi, Maxim!
2017-05-30 18:01 GMT+05:00 Maxim Dounin <mdounin at mdounin.ru>:
>
> The maximum size of hash table as specified by the hinit->max_size
> field is indeed maximum size, and not the size of the hash table.
> Following code in the ngx_hash_init() will try hard to find to
> find out an optimal hash size for a given set of values within the
> maximum size specified, and will test all the prime numbers as
> well.
>
> I see no reasons to additionally limit the maximum size to a prime
> number. If you think there are some, please be more specific.
>
> You are right. I've modified patch to checkout primes first, then proceed
to "hard work" . Also I've kolhozed some perf prove of improvement.
This test creates a hash table of 5000 semirandom strings (not very random,
just bytes permutated).
On my Ubuntu VM without patch hash creation is 92-96ms, with patch it's
strictly 0. "hard work" search tries about 2k sizes before success, primes
search hits at second.
Docs say some words about startup speed and I wanted to apply primes
somewhere, so here we go.
Best regards, Andrey Borodin.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20170601/d6607847/attachment.html>
-------------- next part --------------
diff --git a/src/core/ngx_hash.c b/src/core/ngx_hash.c
index 1944c7a21d..4e733c4625 100644
--- a/src/core/ngx_hash.c
+++ b/src/core/ngx_hash.c
@@ -244,6 +244,152 @@ ngx_hash_find_combined(ngx_hash_combined_t *hash, ngx_uint_t key, u_char *name,
return NULL;
}
+static ngx_uint_t ngx_hash_min_prime(ngx_uint_t value) {
+ static ngx_uint_t primes[] =
+ {
+ 3,
+ 7,
+ 11,
+ 17,
+ 23,
+ 29,
+ 37,
+ 47,
+ 59,
+ 71,
+ 89,
+ 107,
+ 131,
+ 163,
+ 197,
+ 239,
+ 293,
+ 353,
+ 431,
+ 521,
+ 631,
+ 761,
+ 919,
+ 1103,
+ 1327,
+ 1597,
+ 1931,
+ 2333,
+ 2801,
+ 3371,
+ 4049,
+ 4861,
+ 5839,
+ 7013,
+ 8419,
+ 10103,
+ 12143,
+ 14591,
+ 17519,
+ 21023,
+ 25229,
+ 30293,
+ 36353,
+ 43627,
+ 52361,
+ 62851,
+ 75431,
+ 90523,
+ 108631,
+ 130363,
+ 156437,
+ 187751,
+ 225307,
+ 270371,
+ 324449,
+ 389357,
+ 467237,
+ 560689,
+ 672827,
+ 807403,
+ 968897,
+ 1162687,
+ 1395263,
+ 1674319,
+ 2009191,
+ 2411033,
+ 2893249,
+ 3471899,
+ 4166287,
+ 4999559,
+ 5999471,
+ 7199369,
+ 7919311,
+ 8711267,
+ 9582409,
+ 10540661,
+ 11594729,
+ 12754219,
+ 14029643,
+ 15432619,
+ 16975891,
+ 18673483,
+ 20540831,
+ 22594919,
+ 24854419,
+ 27339863,
+ 30073853,
+ 33081239,
+ 36389369,
+ 40028333,
+ 44031179,
+ 48434303,
+ 53277769,
+ 58605563,
+ 64466147,
+ 70912783,
+ 78004061,
+ 85804471,
+ 94384919,
+ 103823417,
+ 114205771,
+ 125626351,
+ 138189017,
+ 152007971,
+ 167208817,
+ 183929719,
+ 202322693,
+ 222554977,
+ 244810487,
+ 269291537,
+ 296220709,
+ 325842779,
+ 358427071,
+ 394269781,
+ 433696759,
+ 477066449,
+ 524773133,
+ 577250461,
+ 634975519,
+ 698473099,
+ 768320467,
+ 845152513,
+ 929667799,
+ 1022634581,
+ 1124898043,
+ 1237387859,
+ 1361126671,
+ 1497239377,
+ 1646963321,
+ 1811659669,
+ 1992825643,
+ };
+
+ for (size_t i = 0; i < sizeof(primes)/sizeof(*primes); ++i)
+ {
+ ngx_uint_t prime = primes[i];
+ if (prime > value)
+ {
+ return prime;
+ }
+ }
+ return value;
+}
#define NGX_HASH_ELT_SIZE(name) \
(sizeof(void *) + ngx_align((name)->key.len + 2, sizeof(void *)))
@@ -283,6 +429,36 @@ ngx_hash_init(ngx_hash_init_t *hinit, ngx_hash_key_t *names, ngx_uint_t nelts)
bucket_size = hinit->bucket_size - sizeof(void *);
+ /*
+ * For hash size check primes first, then try others.
+ * There cannot be more than log(max_size) primes to test
+ */
+
+ for (size = ngx_hash_min_prime(nelts); size <= hinit->max_size; size = ngx_hash_min_prime(size)) {
+
+ ngx_memzero(test, size * sizeof(u_short));
+
+ for (n = 0; n < nelts; n++) {
+ if (names[n].key.data == NULL) {
+ continue;
+ }
+
+ key = names[n].key_hash % size;
+ test[key] = (u_short) (test[key] + NGX_HASH_ELT_SIZE(&names[n]));
+
+ if (test[key] > (u_short) bucket_size) {
+ goto next_prime;
+ }
+ }
+
+ goto found;
+
+ next_prime:
+
+ continue;
+ }
+
+
start = nelts / (bucket_size / (2 * sizeof(void *)));
start = start ? start : 1;
-------------- next part --------------
diff --git a/src/core/nginx.c b/src/core/nginx.c
index abaa50d618..904a11fb76 100644
--- a/src/core/nginx.c
+++ b/src/core/nginx.c
@@ -8,6 +8,8 @@
#include <ngx_config.h>
#include <ngx_core.h>
#include <nginx.h>
+#include <sys/time.h>
+#include <sys/resource.h>
static void ngx_show_version_info(void);
@@ -282,6 +284,49 @@ main(int argc, char *const *argv)
}
cycle = ngx_init_cycle(&init_cycle);
+
+
+ /* Here will be hash tests */
+ {
+ size_t elementscount = 5000;
+ ngx_hash_t hashx;
+ ngx_hash_key_t *elements;
+ ngx_hash_init_t hash;
+ struct rusage start,end;
+ ngx_log_stderr(0,"doing tests");
+
+
+ elements = ngx_alloc(sizeof(ngx_hash_key_t)*elementscount,log);
+
+ hash.hash = &hashx;
+ hash.key = ngx_hash_key;
+ hash.max_size = 10000;
+ hash.bucket_size = 64;
+ hash.name = "test_hash";
+ hash.pool = ngx_create_pool(NGX_CYCLE_POOL_SIZE, log);
+ hash.temp_pool = NULL;
+
+ srand(42);
+ for(size_t i=0;i<elementscount;i++){
+ elements[i].key.len = 4;
+ elements[i].key.data = ngx_alloc(4,log);
+
+ ((u_char*)elements[i].key.data)[3] = 'a' + (i%16);
+ ((u_char*)elements[i].key.data)[1] = 'a' + ((i>>4)%16);
+ ((u_char*)elements[i].key.data)[2] = 'a' + ((i>>8)%16);
+ ((u_char*)elements[i].key.data)[0] = 'a' + ((i>>16)%16);
+ elements[i].key_hash = ngx_hash_key(elements[i].key.data,4);
+ }
+
+ ngx_log_stderr(0, "before init");
+ getrusage(RUSAGE_SELF,&start);
+ ngx_hash_init(&hash, elements , elementscount);
+ getrusage(RUSAGE_SELF,&end);
+ ngx_log_stderr(0, "after init");
+ ngx_log_stderr(0, "time %d",end.ru_utime.tv_usec - start.ru_utime.tv_usec);
+ return 0;
+ }
+
if (cycle == NULL) {
if (ngx_test_config) {
ngx_log_stderr(0, "configuration file %s test failed",
@@ -317,6 +362,7 @@ main(int argc, char *const *argv)
return 0;
}
+
if (ngx_signal) {
return ngx_signal_process(cycle, ngx_signal);
}
More information about the nginx-devel
mailing list