Use primes for hashtable size

Andrew Borodin borodin at octonica.com
Thu Jun 1 11:54:50 UTC 2017


Hi, Maxim!

2017-05-30 18:01 GMT+05:00 Maxim Dounin <mdounin at mdounin.ru>:
>
> The maximum size of hash table as specified by the hinit->max_size
> field is indeed maximum size, and not the size of the hash table.
> Following code in the ngx_hash_init() will try hard to find to
> find out an optimal hash size for a given set of values within the
> maximum size specified, and will test all the prime numbers as
> well.
>
> I see no reasons to additionally limit the maximum size to a prime
> number.  If you think there are some, please be more specific.
>
> You are right. I've modified patch to checkout primes first, then proceed
to "hard work" . Also I've kolhozed some perf prove of improvement.
This test creates a hash table of 5000 semirandom strings (not very random,
just bytes permutated).
On my Ubuntu VM without patch hash creation is 92-96ms, with patch it's
strictly 0. "hard work" search tries about 2k sizes before success, primes
search hits at second.

Docs say some words about startup speed and I wanted to apply primes
somewhere, so here we go.

Best regards, Andrey Borodin.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20170601/d6607847/attachment.html>
-------------- next part --------------
diff --git a/src/core/ngx_hash.c b/src/core/ngx_hash.c
index 1944c7a21d..4e733c4625 100644
--- a/src/core/ngx_hash.c
+++ b/src/core/ngx_hash.c
@@ -244,6 +244,152 @@ ngx_hash_find_combined(ngx_hash_combined_t *hash, ngx_uint_t key, u_char *name,
     return NULL;
 }
 
+static ngx_uint_t ngx_hash_min_prime(ngx_uint_t value) {
+    static ngx_uint_t primes[] =
+            {
+                    3,
+                    7,
+                    11,
+                    17,
+                    23,
+                    29,
+                    37,
+                    47,
+                    59,
+                    71,
+                    89,
+                    107,
+                    131,
+                    163,
+                    197,
+                    239,
+                    293,
+                    353,
+                    431,
+                    521,
+                    631,
+                    761,
+                    919,
+                    1103,
+                    1327,
+                    1597,
+                    1931,
+                    2333,
+                    2801,
+                    3371,
+                    4049,
+                    4861,
+                    5839,
+                    7013,
+                    8419,
+                    10103,
+                    12143,
+                    14591,
+                    17519,
+                    21023,
+                    25229,
+                    30293,
+                    36353,
+                    43627,
+                    52361,
+                    62851,
+                    75431,
+                    90523,
+                    108631,
+                    130363,
+                    156437,
+                    187751,
+                    225307,
+                    270371,
+                    324449,
+                    389357,
+                    467237,
+                    560689,
+                    672827,
+                    807403,
+                    968897,
+                    1162687,
+                    1395263,
+                    1674319,
+                    2009191,
+                    2411033,
+                    2893249,
+                    3471899,
+                    4166287,
+                    4999559,
+                    5999471,
+                    7199369,
+                    7919311,
+                    8711267,
+                    9582409,
+                    10540661,
+                    11594729,
+                    12754219,
+                    14029643,
+                    15432619,
+                    16975891,
+                    18673483,
+                    20540831,
+                    22594919,
+                    24854419,
+                    27339863,
+                    30073853,
+                    33081239,
+                    36389369,
+                    40028333,
+                    44031179,
+                    48434303,
+                    53277769,
+                    58605563,
+                    64466147,
+                    70912783,
+                    78004061,
+                    85804471,
+                    94384919,
+                    103823417,
+                    114205771,
+                    125626351,
+                    138189017,
+                    152007971,
+                    167208817,
+                    183929719,
+                    202322693,
+                    222554977,
+                    244810487,
+                    269291537,
+                    296220709,
+                    325842779,
+                    358427071,
+                    394269781,
+                    433696759,
+                    477066449,
+                    524773133,
+                    577250461,
+                    634975519,
+                    698473099,
+                    768320467,
+                    845152513,
+                    929667799,
+                    1022634581,
+                    1124898043,
+                    1237387859,
+                    1361126671,
+                    1497239377,
+                    1646963321,
+                    1811659669,
+                    1992825643,
+            };
+
+    for (size_t i = 0; i < sizeof(primes)/sizeof(*primes); ++i)
+    {
+        ngx_uint_t prime = primes[i];
+        if (prime > value)
+        {
+            return prime;
+        }
+    }
+    return value;
+}
 
 #define NGX_HASH_ELT_SIZE(name)                                               \
     (sizeof(void *) + ngx_align((name)->key.len + 2, sizeof(void *)))
@@ -283,6 +429,36 @@ ngx_hash_init(ngx_hash_init_t *hinit, ngx_hash_key_t *names, ngx_uint_t nelts)
 
     bucket_size = hinit->bucket_size - sizeof(void *);
 
+    /*
+     * For hash size check primes first, then try others.
+     * There cannot be more than log(max_size) primes to test
+     */
+
+    for (size = ngx_hash_min_prime(nelts); size <= hinit->max_size; size = ngx_hash_min_prime(size)) {
+
+        ngx_memzero(test, size * sizeof(u_short));
+
+        for (n = 0; n < nelts; n++) {
+            if (names[n].key.data == NULL) {
+                continue;
+            }
+
+            key = names[n].key_hash % size;
+            test[key] = (u_short) (test[key] + NGX_HASH_ELT_SIZE(&names[n]));
+
+            if (test[key] > (u_short) bucket_size) {
+                goto next_prime;
+            }
+        }
+
+        goto found;
+
+    next_prime:
+
+        continue;
+    }
+
+
     start = nelts / (bucket_size / (2 * sizeof(void *)));
     start = start ? start : 1;
 
-------------- next part --------------
diff --git a/src/core/nginx.c b/src/core/nginx.c
index abaa50d618..904a11fb76 100644
--- a/src/core/nginx.c
+++ b/src/core/nginx.c
@@ -8,6 +8,8 @@
 #include <ngx_config.h>
 #include <ngx_core.h>
 #include <nginx.h>
+#include <sys/time.h>
+#include <sys/resource.h>
 
 
 static void ngx_show_version_info(void);
@@ -282,6 +284,49 @@ main(int argc, char *const *argv)
     }
 
     cycle = ngx_init_cycle(&init_cycle);
+
+
+        /* Here will be hash tests */
+    {
+    	size_t elementscount = 5000;
+    	ngx_hash_t			hashx;
+    	ngx_hash_key_t             *elements;
+    	    ngx_hash_init_t             hash;
+    	    struct rusage 	start,end;
+    	    ngx_log_stderr(0,"doing tests");
+
+
+    	    elements = ngx_alloc(sizeof(ngx_hash_key_t)*elementscount,log);
+
+    	    	hash.hash = &hashx;
+    	        hash.key = ngx_hash_key;
+    	        hash.max_size = 10000;
+    	        hash.bucket_size = 64;
+    	        hash.name = "test_hash";
+    	        hash.pool = ngx_create_pool(NGX_CYCLE_POOL_SIZE, log);
+    	        hash.temp_pool = NULL;
+
+    	        srand(42);
+    	        for(size_t i=0;i<elementscount;i++){
+    	        	elements[i].key.len = 4;
+    	        	elements[i].key.data = ngx_alloc(4,log);
+
+    	        	((u_char*)elements[i].key.data)[3] = 'a' + (i%16);
+    	        	((u_char*)elements[i].key.data)[1] = 'a' + ((i>>4)%16);
+    	        	((u_char*)elements[i].key.data)[2] = 'a' + ((i>>8)%16);
+    	        	((u_char*)elements[i].key.data)[0] = 'a' + ((i>>16)%16);
+    	        	elements[i].key_hash = ngx_hash_key(elements[i].key.data,4);
+    	        }
+
+    	        ngx_log_stderr(0, "before init");
+    	        getrusage(RUSAGE_SELF,&start);
+    	        ngx_hash_init(&hash, elements , elementscount);
+    	        getrusage(RUSAGE_SELF,&end);
+    	        ngx_log_stderr(0, "after init");
+    	        ngx_log_stderr(0, "time %d",end.ru_utime.tv_usec - start.ru_utime.tv_usec);
+    	        return 0;
+    }
+
     if (cycle == NULL) {
         if (ngx_test_config) {
             ngx_log_stderr(0, "configuration file %s test failed",
@@ -317,6 +362,7 @@ main(int argc, char *const *argv)
         return 0;
     }
 
+
     if (ngx_signal) {
         return ngx_signal_process(cycle, ngx_signal);
     }


More information about the nginx-devel mailing list