[PATCH] optimization of Intel processor cacheline calculation

Simon Liu simohayha.bobo at gmail.com
Sat May 5 12:10:30 UTC 2012


Hello!

The cache line size calculation is hardcoded in ngx_cpuinfo(), which gives
the wrong result on some Intel processors. For example, the cache line is
64 bytes on Sandy Bridge; its family code is 0110 and its model number is
1010 or 1101 (see this document:
http://www.intel.com/content/www/us/en/processors/processor-identification-cpuid-instruction-note.html).
However, the code in ngx_cpuinfo() is:

        /* Pentium Pro, II, III */
        case 6:
            ngx_cacheline_size = 32;

            model = ((cpu[0] & 0xf0000) >> 8) | (cpu[0] & 0xf0);

            if (model >= 0xd0) {
                /* Intel Core, Core 2, Atom */
                ngx_cacheline_size = 64;
            }

            break;

If the model number is 1010, ngx_cacheline_size will be 32, which is wrong.

Below is a patch (for nginx trunk) that fixes this problem by using cpuid(2)
to detect the cache line size instead of relying on hardcoded values.

Index: src/core/ngx_cpuinfo.c
===================================================================
--- src/core/ngx_cpuinfo.c (revision 4615)
+++ src/core/ngx_cpuinfo.c (working copy)
@@ -12,9 +12,93 @@
 #if (( __i386__ || __amd64__ ) && ( __GNUC__ || __INTEL_COMPILER ))


+#define NGX_CACHE_LVL_1_DATA            1
+#define NGX_CACHE_LVL_2                 2
+#define NGX_CACHE_LVL_3                 3
+#define NGX_CACHE_PREFETCHING           4
+
+
+typedef struct ngx_cache_table {
+    u_char            descriptor;
+    u_char            type;
+    ngx_uint_t        size;
+} ngx_cache_table_t;
+
+
 static ngx_inline void ngx_cpuid(uint32_t i, uint32_t *buf);


+static ngx_cache_table_t  cache_table[] = {
+    { 0x0a, NGX_CACHE_LVL_1_DATA, 32 },  /* 32 byte line size */
+    { 0x0c, NGX_CACHE_LVL_1_DATA, 32 },  /* 32 byte line size */
+    { 0x0d, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x0e, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x21, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x22, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x23, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x25, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x29, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x2c, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x39, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x3a, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x3b, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x3c, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x3d, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x3e, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x3f, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x41, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x42, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x43, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x44, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x45, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x46, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x47, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x48, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x49, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x4a, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x4b, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x4c, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x4d, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0x4e, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x60, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x66, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x67, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x68, NGX_CACHE_LVL_1_DATA, 64 },  /* 64 byte line size */
+    { 0x78, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x79, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x7a, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x7b, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x7c, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x7d, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x7f, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x80, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x82, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x83, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x84, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x85, NGX_CACHE_LVL_2, 32 },       /* 32 byte line size */
+    { 0x86, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0x87, NGX_CACHE_LVL_2, 64 },       /* 64 byte line size */
+    { 0xd0, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xd1, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xd2, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xd6, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xd7, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xd8, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xdc, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xdd, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xde, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xe2, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xe3, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xe4, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xea, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xeb, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xec, NGX_CACHE_LVL_3, 64 },       /* 64 byte line size */
+    { 0xf0, NGX_CACHE_PREFETCHING, 64 },  /* 64-byte prefetching */
+    { 0xf1, NGX_CACHE_PREFETCHING, 128 }, /* 128-byte prefetching */
+    { 0x00, 0, 0}
+};
+
+
 #if ( __i386__ )

 static ngx_inline void
@@ -67,13 +151,25 @@
 #endif


+static ngx_inline
+uint32_t ngx_cpuid_eax(uint32_t op)
+{
+    uint32_t   cpu[4];
+
+    ngx_cpuid(op, cpu);
+
+    return cpu[0];
+}
+
+
 /* auto detect the L2 cache line size of modern and widespread CPUs */

 void
 ngx_cpuinfo(void)
 {
-    u_char    *vendor;
-    uint32_t   vbuf[5], cpu[4], model;
+    u_char    *vendor, *dp, des;
+    uint32_t   vbuf[5], cache[4], n;
+    ngx_uint_t i, j, k, l1, l2, l3, prefetch;

     vbuf[0] = 0;
     vbuf[1] = 0;
@@ -81,6 +177,13 @@
     vbuf[3] = 0;
     vbuf[4] = 0;

+    l1 = 0;
+    l2 = 0;
+    l3 = 0;
+    prefetch = 0;
+
+    dp = (u_char *) cache;
+
     ngx_cpuid(0, vbuf);

     vendor = (u_char *) &vbuf[1];
@@ -89,39 +192,57 @@
         return;
     }

-    ngx_cpuid(1, cpu);
-
     if (ngx_strcmp(vendor, "GenuineIntel") == 0) {

-        switch ((cpu[0] & 0xf00) >> 8) {
+        n = ngx_cpuid_eax(2) & 0xFF;

-        /* Pentium */
-        case 5:
-            ngx_cacheline_size = 32;
-            break;
+        for (i = 0 ; i < n ; i++) {
+            ngx_cpuid(2, cache);

-        /* Pentium Pro, II, III */
-        case 6:
-            ngx_cacheline_size = 32;
+            for (j = 0; j < 3; j++) {
+                if (cache[j] & (1 << 31)) {
+                    cache[j] = 0;
+                }
+            }

-            model = ((cpu[0] & 0xf0000) >> 8) | (cpu[0] & 0xf0);
+            for (j = 1; j < 16; j++) {
+                des = dp[j];
+                k = 0;

-            if (model >= 0xd0) {
-                /* Intel Core, Core 2, Atom */
-                ngx_cacheline_size = 64;
-            }
+                while (cache_table[k].descriptor != 0) {
+                    if (cache_table[k].descriptor == des) {

-            break;
+                        switch (cache_table[k].type) {

-        /*
-         * Pentium 4, although its cache line size is 64 bytes,
-         * it prefetches up to two cache lines during memory read
-         */
-        case 15:
-            ngx_cacheline_size = 128;
-            break;
+                        case NGX_CACHE_LVL_1_DATA:
+                            l1 = cache_table[k].size;
+                            break;
+
+                        case NGX_CACHE_LVL_2:
+                            l2 = cache_table[k].size;
+                            break;
+
+                        case NGX_CACHE_LVL_3:
+                            l3 = cache_table[k].size;
+                            break;
+
+                        case NGX_CACHE_PREFETCHING:
+                            prefetch = cache_table[k].size;
+                            break;
+                        }
+
+                        break;
+                    }
+
+                    k++;
+                }
+            }
         }

+        ngx_cacheline_size = ngx_max(l1, l2);
+        ngx_cacheline_size = ngx_max(l3, ngx_cacheline_size);
+        ngx_cacheline_size = ngx_max(prefetch, ngx_cacheline_size);
+
     } else if (ngx_strcmp(vendor, "AuthenticAMD") == 0) {
         ngx_cacheline_size = 64;
     }




-- 
do not fear to be eccentric in opinion, for every opinion now accepted was
once eccentric.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20120505/e3fd2365/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: cpuinfo.patch
Type: application/octet-stream
Size: 8228 bytes
Desc: not available
URL: <http://mailman.nginx.org/pipermail/nginx-devel/attachments/20120505/e3fd2365/attachment-0001.obj>


More information about the nginx-devel mailing list