Welcome! Log In Create A New Profile

Advanced

[PATCH] optimization of Intel processor cacheline calculation

Simon Liu
May 05, 2012 08:12AM
Hello!

The cache line size calculation is hardcoded in ngx_cpuinfo, which gives a
wrong result on some Intel processors. For example, the cache line is 64 bytes
on Sandy Bridge, whose family code is 0110 and model number is 1010 or 1101
(see this document:
http://www.intel.com/content/www/us/en/processors/processor-identification-cpuid-instruction-note.html).
But the current code in ngx_cpuinfo is:

/* Pentium Pro, II, III */
case 6:
ngx_cacheline_size = 32;

model = ((cpu[0] & 0xf0000) >> 8) | (cpu[0] & 0xf0);

if (model >= 0xd0) {
/* Intel Core, Core 2, Atom */
ngx_cacheline_size = 64;
}

break;

If the model number is 1010, ngx_cacheline_size will be 32, which is wrong.

Below is a patch (for nginx trunk) that fixes this problem by using cpuid(2)
to detect the cache line size instead of relying on hardcoded values.

Index: src/core/ngx_cpuinfo.c
===================================================================
--- src/core/ngx_cpuinfo.c (revision 4615)
+++ src/core/ngx_cpuinfo.c (working copy)
@@ -12,9 +12,93 @@
#if (( __i386__ || __amd64__ ) && ( __GNUC__ || __INTEL_COMPILER ))


+#define NGX_CACHE_LVL_1_DATA 1
+#define NGX_CACHE_LVL_2 2
+#define NGX_CACHE_LVL_3 3
+#define NGX_CACHE_PREFETCHING 4
+
+
+typedef struct ngx_cache_table {
+ u_char descriptor;
+ u_char type;
+ ngx_uint_t size;
+} ngx_cache_table_t;
+
+
static ngx_inline void ngx_cpuid(uint32_t i, uint32_t *buf);


+static ngx_cache_table_t cache_table[] = {
+ { 0x0a, NGX_CACHE_LVL_1_DATA, 32 }, /* 32 byte line size */
+ { 0x0c, NGX_CACHE_LVL_1_DATA, 32 }, /* 32 byte line size */
+ { 0x0d, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x0e, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x21, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x22, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x23, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x25, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x29, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x2c, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x39, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x3a, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x3b, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x3c, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x3d, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x3e, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x3f, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x41, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x42, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x43, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x44, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x45, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x46, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x47, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x48, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x49, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x4a, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x4b, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x4c, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x4d, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0x4e, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x60, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x66, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x67, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x68, NGX_CACHE_LVL_1_DATA, 64 }, /* 64 byte line size */
+ { 0x78, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x79, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x7a, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x7b, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x7c, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x7d, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x7f, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x80, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x82, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x83, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x84, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x85, NGX_CACHE_LVL_2, 32 }, /* 32 byte line size */
+ { 0x86, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0x87, NGX_CACHE_LVL_2, 64 }, /* 64 byte line size */
+ { 0xd0, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xd1, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xd2, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xd6, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xd7, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xd8, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xdc, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xdd, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xde, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xe2, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xe3, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xe4, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xea, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xeb, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xec, NGX_CACHE_LVL_3, 64 }, /* 64 byte line size */
+ { 0xf0, NGX_CACHE_PREFETCHING, 64 }, /* 64-byte prefetching */
+ { 0xf1, NGX_CACHE_PREFETCHING, 128 }, /* 128-byte prefetching */
+ { 0x00, 0, 0}
+};
+
+
#if ( __i386__ )

static ngx_inline void
@@ -67,13 +151,25 @@
#endif


+static ngx_inline
+uint32_t ngx_cpuid_eax(uint32_t op)
+{
+ uint32_t cpu[4];
+
+ ngx_cpuid(op, cpu);
+
+ return cpu[0];
+}
+
+
/* auto detect the L2 cache line size of modern and widespread CPUs */

void
ngx_cpuinfo(void)
{
- u_char *vendor;
- uint32_t vbuf[5], cpu[4], model;
+ u_char *vendor, *dp, des;
+ uint32_t vbuf[5], cache[4], n;
+ ngx_uint_t i, j, k, l1, l2, l3, prefetch;

vbuf[0] = 0;
vbuf[1] = 0;
@@ -81,6 +177,13 @@
vbuf[3] = 0;
vbuf[4] = 0;

+ l1 = 0;
+ l2 = 0;
+ l3 = 0;
+ prefetch = 0;
+
+ dp = (u_char *) cache;
+
ngx_cpuid(0, vbuf);

vendor = (u_char *) &vbuf[1];
@@ -89,39 +192,57 @@
return;
}

- ngx_cpuid(1, cpu);
-
if (ngx_strcmp(vendor, "GenuineIntel") == 0) {

- switch ((cpu[0] & 0xf00) >> 8) {
+ n = ngx_cpuid_eax(2) & 0xFF;

- /* Pentium */
- case 5:
- ngx_cacheline_size = 32;
- break;
+ for (i = 0 ; i < n ; i++) {
+ ngx_cpuid(2, cache);

- /* Pentium Pro, II, III */
- case 6:
- ngx_cacheline_size = 32;
+ for (j = 0; j < 3; j++) {
+ if (cache[j] & (1 << 31)) {
+ cache[j] = 0;
+ }
+ }

- model = ((cpu[0] & 0xf0000) >> 8) | (cpu[0] & 0xf0);
+ for (j = 1; j < 16; j++) {
+ des = dp[j];
+ k = 0;

- if (model >= 0xd0) {
- /* Intel Core, Core 2, Atom */
- ngx_cacheline_size = 64;
- }
+ while (cache_table[k].descriptor != 0) {
+ if (cache_table[k].descriptor == des) {

- break;
+ switch (cache_table[k].type) {

- /*
- * Pentium 4, although its cache line size is 64 bytes,
- * it prefetches up to two cache lines during memory read
- */
- case 15:
- ngx_cacheline_size = 128;
- break;
+ case NGX_CACHE_LVL_1_DATA:
+ l1 = cache_table[k].size;
+ break;
+
+ case NGX_CACHE_LVL_2:
+ l2 = cache_table[k].size;
+ break;
+
+ case NGX_CACHE_LVL_3:
+ l3 = cache_table[k].size;
+ break;
+
+ case NGX_CACHE_PREFETCHING:
+ prefetch = cache_table[k].size;
+ break;
+ }
+
+ break;
+ }
+
+ k++;
+ }
+ }
}

+ ngx_cacheline_size = ngx_max(l1, l2);
+ ngx_cacheline_size = ngx_max(l3, ngx_cacheline_size);
+ ngx_cacheline_size = ngx_max(prefetch, ngx_cacheline_size);
+
} else if (ngx_strcmp(vendor, "AuthenticAMD") == 0) {
ngx_cacheline_size = 64;
}




--
do not fear to be eccentric in opinion, for every opinion now accepted was
once eccentric.
_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[PATCH] optimization of Intel processor cacheline calculation Attachments

Simon Liu 1267 May 05, 2012 08:12AM

Re: [PATCH] optimization of Intel processor cacheline calculation

Maxim Dounin 431 May 05, 2012 05:42PM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 304
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready