Welcome! Log In Create A New Profile

Advanced

RE: [Patch] SO_REUSEPORT support from master process

luy
September 18, 2014 04:08PM
Dear All,

Here is the updated patch that enables SO_REUSEPORT support on Linux (attached below). The changes in this version are:

1. Fixed the issue with the "binary upgrade on the fly" feature. Thanks to Sepherosa Ziehau for his feedback! With this version of the patch, there is no issue or connection loss during a binary upgrade. I tested on RHEL 6.5 (with kernel 3.13.9) and CentOS 7; both work fine. During the upgrade, the new master process inherits all of the previous listen sockets, and the new child processes inherit them from the new master. The workload data also show no connection loss or performance impact during the binary upgrade.

2. The duplication of listen sockets now happens in the function ngx_http_init_listening instead of ngx_init_cycle.

Please review it and let me know your questions and comments. Thanks very much for your time reviewing the patch.

Thanks,
Yingqi Lu

# HG changeset patch
# User Yingqi Lu <Yingqi.Lu@intel.com>
# Date 1411067116 25200
# Thu Sep 18 12:05:16 2014 -0700
# Node ID 222e0a18a7fd4d61d219ed6e7a2443716978cc71
# Parent 45aef9a5b176d8e522277b9abf6c09fb874ab044
These are the patch files to enable SO_REUSEPORT support (patch is based on nginx-106a8bfa4f42.tar.gz)

diff -r 45aef9a5b176 -r 222e0a18a7fd src/core/ngx_connection.c
--- a/src/core/ngx_connection.c Thu Sep 18 12:03:41 2014 -0700
+++ b/src/core/ngx_connection.c Thu Sep 18 12:05:16 2014 -0700
@@ -304,7 +304,7 @@
ngx_int_t
ngx_open_listening_sockets(ngx_cycle_t *cycle)
{
- int reuseaddr;
+ int reuseaddr, reuseport;
ngx_uint_t i, tries, failed;
ngx_err_t err;
ngx_log_t *log;
@@ -312,6 +312,7 @@
ngx_listening_t *ls;

reuseaddr = 1;
+ reuseport = 1;
#if (NGX_SUPPRESS_WARN)
failed = 0;
#endif
@@ -369,6 +370,23 @@

return NGX_ERROR;
}
+ if (ngx_so_reuseport_enabled)
+ {
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT,
+ (const void *) &reuseport, sizeof(int))
+ == -1) {
+ ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
+ "setsockopt(SO_REUSEPORT) %V failed",
+ &ls[i].addr_text);
+ if (ngx_close_socket(s) == -1) {
+ ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
+ ngx_close_socket_n " %V failed",
+ &ls[i].addr_text);
+ }
+
+ return NGX_ERROR;
+ }
+ }

#if (NGX_HAVE_INET6 && defined IPV6_V6ONLY)

diff -r 45aef9a5b176 -r 222e0a18a7fd src/core/ngx_cycle.c
--- a/src/core/ngx_cycle.c Thu Sep 18 12:03:41 2014 -0700
+++ b/src/core/ngx_cycle.c Thu Sep 18 12:05:16 2014 -0700
@@ -26,6 +26,9 @@
ngx_uint_t ngx_test_config;
ngx_uint_t ngx_quiet_mode;

+ngx_uint_t ngx_so_reuseport_enabled;
+ngx_uint_t ngx_num_dup_sockets;
+
#if (NGX_THREADS)
ngx_tls_key_t ngx_core_tls_key;
#endif
@@ -54,7 +57,36 @@
ngx_core_conf_t *ccf, *old_ccf;
ngx_core_module_t *module;
char hostname[NGX_MAXHOSTNAMELEN];
+ ngx_uint_t num_cores, taken;
+ ngx_socket_t temp_s;
+ int one = 1;

+ ngx_so_reuseport_enabled = 0;
+ temp_s = ngx_socket(AF_INET, SOCK_STREAM, 0);
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+ if (setsockopt(temp_s, SOL_SOCKET, SO_REUSEPORT,
+ (const void *) &one, sizeof(int)) == 0) {
+ ngx_so_reuseport_enabled = 1;
+ }
+ ngx_close_socket(temp_s);
+
+ if (ngx_so_reuseport_enabled) {
+#ifdef _SC_NPROCESSORS_ONLN
+ num_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+ num_cores = 1;
+#endif
+ if (num_cores > 8) {
+ ngx_num_dup_sockets = num_cores/8;
+ } else {
+ ngx_num_dup_sockets = 1;
+ }
+ } else {
+ ngx_num_dup_sockets = 1;
+ }
+
ngx_timezone_update();

/* force localtime update with a new timezone */
@@ -114,7 +146,7 @@
}


- n = old_cycle->paths.nelts ? old_cycle->paths.nelts : 10;
+ n = old_cycle->paths.nelts ? old_cycle->paths.nelts : 10 * ngx_num_dup_sockets;

cycle->paths.elts = ngx_pcalloc(pool, n * sizeof(ngx_path_t *));
if (cycle->paths.elts == NULL) {
@@ -164,7 +196,7 @@
return NULL;
}

- n = old_cycle->listening.nelts ? old_cycle->listening.nelts : 10;
+ n = old_cycle->listening.nelts ? old_cycle->listening.nelts : 10 * ngx_num_dup_sockets;

cycle->listening.elts = ngx_pcalloc(pool, n * sizeof(ngx_listening_t));
if (cycle->listening.elts == NULL) {
@@ -231,7 +263,7 @@

ngx_memzero(&conf, sizeof(ngx_conf_t));
/* STUB: init array ? */
- conf.args = ngx_array_create(pool, 10, sizeof(ngx_str_t));
+ conf.args = ngx_array_create(pool, (10 * ngx_num_dup_sockets), sizeof(ngx_str_t));
if (conf.args == NULL) {
ngx_destroy_pool(pool);
return NULL;
@@ -486,6 +518,7 @@
}

nls = cycle->listening.elts;
+ taken = 0;
for (n = 0; n < cycle->listening.nelts; n++) {

for (i = 0; i < old_cycle->listening.nelts; i++) {
@@ -493,9 +526,9 @@
continue;
}

- if (ngx_cmp_sockaddr(nls[n].sockaddr, nls[n].socklen,
+ if ((ngx_cmp_sockaddr(nls[n].sockaddr, nls[n].socklen,
ls[i].sockaddr, ls[i].socklen, 1)
- == NGX_OK)
+ == NGX_OK) && i >= taken)
{
nls[n].fd = ls[i].fd;
nls[n].previous = &ls[i];
@@ -540,6 +573,7 @@
nls[n].add_deferred = 1;
}
#endif
+ taken = i + 1;
break;
}
}
@@ -747,7 +781,7 @@
exit(1);
}

- n = 10;
+ n = 10 * ngx_num_dup_sockets;
ngx_old_cycles.elts = ngx_pcalloc(ngx_temp_pool,
n * sizeof(ngx_cycle_t *));
if (ngx_old_cycles.elts == NULL) {
diff -r 45aef9a5b176 -r 222e0a18a7fd src/core/ngx_cycle.h
--- a/src/core/ngx_cycle.h Thu Sep 18 12:03:41 2014 -0700
+++ b/src/core/ngx_cycle.h Thu Sep 18 12:05:16 2014 -0700
@@ -136,6 +136,8 @@
extern ngx_module_t ngx_core_module;
extern ngx_uint_t ngx_test_config;
extern ngx_uint_t ngx_quiet_mode;
+extern ngx_uint_t ngx_so_reuseport_enabled;
+extern ngx_uint_t ngx_num_dup_sockets;
#if (NGX_THREADS)
extern ngx_tls_key_t ngx_core_tls_key;
#endif
diff -r 45aef9a5b176 -r 222e0a18a7fd src/http/ngx_http.c
--- a/src/http/ngx_http.c Thu Sep 18 12:03:41 2014 -0700
+++ b/src/http/ngx_http.c Thu Sep 18 12:05:16 2014 -0700
@@ -1671,7 +1671,7 @@
static ngx_int_t
ngx_http_init_listening(ngx_conf_t *cf, ngx_http_conf_port_t *port)
{
- ngx_uint_t i, last, bind_wildcard;
+ ngx_uint_t i, j, last, bind_wildcard;
ngx_listening_t *ls;
ngx_http_port_t *hport;
ngx_http_conf_addr_t *addr;
@@ -1703,42 +1703,43 @@
continue;
}

- ls = ngx_http_add_listening(cf, &addr[i]);
- if (ls == NULL) {
- return NGX_ERROR;
- }
-
- hport = ngx_pcalloc(cf->pool, sizeof(ngx_http_port_t));
- if (hport == NULL) {
- return NGX_ERROR;
- }
-
- ls->servers = hport;
-
- if (i == last - 1) {
- hport->naddrs = last;
-
- } else {
- hport->naddrs = 1;
- i = 0;
- }
-
- switch (ls->sockaddr->sa_family) {
-
-#if (NGX_HAVE_INET6)
- case AF_INET6:
- if (ngx_http_add_addrs6(cf, hport, addr) != NGX_OK) {
+ for(j = 0; j < ngx_num_dup_sockets; j++) {
+ ls = ngx_http_add_listening(cf, &addr[i]);
+ if (ls == NULL) {
return NGX_ERROR;
}
- break;
-#endif
- default: /* AF_INET */
- if (ngx_http_add_addrs(cf, hport, addr) != NGX_OK) {
+
+ hport = ngx_pcalloc(cf->pool, sizeof(ngx_http_port_t));
+ if (hport == NULL) {
return NGX_ERROR;
}
- break;
+
+ ls->servers = hport;
+
+ if (i == last - 1) {
+ hport->naddrs = last;
+
+ } else {
+ hport->naddrs = 1;
+ i = 0;
+ }
+
+ switch (ls->sockaddr->sa_family) {
+
+#if (NGX_HAVE_INET6)
+ case AF_INET6:
+ if (ngx_http_add_addrs6(cf, hport, addr) != NGX_OK) {
+ return NGX_ERROR;
+ }
+ break;
+#endif
+ default: /* AF_INET */
+ if (ngx_http_add_addrs(cf, hport, addr) != NGX_OK) {
+ return NGX_ERROR;
+ }
+ break;
+ }
}
-
addr++;
last--;
}

1. Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors. Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products.


-----Original Message-----
From: nginx-devel-bounces@nginx.org [mailto:nginx-devel-bounces@nginx.org] On Behalf Of Lu, Yingqi
Sent: Wednesday, August 27, 2014 10:33 AM
To: nginx-devel@nginx.org
Subject: RE: [Patch] SO_REUSEPORT support from master process

Dear All,

I am resending this patch in plain text instead of HTML format. I will also post the patch at the end of this email. I hope this will make it easier for all of you to review. Please let me know if you have trouble viewing the message or the patch itself. This is our first time submitting a patch here. Your feedback and suggestions are highly appreciated.

The "SO_REUSEPORT support for listen sockets support" patches submitted by Sepherosa Ziehau are posted and discussed in [1] and [2]. The last update on those threads was 09/05/2013, and the patch is not included in the current Nginx code. From reading the discussion, my understanding is that his patch creates a dedicated listen socket for each child process. To make sure that a listen socket is always available at any given time, the patch makes the first worker process different from (special compared to) the rest.

Here, I am proposing a simpler way to enable SO_REUSEPORT support: create and configure a certain number of listen sockets in the master process with SO_REUSEPORT enabled, so that all child processes can inherit them. In this case, we do not need to worry about ensuring that one listen socket is available at run time. The number of listen sockets to create is calculated from the number of active CPU threads. On a big system with more CPU threads (where we have the scalability issue), more duplicated listen sockets are created to improve throughput and scalability. On a system with 8 or fewer CPU threads, there will be only 1 listen socket. This ensures that duplicated listen sockets are created only when necessary. If SO_REUSEPORT is not supported by the OS, the code falls back to the default/original behavior (this was tested on Linux kernel 3.8.8, where SO_REUSEPORT is not supported).

This prototype patch has been tested on a modern Intel dual-socket platform with a three-tier open-source web server workload (PHP+Nginx/memcached/MySQL). The web server has 2 IP network interfaces configured for testing. The Linux kernel used for testing is 3.13.9. The data show:

Case 1: with a single listen statement (Listen 80) specified in the configuration file, there is a 46.3% throughput increase.
Case 2: with dual listen statements (for example, Listen 192.168.1.1:80 and Listen 192.168.1.2:80), there is a 10% throughput increase.

In both test cases, everything was kept the same except for the patch itself to obtain the above results.

The reason that Case 1 has the bigger performance gain is that, by default, Case 1 has only 1 listen socket while Case 2 already has 2.

Please review it and let me know your questions and comments. Thanks very much for your time reviewing the patch.

Thanks,
Yingqi Lu

[1] http://forum.nginx.org/read.php?29,241283,241283
[2] http://forum.nginx.org/read.php?29,241470,241470

# HG changeset patch
# User Yingqi Lu <Yingqi.Lu@intel.com>
# Date 1408145210 25200
# Fri Aug 15 16:26:50 2014 -0700
# Node ID d9c7259d275dbcae8a0d001ee9703b13312b3263
# Parent 6edcb183e62d610808addebbd18249abb7224a0a
These are the patch files for SO_REUSEPORT support.

diff -r 6edcb183e62d -r d9c7259d275d ngx_connection.c
--- a/ngx_connection.c Fri Aug 15 16:25:32 2014 -0700
+++ b/ngx_connection.c Fri Aug 15 16:26:50 2014 -0700
@@ -304,7 +304,7 @@
ngx_int_t
ngx_open_listening_sockets(ngx_cycle_t *cycle)
{
- int reuseaddr;
+ int reuseaddr, reuseport;
ngx_uint_t i, tries, failed;
ngx_err_t err;
ngx_log_t *log;
@@ -312,6 +312,7 @@
ngx_listening_t *ls;

reuseaddr = 1;
+ reuseport = 1;
#if (NGX_SUPPRESS_WARN)
failed = 0;
#endif
@@ -370,6 +371,24 @@
return NGX_ERROR;
}

+ if (so_reuseport_enabled)
+ {
+ if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT,
+ (const void *) &reuseport, sizeof(int))
+ == -1) {
+ ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
+ "setsockopt(SO_REUSEPORT) %V failed",
+ &ls[i].addr_text);
+ if (ngx_close_socket(s) == -1) {
+ ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
+ ngx_close_socket_n " %V failed",
+ &ls[i].addr_text);
+ }
+
+ return NGX_ERROR;
+ }
+ }
+
#if (NGX_HAVE_INET6 && defined IPV6_V6ONLY)

if (ls[i].sockaddr->sa_family == AF_INET6) {
diff -r 6edcb183e62d -r d9c7259d275d ngx_cycle.c
--- a/ngx_cycle.c Fri Aug 15 16:25:32 2014 -0700
+++ b/ngx_cycle.c Fri Aug 15 16:26:50 2014 -0700
@@ -25,7 +25,7 @@

ngx_uint_t ngx_test_config;
ngx_uint_t ngx_quiet_mode;
-
+ngx_uint_t so_reuseport_enabled;
#if (NGX_THREADS)
ngx_tls_key_t ngx_core_tls_key;
#endif
@@ -55,6 +55,34 @@
ngx_core_module_t *module;
char hostname[NGX_MAXHOSTNAMELEN];

+ ngx_uint_t j, num_cores, num_dup_sockets, orig_nelts;
+ ngx_socket_t temp_s;
+ int one = 1;
+ so_reuseport_enabled = 0;
+ temp_s = ngx_socket(AF_INET, SOCK_STREAM, 0);
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+ if (setsockopt(temp_s, SOL_SOCKET, SO_REUSEPORT,
+ (const void *) &one, sizeof(int)) == 0) {
+ so_reuseport_enabled = 1;
+ }
+ ngx_close_socket(temp_s);
+
+ if (so_reuseport_enabled) {
+#ifdef _SC_NPROCESSORS_ONLN
+ num_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+ num_cores = 1;
+#endif
+ if (num_cores > 8) {
+ num_dup_sockets = num_cores/8;
+ } else {
+ num_dup_sockets = 1;
+ }
+ } else {
+ num_dup_sockets = 1;
+ }
ngx_timezone_update();

/* force localtime update with a new timezone */
@@ -114,7 +142,7 @@
}


- n = old_cycle->paths.nelts ? old_cycle->paths.nelts : 10;
+ n = old_cycle->paths.nelts ? old_cycle->paths.nelts : 10 *
+ num_dup_sockets;

cycle->paths.elts = ngx_pcalloc(pool, n * sizeof(ngx_path_t *));
if (cycle->paths.elts == NULL) {
@@ -164,7 +192,7 @@
return NULL;
}

- n = old_cycle->listening.nelts ? old_cycle->listening.nelts : 10;
+ n = old_cycle->listening.nelts ? old_cycle->listening.nelts : 10 *
+ num_dup_sockets;

cycle->listening.elts = ngx_pcalloc(pool, n * sizeof(ngx_listening_t));
if (cycle->listening.elts == NULL) {
@@ -231,7 +259,7 @@

ngx_memzero(&conf, sizeof(ngx_conf_t));
/* STUB: init array ? */
- conf.args = ngx_array_create(pool, 10, sizeof(ngx_str_t));
+ conf.args = ngx_array_create(pool, (10 * num_dup_sockets),
+ sizeof(ngx_str_t));
if (conf.args == NULL) {
ngx_destroy_pool(pool);
return NULL;
@@ -575,7 +603,15 @@
#endif
}
}
+ orig_nelts = cycle->listening.nelts;
+ cycle->listening.nelts = cycle->listening.nelts * num_dup_sockets;

+ ls = cycle->listening.elts;
+ for (i = 0; i < num_dup_sockets; i++) {
+ for(j = 0; j < orig_nelts; j++) {
+ ls[j + i * orig_nelts] = ls[j];
+ }
+ }
if (ngx_open_listening_sockets(cycle) != NGX_OK) {
goto failed;
}
@@ -747,7 +783,7 @@
exit(1);
}

- n = 10;
+ n = 10 * num_dup_sockets;
ngx_old_cycles.elts = ngx_pcalloc(ngx_temp_pool,
n * sizeof(ngx_cycle_t *));
if (ngx_old_cycles.elts == NULL) {
diff -r 6edcb183e62d -r d9c7259d275d ngx_cycle.h
--- a/ngx_cycle.h Fri Aug 15 16:25:32 2014 -0700
+++ b/ngx_cycle.h Fri Aug 15 16:26:50 2014 -0700
@@ -136,6 +136,7 @@
extern ngx_module_t ngx_core_module;
extern ngx_uint_t ngx_test_config;
extern ngx_uint_t ngx_quiet_mode;
+extern ngx_uint_t so_reuseport_enabled;
#if (NGX_THREADS)
extern ngx_tls_key_t ngx_core_tls_key;
#endif

1. Software and workloads used in performance tests may have been optimized for performance only on Intel microprocessors. Performance tests, such as SYSmark and MobileMark, are measured using specific computer systems, components, software, operations and functions. Any change to any of those factors may cause the results to vary. You should consult other information and performance tests to assist you in fully evaluating your contemplated purchases, including the performance of that product when combined with other products.

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel

_______________________________________________
nginx-devel mailing list
nginx-devel@nginx.org
http://mailman.nginx.org/mailman/listinfo/nginx-devel
Subject Author Views Posted

[Patch] SO_REUSEPORT support from master process Attachments

Lu, Yingqi 1448 August 22, 2014 12:56PM

RE: [Patch] SO_REUSEPORT support from master process

Lu, Yingqi 667 August 22, 2014 01:00PM

Re: [Patch] SO_REUSEPORT support from master process

Sepherosa Ziehau 702 August 27, 2014 05:10AM

RE: [Patch] SO_REUSEPORT support from master process

luy 1519 August 27, 2014 12:26PM

Re: [Patch] SO_REUSEPORT support from master process

Sepherosa Ziehau 711 August 28, 2014 05:28AM

RE: [Patch] SO_REUSEPORT support from master process

luy 640 August 27, 2014 01:36PM

RE: [Patch] SO_REUSEPORT support from master process

luy 1490 September 18, 2014 04:08PM

Re: [Patch] SO_REUSEPORT support from master process

Valentin V. Bartenev 590 September 19, 2014 04:38AM

RE: [Patch] SO_REUSEPORT support from master process

luy 663 September 19, 2014 11:54AM

Re: [Patch] SO_REUSEPORT support from master process

Valentin V. Bartenev 603 September 19, 2014 12:50PM

RE: [Patch] SO_REUSEPORT support from master process

luy 694 September 19, 2014 01:34PM

RE: [Patch] SO_REUSEPORT support from master process

luy 601 September 20, 2014 06:24PM

RE: [Patch] SO_REUSEPORT support from master process

luy 597 September 20, 2014 07:02PM

RE: [Patch] SO_REUSEPORT support from master process

luy 701 September 20, 2014 07:46PM

RE: [Patch] SO_REUSEPORT support from master process

luy 607 September 23, 2014 11:38AM

RE: [Patch] SO_REUSEPORT support from master process

luy 655 September 23, 2014 11:48AM

RE: [Patch] SO_REUSEPORT support from master process

luy 712 September 29, 2014 03:46PM

RE: [Patch] SO_REUSEPORT support from master process

luy 608 October 07, 2014 03:34PM

Re: [Patch] SO_REUSEPORT support from master process

Maxim Dounin 633 October 08, 2014 09:00AM

RE: [Patch] SO_REUSEPORT support from master process

luy 596 October 08, 2014 01:08PM

RE: [Patch] SO_REUSEPORT support from master process

luy 585 October 08, 2014 02:26PM

RE: [Patch] SO_REUSEPORT support from master process

luy 571 October 30, 2014 06:26PM

Re: [Patch] SO_REUSEPORT support from master process

Sepherosa Ziehau 641 November 16, 2014 04:08AM

Re: [Patch] SO_REUSEPORT support from master process

Maxim Dounin 835 November 16, 2014 06:52AM



Sorry, you do not have permission to post/reply in this forum.

Online Users

Guests: 138
Record Number of Users: 8 on April 13, 2023
Record Number of Guests: 421 on December 02, 2018
Powered by nginx      Powered by FreeBSD      PHP Powered      Powered by MariaDB      ipv6 ready