master can wait until the children have exited

This commit is contained in:
zhengshuxin 2017-09-05 23:47:48 +08:00
parent bc7d725f5c
commit 296d5a995d
10 changed files with 124 additions and 60 deletions

View File

@ -1,4 +1,8 @@
6) 2017.9.5
6.1) feature: master can wait for children to exit, either synchronously or asynchronously
6.2) feature: master can kill children with SIGTERM when reloading
5) 2017.8.11
5.1) safety: master no longer exits fatally on a configuration error in any app service.

View File

@ -5,6 +5,8 @@ default_process_limit = 250
# 用户属主
owner_user = root
master_owner = root
# 当停止 master 时是否等待所有子进程退出
waiting_on_stop = true
# 组主
owner_group = root
# 如果子进程服务异常退出重启该子进程服务的时间间隔()

View File

@ -67,7 +67,8 @@ typedef struct ACL_MASTER_SERV {
#define ACL_MASTER_FLAG_RELOADING (1<<3) /* the service is reloading */
#define ACL_MASTER_FLAG_STOPPING (1<<4) /* the service is stopping */
#define ACL_MASTER_FLAG_KILLED (1<<5) /* the service is killed */
#define ACL_MASTER_FLAG_KILL_ONEXIT (1<<6) /* the service can be killed on exit */
#define ACL_MASTER_FLAG_STOP_KILL (1<<6) /* the service can be killed on stopping */
#define ACL_MASTER_FLAG_STOP_WAIT (1<<7) /* master waiting service exited */
#define ACL_MASTER_THROTTLED(f) ((f)->flags & ACL_MASTER_FLAG_THROTTLE)
#define ACL_MASTER_STOPPING(f) ((f)->flags & ACL_MASTER_FLAG_STOPPING)

View File

@ -502,13 +502,17 @@ static int service_transport(ACL_XINETD_CFG_PARSER *xcp, ACL_MASTER_SERV *serv)
/*
 * service_control - load the per-service stop/reload control switches.
 *
 * Reads two string entries from the service configuration and mirrors them
 * into serv->flags:
 *
 *   ACL_VAR_MASETR_SERV_STOP_KILL -> ACL_MASTER_FLAG_STOP_KILL
 *   ACL_VAR_MASTER_SERV_STOP_WAIT -> ACL_MASTER_FLAG_STOP_WAIT
 *
 * A switch is considered enabled when its value is "on", "true" or a
 * positive integer; anything else clears the corresponding flag.
 *
 * xcp:  parsed service configuration to read from.
 * serv: service entry whose flags are updated in place.
 */
static void service_control(ACL_XINETD_CFG_PARSER *xcp, ACL_MASTER_SERV *serv)
{
	const char *ptr;

	/*
	 * NOTE(review): get_str_ent() is given "off" as its last argument,
	 * presumably the default -- the NULL guard below is kept in case it
	 * can still return NULL; confirm against its definition.
	 */
	ptr = get_str_ent(xcp, ACL_VAR_MASETR_SERV_STOP_KILL, "off");

	/*
	 * Accept "1" as well: the threshold must be > 0 (not > 1) so that
	 * this switch parses numeric values the same way as the stop-wait
	 * switch below.
	 */
	if (ptr != NULL
	    && (EQ(ptr, "on") || EQ(ptr, "true") || atoi(ptr) > 0))
		serv->flags |= ACL_MASTER_FLAG_STOP_KILL;
	else
		serv->flags &= ~ACL_MASTER_FLAG_STOP_KILL;

	ptr = get_str_ent(xcp, ACL_VAR_MASTER_SERV_STOP_WAIT, "off");

	if (ptr != NULL
	    && (EQ(ptr, "on") || EQ(ptr, "true") || atoi(ptr) > 0))
		serv->flags |= ACL_MASTER_FLAG_STOP_WAIT;
	else
		serv->flags &= ~ACL_MASTER_FLAG_STOP_WAIT;
}
static void service_wakeup_time(ACL_XINETD_CFG_PARSER *xcp,

View File

@ -42,6 +42,7 @@ char *acl_var_master_service_dir;
char *acl_var_master_log_file;
char *acl_var_master_pid_file;
char *acl_var_master_manage_addr;
char *acl_var_master_waiting_on_stop;
static ACL_CONFIG_STR_TABLE str_tab[] = {
{ ACL_VAR_MASTER_INET_INTERFACES, ACL_DEF_MASTER_INET_INTERFACES,
@ -60,6 +61,8 @@ static ACL_CONFIG_STR_TABLE str_tab[] = {
&acl_var_master_pid_file },
{ ACL_VAR_MASTER_MANAGE_ADDR, ACL_DEF_MASTER_MANAGE_ADDR,
&acl_var_master_manage_addr },
{ ACL_VAR_MASTER_WAITING_ON_STOP, ACL_DEF_MASTER_WAITING_ON_STOP,
&acl_var_master_waiting_on_stop },
{ 0, 0, 0 },
};

View File

@ -19,6 +19,10 @@ extern char *acl_var_master_conf_dir;
#define ACL_DEF_MASTER_INET_INTERFACES ACL_INET_INTERFACES_ALL
extern char *acl_var_master_inet_interfaces;
#define ACL_VAR_MASTER_WAITING_ON_STOP "waiting_on_stop"
#define ACL_DEF_MASTER_WAITING_ON_STOP "true"
extern char *acl_var_master_waiting_on_stop;
#define ACL_VAR_MASTER_PROC_LIMIT "default_process_limit"
#define ACL_DEF_MASTER_PROC_LIMIT 100
extern int acl_var_master_proc_limit;
@ -133,7 +137,8 @@ extern char *acl_var_master_manage_addr;
#define ACL_VAR_MASTER_SERV_REUSEPORT "master_reuseport"
#define ACL_VAR_MASTER_SERV_FASTOPEN "master_fastopen"
#define ACL_VAR_MASTER_SERV_NBLOCK "master_nonblock"
#define ACL_VAR_MASETR_SERV_KILL "master_kill"
#define ACL_VAR_MASETR_SERV_STOP_KILL "master_stop_kill"
#define ACL_VAR_MASTER_SERV_STOP_WAIT "master_stop_wait"
/**
* master_params.c

View File

@ -1,6 +1,7 @@
#include "stdafx.h"
#include <errno.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
/* Application-specific. */
@ -98,28 +99,34 @@ void acl_master_service_stop(ACL_MASTER_SERV *serv)
void acl_master_service_restart(ACL_MASTER_SERV *serv)
{
if ((serv->flags & ACL_MASTER_FLAG_KILL_ONEXIT) != 0) {
char *path = acl_mystrdup(serv->conf);
acl_master_kill(path);
if (acl_master_start(path) == NULL)
acl_msg_error("can't start service=%s", path);
else
acl_msg_info("start %s ok", path);
acl_myfree(path);
} else {
/* Undo some of the things that master_service_start() did. */
acl_master_wakeup_cleanup(serv);
acl_master_status_cleanup(serv);
/* Undo some of the things that master_service_start() did. */
acl_master_wakeup_cleanup(serv);
acl_master_status_cleanup(serv);
/* Now undo the undone. */
acl_master_status_init(serv);
/* set ACL_MASTER_FLAG_RELOADING flag */
serv->flags |= ACL_MASTER_FLAG_RELOADING;
/* set ACL_MASTER_FLAG_RELOADING flag */
serv->flags |= ACL_MASTER_FLAG_RELOADING;
/* if master_stop_kill was set then kill the children with SIGTERM */
if ((serv->flags & ACL_MASTER_FLAG_STOP_KILL) != 0) {
ACL_BINHASH_INFO **list;
ACL_BINHASH_INFO **info;
ACL_MASTER_PROC *proc;
acl_master_avail_listen(serv);
/* ACL_MASTER_FLAG_RELOADING will be remove in acl_master_spawn */
acl_master_wakeup_init(serv);
info = list = acl_binhash_list(acl_var_master_child_table);
for (; *info; info++) {
proc = (ACL_MASTER_PROC *) info[0]->value;
if (proc->serv == serv)
(void) kill(proc->pid, SIGTERM);
}
acl_myfree(list);
}
/* Now undo the undone. */
acl_master_status_init(serv);
/* re-listen again or prefork children */
acl_master_avail_listen(serv);
/* ACL_MASTER_FLAG_RELOADING will be removed in acl_master_spawn */
acl_master_wakeup_init(serv);
}

View File

@ -171,10 +171,18 @@ static void master_sigdeath(int sig)
action.sa_handler = SIG_IGN;
if (sigaction(SIGTERM, &action, (struct sigaction *) 0) < 0)
acl_msg_fatal("%s: sigaction: %s", myname, strerror(errno));
if (pid <= 1) { /* in docker the master's pid is 1 */
#define EQ !strcasecmp
if (EQ(acl_var_master_waiting_on_stop, "true")
|| EQ(acl_var_master_waiting_on_stop, "yes")
|| EQ(acl_var_master_waiting_on_stop, "on")
|| pid <= 1) { /* in docker the master's pid is 1 */
acl_master_delete_all_children();
} else if (kill(-pid, SIGTERM) < 0) {
acl_msg_error("%s: kill process group: %s", myname, strerror(errno));
acl_msg_error("%s: kill process group(-%ld): %s",
myname, (long) pid, strerror(errno));
exit (1);
}

View File

@ -5,8 +5,6 @@
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <stdarg.h>
#include <syslog.h>
#include <errno.h>
#include <string.h>
@ -69,11 +67,10 @@ static void master_throttle(ACL_MASTER_SERV *serv)
/*
* Perhaps the command to be run is defective,
* perhaps some configuration is wrong, or
* perhaps the system is out of resources. Disable further
* process creation attempts for a while.
* perhaps the system is out of resources.
* Disable further process creation attempts for a while.
*/
if ((serv->flags & ACL_MASTER_FLAG_THROTTLE) == 0) {
serv->flags |= ACL_MASTER_FLAG_THROTTLE;
acl_event_request_timer(acl_var_master_global_event,
master_unthrottle_wrapper, (void *) serv,
@ -296,18 +293,6 @@ static void master_delete_child(ACL_MASTER_PROC *proc)
sizeof(proc->pid), (void (*) (void *)) 0);
acl_ring_detach(&proc->me);
acl_myfree(proc);
/* in ACL_MASTER_FLAG_STOPPING status, the serv will be freed after
* all children exited
*/
if (ACL_MASTER_CHILDREN_SIZE(serv) > 0)
return;
if (ACL_MASTER_STOPPING(serv) || ACL_MASTER_KILLED(serv)) {
acl_msg_info("free service %s which has been %s", serv->path,
ACL_MASTER_STOPPING(serv) ? "stopped" : "killed");
acl_master_ent_free(serv);
}
}
/* acl_master_reap_child - reap dead children */
@ -378,7 +363,9 @@ void acl_master_reap_child(void)
}
if (proc->use_count == 0
&& (serv->flags & ACL_MASTER_FLAG_THROTTLE) == 0) {
&& !ACL_MASTER_THROTTLED(serv)
&& !ACL_MASTER_STOPPING(serv)
&& !ACL_MASTER_KILLED(serv)) {
acl_msg_warn("%s(%d), %s: bad command startup, path=%s"
" -- throttling", __FILE__, __LINE__,
@ -390,20 +377,36 @@ void acl_master_reap_child(void)
}
}
/*
 * Delay handed to acl_event_request_timer() between two polls for exited
 * children. NOTE(review): presumably microseconds (i.e. 100 ms) -- confirm
 * against the event API.
 */
#define WAITING_CHILD 100000

/*
 * waiting_children - timer callback that polls until a service has no
 * children left, then frees the service entry.
 *
 * type:  unused.
 * event: event engine on which the timer is re-armed.
 * ctx:   the ACL_MASTER_SERV being waited for.
 */
static void waiting_children(int type acl_unused, ACL_EVENT *event, void* ctx)
{
	ACL_MASTER_SERV *service = (ACL_MASTER_SERV *) ctx;
	const char *how;

	/* collect whatever has terminated since the previous tick */
	acl_master_reap_child();

	if (ACL_MASTER_CHILDREN_SIZE(service) <= 0) {
		/* nothing left to wait for: report and release the entry */
		how = ACL_MASTER_STOPPING(service) ? "stopped" : "killed";
		acl_msg_info("%s(%d): free service %s been %s, total proc=%d",
			__FUNCTION__, __LINE__, service->path, how,
			service->total_proc);
		acl_master_ent_free(service);
		return;
	}

	/* some children are still around: log and poll again later */
	acl_msg_info("wait for service %s, total_proc=%d, %d",
		service->conf, service->total_proc,
		ACL_MASTER_CHILDREN_SIZE(service));
	acl_event_request_timer(event, waiting_children,
		(void *) service, WAITING_CHILD, 0);
}
/* acl_master_kill_children - kill and delete all child processes of service */
void acl_master_kill_children(ACL_MASTER_SERV *serv)
{
ACL_BINHASH_INFO **list;
ACL_BINHASH_INFO **info;
ACL_MASTER_PROC *proc;
/*
* XXX turn on the throttle so that master_reap_child() doesn't.
* Someone has to turn off the throttle in order to stop the
* associated timer request, so we might just as well do it at the end.
*/
master_throttle(serv);
ACL_MASTER_PROC *proc;
info = list = acl_binhash_list(acl_var_master_child_table);
for (; *info; info++) {
@ -411,12 +414,39 @@ void acl_master_kill_children(ACL_MASTER_SERV *serv)
if (proc->serv == serv)
(void) kill(proc->pid, SIGTERM);
}
acl_myfree(list);
while (serv->total_proc > 0)
if ((serv->flags & ACL_MASTER_FLAG_STOP_WAIT) != 0) {
while (serv->total_proc > 0) {
acl_master_reap_child();
if (serv->total_proc > 0)
acl_doze(100);
}
acl_msg_info("%s(%d): free service %s been %s, total proc=%d",
__FUNCTION__, __LINE__, serv->path,
ACL_MASTER_STOPPING(serv) ? "stopped" : "killed",
serv->total_proc);
acl_master_ent_free(serv);
return;
}
// try to reap the children that have already exited
if (serv->total_proc > 0)
acl_master_reap_child();
acl_myfree(list);
master_unthrottle(serv);
// if some children are still alive, create a timer to wait for them
if (serv->total_proc > 0)
acl_event_request_timer(acl_var_master_global_event,
waiting_children, (void *) serv, WAITING_CHILD, 0);
else {
acl_msg_info("%s(%d): free service %s been %s, total proc=%d",
__FUNCTION__, __LINE__, serv->path,
ACL_MASTER_STOPPING(serv) ? "stopped" : "killed",
serv->total_proc);
acl_master_ent_free(serv);
}
}
void acl_master_delete_all_children(void)

View File

@ -70,7 +70,7 @@ static void server_sigterm(int sig acl_unused)
for (; i < max; i++)
close(i);
acl_doze(100);
acl_doze(100); /* just sleep 100 ms for release fd by OS system ! */
exit(0);
}