feat(ext-plugin): stop the runner with SIGTERM (#4367)

This commit is contained in:
罗泽轩 2021-06-11 13:04:42 +08:00 committed by GitHub
parent 6c38fdff9c
commit 68711e443d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 198 additions and 6 deletions

View File

@ -291,6 +291,12 @@ http {
apisix.http_init_worker()
}
{% if not use_openresty_1_17 then %}
exit_worker_by_lua_block {
apisix.http_exit_worker()
}
{% end %}
{% if enable_control then %}
server {
listen {* control_server_addr *};

View File

@ -65,7 +65,7 @@ version: print the version of apisix
end
local function check_version(cur_ver_s, need_ver_s)
local function version_greater_equal(cur_ver_s, need_ver_s)
local cur_vers = util.split(cur_ver_s, [[.]])
local need_vers = util.split(need_ver_s, [[.]])
local len = max(#cur_vers, #need_vers)
@ -357,10 +357,15 @@ Please modify "admin_key" in conf/config.yaml .
end
local need_ver = "1.17.3"
if not check_version(or_ver, need_ver) then
if not version_greater_equal(or_ver, need_ver) then
util.die("openresty version must >=", need_ver, " current ", or_ver, "\n")
end
local use_openresty_1_17 = false
if not version_greater_equal(or_ver, "1.19.3") then
use_openresty_1_17 = true
end
local or_info = util.execute_cmd("openresty -V 2>&1")
local with_module_status = true
if or_info and not or_info:find("http_stub_status_module", 1, true) then
@ -446,6 +451,7 @@ Please modify "admin_key" in conf/config.yaml .
-- Using template.render
local sys_conf = {
use_openresty_1_17 = use_openresty_1_17,
lua_path = env.pkg_path_org,
lua_cpath = env.pkg_cpath_org,
os_name = util.trim(util.execute_cmd("uname")),

View File

@ -23,11 +23,18 @@ local type = type
local _M = {}
local WNOHANG = 1
ffi.cdef[[
typedef int32_t pid_t;
typedef unsigned int useconds_t;
int setenv(const char *name, const char *value, int overwrite);
char *strerror(int errnum);
int usleep(useconds_t usec);
pid_t waitpid(pid_t pid, int *wstatus, int options);
]]
@ -52,4 +59,31 @@ function _M.setenv(name, value)
end
local function waitpid_nohang(pid)
local res = C.waitpid(pid, nil, WNOHANG)
if res == -1 then
return nil, err()
end
return res > 0
end
function _M.waitpid(pid, timeout)
local count = 0
local step = 1000 * 10
local total = timeout * 1000 * 1000
while step * count < total do
count = count + 1
C.usleep(step)
local ok, err = waitpid_nohang(pid)
if err then
return nil, err
end
if ok then
return true
end
end
end
return _M

View File

@ -134,6 +134,11 @@ function _M.http_init_worker()
end
function _M.http_exit_worker()
require("apisix.plugins.ext-plugin.init").exit_worker()
end
function _M.http_ssl_phase()
local ngx_ctx = ngx.ctx
local api_ctx = ngx_ctx.api_ctx

View File

@ -36,6 +36,7 @@ if is_http then
ngx_pipe = require("ngx.pipe")
events = require("resty.worker.events")
end
local resty_signal = require "resty.signal"
local bit = require("bit")
local band = bit.band
local lshift = bit.lshift
@ -588,8 +589,10 @@ local function spawn_proc(cmd)
end
local runner
local function setup_runner(cmd)
local proc = spawn_proc(cmd)
runner = spawn_proc(cmd)
ngx_timer_at(0, function(premature)
if premature then
return
@ -599,7 +602,7 @@ local function setup_runner(cmd)
while true do
-- drain output
local max = 3800 -- smaller than Nginx error log length limit
local data, err = proc:stdout_read_any(max)
local data, err = runner:stdout_read_any(max)
if not data then
if exiting() then
return
@ -615,11 +618,13 @@ local function setup_runner(cmd)
end
end
local ok, reason, status = proc:wait()
local ok, reason, status = runner:wait()
if not ok then
core.log.warn("runner exited with reason: ", reason, ", status: ", status)
end
runner = nil
local ok, err = events.post(events_list._source, events_list.runner_exit)
if not ok then
core.log.error("post event failure with ", events_list._source, ", error: ", err)
@ -628,7 +633,7 @@ local function setup_runner(cmd)
core.log.warn("respawn runner 3 seconds later with cmd: ", core.json.encode(cmd))
core.utils.sleep(3)
core.log.warn("respawning new runner...")
proc = spawn_proc(cmd)
runner = spawn_proc(cmd)
end
end)
end
@ -656,4 +661,21 @@ function _M.init_worker()
end
function _M.exit_worker()
if process.type() == "privileged agent" and runner then
-- We need to send SIGTERM in the exit_worker phase, as:
-- 1. privileged agent doesn't support graceful exiting when I write this
-- 2. better to make it work without graceful exiting
local pid = runner:pid()
core.log.notice("terminate runner ", pid, " with SIGTERM")
local num = resty_signal.signum("TERM")
runner:kill(num)
-- give 1s to clean up the mess
core.os.waitpid(pid, 1)
-- then we KILL it via gc finalizer
end
end
return _M

View File

@ -103,3 +103,14 @@ nginx_config:
envs:
- MY_ENV_VAR
```
### APISIX terminates my runner with SIGKILL but not SIGTERM!
Since `v2.7`, APISIX will stop the runner with SIGTERM when it is running on
OpenResty 1.19+.
However, APISIX needs to wait the runner to quit so that we can ensure the resource
for the process group is freed.
Therefore, we send SIGTERM first. And then after 1 second, if the runner is still
running, we will send SIGKILL.

View File

@ -402,7 +402,17 @@ _EOC_
require("apisix").http_init_worker()
$extra_init_worker_by_lua
}
_EOC_
if ($version !~ m/\/1.17.8/) {
$http_config .= <<_EOC_;
exit_worker_by_lua_block {
require("apisix").http_exit_worker()
}
_EOC_
}
$http_config .= <<_EOC_;
log_format main escape=default '\$remote_addr - \$remote_user [\$time_local] \$http_host "\$request" \$status \$body_bytes_sent \$request_time "\$http_referer" "\$http_user_agent" \$upstream_addr \$upstream_status \$upstream_response_time "\$upstream_scheme://\$upstream_host\$upstream_uri"';
# fake server, only for test

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
term() {
eval sleep 1800
}
trap term SIGTERM
sleep 1800

View File

@ -0,0 +1,65 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use t::APISIX;
my $nginx_binary = $ENV{'TEST_NGINX_BINARY'} || 'nginx';
my $version = eval { `$nginx_binary -V 2>&1` };
if ($version =~ m/\/1.17.8/) {
plan(skip_all => "require OpenResty 1.19+");
} else {
plan('no_plan');
}
repeat_each(1);
no_long_string();
no_root_location();
no_shuffle();
log_level("info");
add_block_preprocessor(sub {
my ($block) = @_;
my $cmd = $block->ext_plugin_cmd // "['sleep', '5s']";
my $extra_yaml_config = <<_EOC_;
ext-plugin:
cmd: $cmd
_EOC_
$block->set_value("extra_yaml_config", $extra_yaml_config);
if (!$block->request) {
$block->set_value("request", "GET /t");
}
if (!$block->error_log) {
$block->set_value("no_error_log", "[error]\n[alert]");
}
});
run_tests;
__DATA__
=== TEST 1: terminate spawn runner
--- ext_plugin_cmd
["t/plugin/ext-plugin/runner.sh", "3600"]
--- config
location /t {
return 200;
}
--- shutdown_error_log eval
qr/terminate runner \d+ with SIGTERM/

View File

@ -453,3 +453,13 @@ MY_ENV_VAR foo
{"error_msg":"failed to check the configuration of plugin ext-plugin-pre-req err: property \"conf\" validation failed: failed to validate item 1: property \"name\" is required"}
{"error_msg":"failed to check the configuration of plugin ext-plugin-post-req err: property \"conf\" validation failed: failed to validate item 1: property \"value\" is required"}
=== TEST 15: spawn runner which can't be terminated, ensure APISIX won't be blocked
--- ext_plugin_cmd
["t/plugin/ext-plugin/runner_can_not_terminated.sh"]
--- config
location /t {
return 200;
}