[skip e2e] Add all pods kill chaos test (#15761)
Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
This commit is contained in:
parent
51cac044aa
commit
b745b6f707
.github/workflows/pod-kill-chaos-test.yaml (vendored, 6 lines changed)
@@ -13,7 +13,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        pod: [standalone, datacoord, datanode, indexcoord, indexnode, proxy, pulsar, querycoord, querynode, rootcoord, etcd, minio]
+        pod: [allstandalone, allcluster, standalone, datacoord, datanode, indexcoord, indexnode, proxy, pulsar, querycoord, querynode, rootcoord, etcd, minio]

     steps:
@@ -68,8 +68,8 @@ jobs:
           bash ../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus-dev -t master-latest -f master- -F -L -q
           helm repo add milvus https://milvus-io.github.io/milvus-helm
           helm repo update
-          if [ ${{ matrix.pod }} != "standalone" ]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f cluster-values.yaml -n=chaos-testing; fi
-          if [ ${{ matrix.pod }} == "standalone" ]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f standalone-values.yaml -n=chaos-testing; fi
+          if [[ ${{ matrix.pod }} != *"standalone"* ]]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f cluster-values.yaml -n=chaos-testing; fi
+          if [[ ${{ matrix.pod }} == *"standalone"* ]]; then helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f standalone-values.yaml -n=chaos-testing; fi
           kubectl get pods -n chaos-testing
           sleep 20s
           kubectl get pods -n chaos-testing
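
Note on the workflow change above: replacing [ ... != "standalone" ] with the bash pattern test [[ ... != *"standalone"* ]] lets the new allstandalone matrix entry reuse the standalone install path, while allcluster and every per-component entry take the cluster path. A minimal Python sketch of that matching behavior (values_file_for is a hypothetical helper, for illustration only):

    from fnmatch import fnmatch

    # Mirrors the bash test [[ ${pod} == *"standalone"* ]] in the workflow:
    # any matrix entry containing "standalone" selects the standalone values
    # file, everything else selects the cluster values file.
    def values_file_for(pod: str) -> str:
        return "standalone-values.yaml" if fnmatch(pod, "*standalone*") else "cluster-values.yaml"

    assert values_file_for("standalone") == "standalone-values.yaml"
    assert values_file_for("allstandalone") == "standalone-values.yaml"
    assert values_file_for("allcluster") == "cluster-values.yaml"
    assert values_file_for("querynode") == "cluster-values.yaml"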
@@ -0,0 +1,55 @@
apiVersion: chaos-mesh.org/v1alpha1
kind: Workflow
metadata:
  name: test-allcluster-pod-kill
  namespace: chaos-testing
spec:
  entry: entry
  templates:
    - name: entry
      templateType: Serial
      deadline: 5m
      children:
        - test-all-pods-kill
    - name: test-first-part-pod-kill
      templateType: Schedule
      deadline: 3m
      schedule:
        schedule: '*/5 * * * * *'
        startingDeadlineSeconds: 60
        concurrencyPolicy: Forbid
        historyLimit: 1
        type: PodChaos
        podChaos:
          selector:
            namespaces:
              - chaos-testing
            labelSelectors:
              release: milvus-chaos
          mode: all
          action: pod-kill
          gracePeriod: 0
    - name: test-second-part-pod-kill
      templateType: Schedule
      deadline: 3m
      schedule:
        schedule: '*/5 * * * * *'
        startingDeadlineSeconds: 60
        concurrencyPolicy: Forbid
        historyLimit: 1
        type: PodChaos
        podChaos:
          selector:
            namespaces:
              - chaos-testing
            labelSelectors:
              app.kubernetes.io/instance: milvus-chaos
          mode: all
          action: pod-kill
          gracePeriod: 0
    - name: test-all-pods-kill
      templateType: Parallel
      deadline: 3m
      children:
        - test-first-part-pod-kill
        - test-second-part-pod-kill
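
This new Workflow (and its allstandalone counterpart below) chains the chaos as follows: the Serial entry template runs test-all-pods-kill, a Parallel template that fans out into two Schedule templates, each firing a pod-kill PodChaos every 5 seconds for 3 minutes. One schedule selects pods labeled release: milvus-chaos and the other pods labeled app.kubernetes.io/instance: milvus-chaos, so the Milvus components and their chart dependencies are killed together. A sketch of applying such a Workflow from Python with the Kubernetes client; the "workflows" plural and the file path are assumptions, and inside the test suite the CustomResourceOperations wrapper further down plays this role:

    import yaml
    from kubernetes import client, config

    # Sketch only: create a chaos-mesh Workflow custom resource like the one
    # above. Group/version come from the manifest's apiVersion; "workflows"
    # as the CRD plural is an assumption.
    config.load_kube_config()
    api = client.CustomObjectsApi()

    with open("chaos_allcluster_pod_kill.yaml") as f:  # file name as referenced in testcases.yaml
        body = yaml.safe_load(f)

    api.create_namespaced_custom_object(
        group="chaos-mesh.org",
        version="v1alpha1",
        namespace=body["metadata"]["namespace"],
        plural="workflows",
        body=body,
    )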
@@ -0,0 +1,55 @@
apiVersion: chaos-mesh.org/v1alpha1
kind: Workflow
metadata:
  name: test-allstandalone-pod-kill
  namespace: chaos-testing
spec:
  entry: entry
  templates:
    - name: entry
      templateType: Serial
      deadline: 5m
      children:
        - test-all-pods-kill
    - name: test-first-part-pod-kill
      templateType: Schedule
      deadline: 3m
      schedule:
        schedule: '*/5 * * * * *'
        startingDeadlineSeconds: 60
        concurrencyPolicy: Forbid
        historyLimit: 1
        type: PodChaos
        podChaos:
          selector:
            namespaces:
              - chaos-testing
            labelSelectors:
              release: milvus-chaos
          mode: all
          action: pod-kill
          gracePeriod: 0
    - name: test-second-part-pod-kill
      templateType: Schedule
      deadline: 3m
      schedule:
        schedule: '*/5 * * * * *'
        startingDeadlineSeconds: 60
        concurrencyPolicy: Forbid
        historyLimit: 1
        type: PodChaos
        podChaos:
          selector:
            namespaces:
              - chaos-testing
            labelSelectors:
              app.kubernetes.io/instance: milvus-chaos
          mode: all
          action: pod-kill
          gracePeriod: 0
    - name: test-all-pods-kill
      templateType: Parallel
      deadline: 3m
      children:
        - test-first-part-pod-kill
        - test-second-part-pod-kill
@@ -145,3 +145,27 @@ Collections:
       index: fail
       search: fail
       query: fail
+  -
+    testcase:
+      name: test_allstandalone_pod_kill
+      chaos: chaos_allstandalone_pod_kill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
+  -
+    testcase:
+      name: test_allcluster_pod_kill
+      chaos: chaos_allcluster_pod_kill.yaml
+      expectation:
+        cluster_1_node:
+          create: fail
+          insert: fail
+          flush: fail
+          index: fail
+          search: fail
+          query: fail
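
Both new testcase entries define only cluster_1_node expectations, with every operation expected to fail: when all pods are killed at once there is no healthy replica left to serve requests. A simplified sketch of the lookup that parser_testcase_config (later in this diff) performs against this file, with the yaml module assumed for loading:

    import yaml

    # Find the testcase whose `chaos` file name matches the chaos manifest
    # being executed and read its cluster_1_node expectations.
    chaos_yaml = "chaos_allcluster_pod_kill.yaml"

    with open("chaos_objects/pod_kill/testcases.yaml") as f:  # TESTS_CONFIG_LOCATION + 'testcases.yaml'
        collections = yaml.safe_load(f)["Collections"]

    for item in collections:
        testcase = item.get("testcase", {})
        chaos_file = testcase.get("chaos", "")
        if chaos_file and chaos_file in chaos_yaml:
            print(testcase["name"], testcase["expectation"]["cluster_1_node"])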
@@ -40,16 +40,16 @@ bash uninstall_milvus.sh ${release} ${ns}|| true

 declare -A pod_map=(["querynode"]="queryNode" ["indexnode"]="indexNode" ["datanode"]="dataNode" ["proxy"]="proxy")
 echo "install milvus"
-if [ ${pod} != "standalone" ];
+if [[ ${pod} != *"standalone"* ]];
 then
     echo "insatll cluster"
-    helm install --wait --timeout 360s ${release} milvus/milvus --set image.all.repository=${REPOSITORY:-"milvusdb/milvus-dev"} --set image.all.tag=${IMAGE_TAG:-"master-latest"} --set ${pod_map[${pod}]}.replicas=$node_num -f ../cluster-values.yaml -n=${ns}
+    helm install --wait --timeout 360s ${release} milvus/milvus --set ${pod_map[${pod}]}.replicas=$node_num -f ../cluster-values.yaml -n=${ns}
 fi

-if [ ${pod} == "standalone" ];
+if [[ ${pod} == *"standalone"* ]];
 then
     echo "install standalone"
-    helm install --wait --timeout 360s ${release} milvus/milvus --set image.all.repository=${REPOSITORY:-"milvusdb/milvus-dev"} --set image.all.tag=${IMAGE_TAG:-"master-latest"} -f ../standalone-values.yaml -n=${ns}
+    helm install --wait --timeout 360s ${release} milvus/milvus -f ../standalone-values.yaml -n=${ns}
 fi

 # wait all pod ready
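
The same *"standalone"* pattern change is applied to the local install script; for the per-component kill cases the pod_map table translates the lowercase pod name into the camel-cased Helm value prefix so the replica count can be raised for just that component. A rough Python equivalent of that argument construction; build_helm_args is a hypothetical helper for illustration only, not part of the script:

    # Translate a pod name into the helm flags used above. Only the
    # components listed in pod_map get a replicas override.
    POD_MAP = {"querynode": "queryNode", "indexnode": "indexNode",
               "datanode": "dataNode", "proxy": "proxy"}

    def build_helm_args(pod: str, node_num: int) -> list:
        if "standalone" in pod:
            return ["-f", "../standalone-values.yaml"]
        args = ["-f", "../cluster-values.yaml"]
        if pod in POD_MAP:
            args = ["--set", f"{POD_MAP[pod]}.replicas={node_num}"] + args
        return args

    print(build_helm_args("querynode", 2))
    # ['--set', 'queryNode.replicas=2', '-f', '../cluster-values.yaml']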
@@ -17,8 +17,8 @@ ENTITIES_FOR_SEARCH = 3000  # entities for search_collection

 CHAOS_CONFIG_ENV = 'CHAOS_CONFIG_PATH'  # env variables for chao path
 TESTS_CONFIG_LOCATION = 'chaos_objects/pod_kill/'
-ALL_CHAOS_YAMLS = 'chaos_querynode_pod_kill.yaml'
-RELEASE_NAME = 'test-querynode-pod-kill-17-33-50'
+ALL_CHAOS_YAMLS = 'chaos_allstandalone_pod_kill.yaml'
+RELEASE_NAME = 'test-allstandalone-pod-kill-19-25-26'
 WAIT_PER_OP = 10  # time to wait in seconds between operations
 CHAOS_DURATION = 120  # chaos duration time in seconds
 DEFAULT_INDEX_PARAM = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
@@ -12,6 +12,7 @@ from chaos.checker import (CreateChecker, InsertFlushChecker,
 from common.cus_resource_opts import CustomResourceOperations as CusResource
 from utils.util_log import test_log as log
 from utils.util_k8s import wait_pods_ready, get_pod_list
+from utils.util_common import findkeys
 from chaos import chaos_commons as cc
 from common.common_type import CaseLabel
 from chaos import constants
@@ -34,6 +35,27 @@ def assert_statistic(checkers, expectations={}):
                 f"Expect Succ: {str(k)} succ rate {succ_rate}, total: {total}, average time: {average_time:.4f}")


+def check_cluster_nodes(chaos_config):
+
+    # if all pods will be effected, the expect is all fail.
+    # Even though the replicas is greater than 1, it can not provide HA, so cluster_nodes is set as 1 for this situation.
+    if "all" in chaos_config["metadata"]["name"]:
+        return 1
+
+    selector = findkeys(chaos_config, "selector")
+    selector = list(selector)
+    log.info(f"chaos target selector: {selector}")
+    assert len(selector) == 1
+    selector = selector[0]
+    namespace = selector["namespaces"][0]
+    labels_dict = selector["labelSelectors"]
+    labels_list = []
+    for k, v in labels_dict.items():
+        labels_list.append(k + "=" + v)
+    labels_str = ",".join(labels_list)
+    pods = get_pod_list(namespace, labels_str)
+    return len(pods)
+
+
 def record_results(checkers):
     res = ""
     for k in checkers.keys():
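
check_cluster_nodes is what decides whether the 1-node or n-node expectations apply. For the new all-pods workflows it returns 1 immediately, since killing every pod at once means extra replicas cannot provide HA; otherwise it builds a k=v,k=v label selector string from the chaos manifest and counts the matching pods. A usage sketch, run in the context of this test module (the yaml path is illustrative, combining TESTS_CONFIG_LOCATION with the previous default chaos file):

    # Load a chaos manifest with the module's own helper and count the pods
    # its selector covers; "all*" workflows short-circuit to 1.
    chaos_config = cc.gen_experiment_config("chaos_objects/pod_kill/chaos_querynode_pod_kill.yaml")
    cluster_nodes = check_cluster_nodes(chaos_config)
    log.info(f"nodes covered by the chaos selector: {cluster_nodes}")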
@@ -57,17 +79,7 @@ class TestChaosBase:
     health_checkers = {}

     def parser_testcase_config(self, chaos_yaml, chaos_config):
-        # TODO: need a better way (maybe recursion) to parse chaos_config
-        # selector key is located in different depth when chaos config's kind is different
-        # for now, there are two kinds of chaos config: xxChaos and Schedule(applied in pod kill chaos).
-        if chaos_config["kind"] == "Schedule":
-            for k, v in chaos_config["spec"].items():
-                if "Chaos" in k and "selector" in v.keys():
-                    selector = v["selector"]
-                    break
-        else:
-            selector = chaos_config["spec"]["selector"]
-        log.info(f"chaos target selector: {selector}")
+        cluster_nodes = check_cluster_nodes(chaos_config)
         tests_yaml = constants.TESTS_CONFIG_LOCATION + 'testcases.yaml'
         tests_config = cc.gen_experiment_config(tests_yaml)
         test_collections = tests_config.get('Collections', None)
@@ -75,16 +87,8 @@ class TestChaosBase:
             test_chaos = t.get('testcase', {}).get('chaos', {})
             if test_chaos in chaos_yaml:
                 expects = t.get('testcase', {}).get('expectation', {}).get('cluster_1_node', {})
-                # get the nums of pods
-                namespace = selector["namespaces"][0]
-                labels_dict = selector["labelSelectors"]
-                labels_list = []
-                for k,v in labels_dict.items():
-                    labels_list.append(k+"="+v)
-                labels_str = ",".join(labels_list)
-                pods = get_pod_list(namespace, labels_str)
-                # for the cluster_n_node
-                if len(pods) > 1:
+                if cluster_nodes > 1:
                     expects = t.get('testcase', {}).get('expectation', {}).get('cluster_n_node', {})
                 log.info(f"yaml.expects: {expects}")
                 self.expect_create = expects.get(Op.create.value, constants.SUCC)
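
With this refactor, parser_testcase_config no longer parses the selector inline; the expectation set is chosen purely from the node count returned by check_cluster_nodes. Roughly, in sketch form:

    # cluster_n_node expectations apply only when the chaos selector covers
    # more than one pod; the all-pods workflows always report 1 and keep the
    # cluster_1_node expectations.
    expectation = t.get('testcase', {}).get('expectation', {})
    key = 'cluster_n_node' if cluster_nodes > 1 else 'cluster_1_node'
    expects = expectation.get(key, {})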
@@ -13,8 +13,8 @@ class CustomResourceOperations(object):
         self.group = group
         self.version = version
         self.namespace = namespace
-        if kind.lower() == "schedule":
-            self.plural = "schedules"
+        if kind.lower()[-1] != "s":
+            self.plural = kind.lower() + "s"
         else:
             self.plural = kind.lower()
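
The old code special-cased only the Schedule kind; the new rule derives the CRD plural generically by appending "s" unless the lowercased kind already ends in one, which also covers the Workflow objects added in this commit. A small check of that rule:

    # Same pluralization rule as the constructor above.
    def plural_of(kind: str) -> str:
        k = kind.lower()
        return k if k.endswith("s") else k + "s"

    assert plural_of("Schedule") == "schedules"
    assert plural_of("Workflow") == "workflows"
    assert plural_of("PodChaos") == "podchaos"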
tests/python_client/utils/util_common.py (new file, 30 lines)
@@ -0,0 +1,30 @@


def findkeys(node, kv):
    # refer to https://stackoverflow.com/questions/9807634/find-all-occurrences-of-a-key-in-nested-dictionaries-and-lists
    if isinstance(node, list):
        for i in node:
            for x in findkeys(i, kv):
                yield x
    elif isinstance(node, dict):
        if kv in node:
            yield node[kv]
        for j in node.values():
            for x in findkeys(j, kv):
                yield x


if __name__ == "__main__":
    d = { "id" : "abcde",
          "key1" : "blah",
          "key2" : "blah blah",
          "nestedlist" : [
            { "id" : "qwerty",
              "nestednestedlist" : [
                { "id" : "xyz", "keyA" : "blah blah blah" },
                { "id" : "fghi", "keyZ" : "blah blah blah" }],
              "anothernestednestedlist" : [
                { "id" : "asdf", "keyQ" : "blah blah" },
                { "id" : "yuiop", "keyW" : "blah" }] } ] }
    print(list(findkeys(d, 'id')))
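
findkeys is the recursive lookup that check_cluster_nodes relies on: it yields every value stored under a given key at any nesting depth of dicts and lists, so the chaos selector can be located whether the manifest is a plain chaos object, a Schedule, or one of the new Workflows. Usage as in test_chaos.py earlier in this diff, with a small illustrative manifest fragment:

    # Collect every "selector" occurrence from a loaded chaos manifest,
    # whatever its nesting depth.
    chaos_config = {"spec": {"schedule": {"podChaos": {"selector": {"namespaces": ["chaos-testing"]}}}}}
    print(list(findkeys(chaos_config, "selector")))
    # [{'namespaces': ['chaos-testing']}]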