2021-09-28 17:20:03 +08:00
|
|
|
# Workflow that runs the pod-kill chaos scenario against a Milvus deployment.
name: Pod Kill Chaos Test

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # Once a day at 18:30 (GitHub evaluates cron schedules in UTC).
    - cron: '30 18 * * *'
|
2021-09-28 17:20:03 +08:00
|
|
|
jobs:
  test-pod-kill-chaos:
    runs-on: ubuntu-latest
    # Upper bound for one matrix job, covering every step below.
    timeout-minutes: 40
    strategy:
      # Keep the other matrix jobs running when one of them fails.
      fail-fast: false
      matrix:
        # One job per pod type; the value is substituted into the release
        # name and the chaos object file name in the steps.
        pod:
          - standalone
          - datacoord
          - datanode
          - indexcoord
          - indexnode
          - proxy
          - pulsar
          - querycoord
          - querynode
          - rootcoord
          - etcd
          - minio
    steps:
|
|
|
|
|
2021-12-01 15:55:32 +08:00
|
|
|
# Build the Helm release name from the matrix entry and export it as RELEASE
# so later steps can read it through the env context.
- name: Set env param
  run: |
    echo "RELEASE=test-${{ matrix.pod }}-pod-kill" >> "$GITHUB_ENV"
|
|
|
|
|
2021-09-28 17:20:03 +08:00
|
|
|
# Start a kind cluster using the pinned helm/kind-action release.
- name: Creating kind cluster
  uses: helm/kind-action@v1.2.0
|
2021-09-29 19:44:13 +08:00
|
|
|
|
2021-09-28 17:20:03 +08:00
|
|
|
# Diagnostics only: write cluster state and tool versions to the job log.
- name: Print cluster information
  run: |
    # Cluster configuration and reachability.
    kubectl config view
    kubectl cluster-info
    kubectl get nodes
    kubectl get pods -n kube-system
    # Client tool versions.
    helm version
    kubectl version
|
|
|
|
|
|
|
|
# Check out the repository; later steps run scripts and tests from tests/ and scripts/.
- uses: actions/checkout@v2
|
|
|
|
|
|
|
|
# Install the Python interpreter used by the pip and pytest steps.
- name: Set up Python
  uses: actions/setup-python@v2
  with:
    # Quoted so the version is a string rather than a YAML float.
    python-version: '3.8'
|
2021-09-29 19:44:13 +08:00
|
|
|
|
|
|
|
# Install the Python test dependencies through the retry action so that a
# failed install is attempted up to three times.
- name: Install dependency
  uses: nick-invision/retry@v2
  with:
    timeout_minutes: 5
    max_attempts: 3
    retry_on: error
    shell: bash
    command: |
      # --trusted-host takes a bare host (or host:port), not a URL; with the
      # scheme included pip does not match the intended host.
      pip install -r tests/python_client/requirements.txt --trusted-host test.pypi.org
      pip install --upgrade protobuf
|
2021-09-28 17:20:03 +08:00
|
|
|
|
|
|
|
# Install Chaos Mesh v2.0.3 into the chaos-testing namespace.
- name: Deploy Chaos Mesh
  shell: bash
  run: |
    helm repo add chaos-mesh https://charts.chaos-mesh.org
    helm search repo chaos-mesh
    kubectl create ns chaos-testing
    # The chaos daemon is configured for the containerd runtime and socket.
    helm install chaos-mesh chaos-mesh/chaos-mesh \
      --wait --timeout 360s \
      --namespace=chaos-testing \
      --version v2.0.3 \
      --set chaosDaemon.runtime=containerd \
      --set chaosDaemon.socketPath=/run/containerd/containerd.sock
    kubectl get po -n chaos-testing
|
2021-09-29 19:44:13 +08:00
|
|
|
|
2021-09-28 17:20:03 +08:00
|
|
|
# Install Milvus with Helm (standalone or cluster values, depending on the
# matrix entry), forward port 19530 locally and run a smoke test.
- name: Deploy Milvus
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    echo "latest tag:"
    bash ../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus-dev -t master-latest -f master- -F -L -q
    helm repo add milvus https://milvus-io.github.io/milvus-helm
    helm repo update
    # Select the values file that matches the deployment mode under test.
    if [ "${{ matrix.pod }}" == "standalone" ]; then
      values_file=standalone-values.yaml
    else
      values_file=cluster-values.yaml
    fi
    helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f "$values_file" -n=chaos-testing
    kubectl get pods -n chaos-testing
    sleep 20s
    kubectl get pods -n chaos-testing
    # Forward the Milvus service port in the background, then give it time
    # to come up.
    kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
    sleep 20s
    # Verify that the port-forward succeeded.
    nc -vz 127.0.0.1 19530
    # Verify that the Milvus server is healthy.
    python scripts/hello_milvus.py
|
2021-09-29 19:44:13 +08:00
|
|
|
|
2021-09-28 17:20:03 +08:00
|
|
|
# Point constants.py at the pod-kill chaos object for this matrix entry and
# run the chaos test suite.
- name: Chaos Test
  timeout-minutes: 15
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    # Rewrite the chaos settings in place: config directory, chaos YAML file
    # name and Helm release name.
    sed -i "s/TESTS_CONFIG_LOCATION =.*/TESTS_CONFIG_LOCATION = \'chaos_objects\/pod_kill\/'/g" constants.py
    sed -i "s/ALL_CHAOS_YAMLS =.*/ALL_CHAOS_YAMLS = \'chaos_${{ matrix.pod }}_pod_kill.yaml\'/g" constants.py
    sed -i "s/RELEASE_NAME =.*/RELEASE_NAME = \'${{ env.RELEASE }}\'/g" constants.py
    # Print the result so the effective settings are visible in the log.
    cat constants.py
    # pytest is capped at 14 minutes, below the 15-minute step timeout.
    # A failure is only reported via echo, so the step itself still passes.
    timeout 14m pytest -s -v test_chaos.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "chaos test failed"
|
2021-10-14 17:26:34 +08:00
|
|
|
|
2022-01-24 18:59:41 +08:00
|
|
|
# Print the chaos test report for this release, if one was written.
- name: Result Analysis
  timeout-minutes: 15
  shell: bash
  working-directory: tests/python_client/chaos/reports
  run: |
    echo "result analysis"
    # A missing report is tolerated and only noted in the log.
    cat ${{ env.RELEASE }}.log || echo "no log file"
|
|
|
|
|
2021-10-15 14:24:34 +08:00
|
|
|
# After the chaos run: wait for the release's pods to become Ready again,
# re-create the port-forward and run the end-to-end tests. Runs even when
# an earlier step failed.
- name: Milvus E2E Test
  timeout-minutes: 10
  if: ${{ always() }}
  shell: bash
  working-directory: tests/python_client
  run: |
    kubectl get pod -n chaos-testing
    # Wait for all pods of the release to be ready (both label schemes).
    kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
    kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
    kubectl get pod -n chaos-testing
    # Kill any stale port-forward. This is best effort: the step runs under
    # `bash -eo pipefail`, so without `-r` and `|| true` it would abort when
    # no such process exists (grep exits 1 and kill is run with no PID).
    ps aux | grep forward | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
    kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
    sleep 20s
    # Verify that the port-forward succeeded.
    nc -vz 127.0.0.1 19530
    pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
    python chaos/scripts/hello_milvus.py --host 127.0.0.1
|
2021-12-13 13:31:27 +08:00
|
|
|
|
|
|
|
# Collect Kubernetes logs after the chaos test; runs regardless of earlier
# failures.
- name: Export logs
  if: ${{ always() }}
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    # The pod ages listed here show whether the target pod was killed.
    kubectl get po -n chaos-testing
    # Export the Kubernetes logs for this release into k8s_logs/chaos-test.
    bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/chaos-test
|
|
|
|
|
2021-11-01 10:43:52 +08:00
|
|
|
# Recovery path: runs only when an earlier step failed. Uninstalls the
# release, installs it again and re-creates the port-forward so the
# remaining tests have a server to run against.
- name: Deploy Milvus Again If Previous E2E Test Failed
  timeout-minutes: 15
  if: ${{ failure() }}
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    kubectl config set-context --current --namespace=chaos-testing
    bash scripts/uninstall_milvus.sh ${{ env.RELEASE }}
    # Use the same values files as the initial "Deploy Milvus" step so the
    # redeployed instance matches the original configuration.
    if [ "${{ matrix.pod }}" == "standalone" ]; then
      values_file=standalone-values.yaml
    else
      values_file=cluster-values.yaml
    fi
    helm install --wait --timeout 720s ${{ env.RELEASE }} milvus/milvus -f "$values_file" -n=chaos-testing
    kubectl get pods -n chaos-testing
    sleep 20s
    kubectl get pods -n chaos-testing
    # Kill any stale port-forward. This is best effort: the step runs under
    # `bash -eo pipefail`, so without `-r` and `|| true` it would abort when
    # no such process exists (grep exits 1 and kill is run with no PID).
    ps aux | grep forward | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
    kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
    sleep 20s
    # Verify that the port-forward succeeded.
    nc -vz 127.0.0.1 19530
    # Verify that the Milvus server is healthy.
    python scripts/hello_milvus.py
|
|
|
|
|
2021-10-15 14:24:34 +08:00
|
|
|
# Run the data-consistency chaos test; runs regardless of earlier failures.
- name: Data Consist Test
  timeout-minutes: 5
  if: ${{ always() }}
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    # A failure is only reported via echo, so the step itself still passes.
    pytest -s -v test_chaos_data_consist.py --host 127.0.0.1 --log-cli-level=INFO --capture=no || echo "data consist chaos test failed"
|
2021-10-15 14:24:34 +08:00
|
|
|
|
|
|
|
# Second end-to-end pass, after the data-consistency test: wait for the
# release's pods, re-create the port-forward and run the E2E tests. Runs
# even when an earlier step failed.
- name: Milvus E2E Test
  timeout-minutes: 10
  if: ${{ always() }}
  shell: bash
  working-directory: tests/python_client
  run: |
    kubectl get pod -n chaos-testing
    # Wait for all pods of the release to be ready (both label schemes).
    kubectl wait --for=condition=Ready pod -l app.kubernetes.io/instance=${{ env.RELEASE }} -n chaos-testing --timeout=360s
    kubectl wait --for=condition=Ready pod -l release=${{ env.RELEASE }} -n chaos-testing --timeout=360s
    kubectl get pod -n chaos-testing
    # Kill any stale port-forward. This is best effort: the step runs under
    # `bash -eo pipefail`, so without `-r` and `|| true` it would abort when
    # no such process exists (grep exits 1 and kill is run with no PID).
    ps aux | grep forward | grep -v grep | awk '{print $2}' | xargs -r kill -9 || true
    kubectl port-forward service/${{ env.RELEASE }}-milvus 19530 -n chaos-testing >/dev/null 2>&1 &
    sleep 20s
    # Verify that the port-forward succeeded.
    nc -vz 127.0.0.1 19530
    pytest -s -v testcases/test_e2e.py --host 127.0.0.1 --log-cli-level=INFO --capture=no
    python chaos/scripts/hello_milvus.py --host 127.0.0.1
|
2021-10-14 17:26:34 +08:00
|
|
|
|
2021-09-29 19:44:13 +08:00
|
|
|
# Collect Kubernetes logs after the data-consistency test; runs regardless
# of earlier failures.
- name: Export logs
  if: ${{ always() }}
  shell: bash
  working-directory: tests/python_client/chaos
  run: |
    # The pod ages listed here show whether the target pod was killed.
    kubectl get po -n chaos-testing
    # Export logs for the Milvus release and for the Chaos Mesh daemon into
    # separate directories under k8s_logs.
    bash ../../scripts/export_log_k8s.sh chaos-testing ${{ env.RELEASE }} k8s_logs/data-consist-test
    bash ../../scripts/export_log_k8s.sh chaos-testing chaos-daemon k8s_logs/chaos-mesh-daemon
|
2021-09-29 19:44:13 +08:00
|
|
|
|
2021-09-28 19:50:03 +08:00
|
|
|
# Publish the exported Kubernetes logs and test reports as a build artifact,
# regardless of earlier failures.
- name: Upload logs
  if: ${{ always() }}
  # v2 of this action has been retired by GitHub and no longer runs.
  # v4 requires artifact names to be unique within a workflow run, which
  # the per-matrix name below already guarantees.
  uses: actions/upload-artifact@v4
  with:
    name: logs-${{ matrix.pod }}
    path: |
      tests/python_client/chaos/k8s_logs
      tests/python_client/chaos/reports
|