test level 3: restart (#2384)

* test level 3: restart

Signed-off-by: zw <zw@milvus.io>

* add namespace

Signed-off-by: zw <zw@milvus.io>

* update

Signed-off-by: zw <zw@milvus.io>

* update

Signed-off-by: zw <zw@milvus.io>

* Update utils.py

Signed-off-by: zw <zw@milvus.io>

* Update test_level_3.py

Signed-off-by: zw <zw@milvus.io>

* Update Jenkinsfile

Signed-off-by: zw <zw@milvus.io>

* debug

Signed-off-by: zw <zw@milvus.io>

* debug

Signed-off-by: zw <zw@milvus.io>

* debug

Signed-off-by: zw <zw@milvus.io>

* debug

Signed-off-by: zw <zw@milvus.io>

* clean compile warning (#2380)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
Signed-off-by: zw <zw@milvus.io>

* delete pod

Signed-off-by: zw <zw@milvus.io>

* update

Signed-off-by: zw <zw@milvus.io>

* debug

Signed-off-by: zw <zw@milvus.io>

* debug

Signed-off-by: zw <zw@milvus.io>

* recover

Signed-off-by: zw <zw@milvus.io>

Co-authored-by: zw <zw@milvus.io>
Co-authored-by: Cai Yudong <yudong.cai@zilliz.com>
This commit is contained in:
del-zhenwu 2020-05-20 18:51:50 +08:00 committed by GitHub
parent b532a69c8b
commit 40055fcaf0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 168 additions and 36 deletions

View File

@ -6,7 +6,7 @@ timeout(time: 120, unit: 'MINUTES') {
checkout([$class: 'GitSCM', branches: [[name: "${env.HELM_BRANCH}"]], userRemoteConfigs: [[url: "https://github.com/milvus-io/milvus-helm.git", name: 'origin', refspec: "+refs/heads/${env.HELM_BRANCH}:refs/remotes/origin/${env.HELM_BRANCH}"]]])
retry(3) {
try {
sh "helm install --wait --timeout 300s --set image.repository=registry.zilliz.com/milvus/engine --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP -f ci/db_backend/mysql_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ${env.HELM_RELEASE_NAME} ."
sh "helm install --wait --timeout 300s --set image.repository=registry.zilliz.com/milvus/engine --set persistence.enabled=true --set image.tag=${DOCKER_VERSION} --set image.pullPolicy=Always --set service.type=ClusterIP -f ci/db_backend/mysql_${BINARY_VERSION}_values.yaml -f ci/filebeat/values.yaml --namespace milvus ${env.HELM_RELEASE_NAME} ."
} catch (exc) {
def helmStatusCMD = "helm get manifest --namespace milvus ${env.HELM_RELEASE_NAME} | kubectl describe -n milvus -f - && \
kubectl logs --namespace milvus -l \"app=milvus,release=${env.HELM_RELEASE_NAME}\" -c milvus && \
@ -22,6 +22,7 @@ timeout(time: 120, unit: 'MINUTES') {
dir ("tests/milvus_python_test") {
// sh 'python3 -m pip install -r requirements.txt -i http://pypi.douban.com/simple --trusted-host pypi.douban.com'
sh 'python3 -m pip install -r requirements.txt'
sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.HELM_RELEASE_NAME}.milvus.svc.cluster.local"
sh "pytest . --alluredir=\"test_out/dev/single/mysql\" --level=1 --ip ${env.HELM_RELEASE_NAME}.milvus.svc.cluster.local --service ${env.HELM_RELEASE_NAME}"
// sh "pytest test_restart.py --alluredir=\"test_out/dev/single/mysql\" --level=3 --ip ${env.HELM_RELEASE_NAME}.milvus.svc.cluster.local --service ${env.HELM_RELEASE_NAME}"
}
}

View File

@ -1049,6 +1049,7 @@ class TestCollectionLogic(object):
self.execute(logic_seq, connect)
else:
self.execute_with_error(logic_seq, connect)
self.tear_down(connect)
def is_right(self, seq):
if sorted(seq) == seq:
@ -1089,6 +1090,11 @@ class TestCollectionLogic(object):
break
assert error_flag == True
def tear_down(self, connect):
names = connect.list_collections()[1]
for name in names:
connect.drop_collection(name)
def gen_params(self):
collection_name = gen_unique_str("test_collection")
top_k = 1

View File

@ -13,6 +13,7 @@ timeout = 1
def pytest_addoption(parser):
parser.addoption("--ip", action="store", default="localhost")
parser.addoption("--service", action="store", default="")
parser.addoption("--port", action="store", default=19530)
parser.addoption("--http-port", action="store", default=19121)
parser.addoption("--handler", action="store", default="GRPC")
@ -35,6 +36,7 @@ def check_server_connection(request):
@pytest.fixture(scope="module")
def connect(request):
ip = request.config.getoption("--ip")
service_name = request.config.getoption("--service")
port = request.config.getoption("--port")
http_port = request.config.getoption("--http-port")
handler = request.config.getoption("--handler")
@ -47,7 +49,7 @@ def connect(request):
pytest.exit("Milvus server can not connected, exit pytest ...")
def fin():
try:
# milvus.disconnect()
milvus.close()
pass
except Exception as e:
logging.getLogger().info(str(e))
@ -58,6 +60,7 @@ def connect(request):
@pytest.fixture(scope="module")
def dis_connect(request):
ip = request.config.getoption("--ip")
service_name = request.config.getoption("--service")
port = request.config.getoption("--port")
http_port = request.config.getoption("--http-port")
handler = request.config.getoption("--handler")
@ -71,12 +74,13 @@ def dis_connect(request):
@pytest.fixture(scope="module")
def args(request):
ip = request.config.getoption("--ip")
service_name = request.config.getoption("--service")
port = request.config.getoption("--port")
http_port = request.config.getoption("--http-port")
handler = request.config.getoption("--handler")
if handler == "HTTP":
port = http_port
args = {"ip": ip, "port": port, "handler": handler}
args = {"ip": ip, "port": port, "handler": handler, "service_name": service_name}
return args
@ -108,10 +112,10 @@ def collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
@ -135,10 +139,10 @@ def ip_collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
@ -162,10 +166,10 @@ def jac_collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
@ -188,10 +192,10 @@ def ham_collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
@ -214,10 +218,10 @@ def tanimoto_collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
return collection_name
@ -239,10 +243,10 @@ def substructure_collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
return collection_name
@ -264,10 +268,10 @@ def superstructure_collection(request, connect):
pytest.exit("collection can not be created, exit pytest ...")
def teardown():
# status, collection_names = connect.list_collections()
# for collection_name in collection_names:
# connect.drop_collection(collection_name)
connect.drop_collection(collection_name)
status, collection_names = connect.list_collections()
for collection_name in collection_names:
connect.drop_collection(collection_name)
# connect.drop_collection(collection_name)
request.addfinalizer(teardown)
return collection_name

View File

@ -4,7 +4,7 @@ log_format = [%(asctime)s-%(levelname)s-%(name)s]: %(message)s (%(filename)s:%(l
log_cli = true
log_level = 20
timeout = 600
timeout = 360
markers =
level: test level

View File

@ -9,3 +9,4 @@ pytest-level==0.1.1
pytest-xdist==1.23.2
scikit-learn>=0.19.1
pymilvus-test>=0.2.0
kubernetes==10.0.1

View File

@ -636,7 +636,6 @@ class TestCompactJAC:
'index_file_size': index_file_size,
'metric_type': MetricType.JACCARD}
connect.create_collection(param)
time.sleep(6)
for i in range(num_collections):
status, ids = connect.insert(collection_name=collection_list[i], records=vectors)
assert status.OK()
@ -646,6 +645,8 @@ class TestCompactJAC:
assert status.OK()
status = connect.compact(collection_list[i])
assert status.OK()
status = connect.drop_collection(collection_list[i])
assert status.OK()
@pytest.mark.timeout(COMPACT_TIMEOUT)
def test_add_vector_after_compact(self, connect, jac_collection):

View File

@ -0,0 +1,67 @@
import time
import random
import pdb
import threading
import logging
from multiprocessing import Pool, Process
import pytest
from milvus import IndexType, MetricType
from utils import *
dim = 128
index_file_size = 10
collection_id = "test_partition_restart"
nprobe = 1
tag = "1970-01-01"
class TestRestartBase:
"""
******************************************************************
The following cases are used to test `create_partition` function
******************************************************************
"""
@pytest.fixture(scope="function", autouse=True)
def skip_check(self, connect, args):
if args["server_name"].find("shards") != -1:
reason = "Skip restart cases in shards mode"
logging.getLogger().info(reason)
pytest.skip(reason)
@pytest.mark.level(2)
def test_create_partition_insert_restart(self, connect, collection, args):
'''
target: return the same row count after server restart
method: call function: create partition, then insert, restart server and assert row count
expected: status ok, and row count keep the same
'''
status = connect.create_partition(collection, tag)
assert status.OK()
nq = 1000
vectors = gen_vectors(nq, dim)
ids = [i for i in range(nq)]
status, ids = connect.insert(collection, vectors, ids, partition_tag=tag)
assert status.OK()
status = connect.flush([collection])
assert status.OK()
status, res = connect.count_entities(collection)
logging.getLogger().info(res)
assert res == nq
# restart server
if restart_server(args["service_name"]):
logging.getLogger().info("Restart success")
else:
logging.getLogger().info("Restart failed")
# assert row count again
# debug
new_connect = get_milvus(args["ip"], args["port"], handler=args["handler"])
status, res = new_connect.count_entities(collection)
logging.getLogger().info(status)
logging.getLogger().info(res)
assert status.OK()
assert res == nq

View File

@ -1,8 +1,8 @@
# STL imports
import os
import sys
import random
import string
import struct
import sys
import logging
import time, datetime
import copy
@ -625,3 +625,55 @@ def assert_equal_vector(v1, v2):
assert False
for i in range(len(v1)):
assert abs(v1[i] - v2[i]) < epsilon
def restart_server(helm_release_name):
res = True
timeout = 120
from kubernetes import client, config
client.rest.logger.setLevel(logging.WARNING)
namespace = "milvus"
# service_name = "%s.%s.svc.cluster.local" % (helm_release_name, namespace)
config.load_kube_config()
v1 = client.CoreV1Api()
pod_name = None
# config_map_names = v1.list_namespaced_config_map(namespace, pretty='true')
# body = {"replicas": 0}
pods = v1.list_namespaced_pod(namespace)
for i in pods.items:
if i.metadata.name.find(helm_release_name) != -1 and i.metadata.name.find("mysql") == -1:
pod_name = i.metadata.name
break
# v1.patch_namespaced_config_map(config_map_name, namespace, body, pretty='true')
# status_res = v1.read_namespaced_service_status(helm_release_name, namespace, pretty='true')
# print(status_res)
if pod_name is not None:
try:
v1.delete_namespaced_pod(pod_name, namespace)
except Exception as e:
logging.error(str(e))
logging.error("Exception when calling CoreV1Api->delete_namespaced_pod")
res = False
return res
time.sleep(5)
# check if restart successfully
pods = v1.list_namespaced_pod(namespace)
for i in pods.items:
pod_name_tmp = i.metadata.name
if pod_name_tmp.find(helm_release_name) != -1:
logging.debug(pod_name_tmp)
start_time = time.time()
while time.time() - start_time > timeout:
status_res = v1.read_namespaced_pod_status(pod_name_tmp, namespace, pretty='true')
if status_res.status.phase == "Running":
break
time.sleep(1)
if time.time() - start_time > timeout:
logging.error("Restart pod: %s timeout" % pod_name_tmp)
res = False
return res
else:
logging.error("Pod: %s not found" % helm_release_name)
res = False
return res