From f314d887f8108372b3e30c408134acd11d8ea9f0 Mon Sep 17 00:00:00 2001 From: NicoYuan1986 <109071306+NicoYuan1986@users.noreply.github.com> Date: Mon, 13 Mar 2023 15:35:54 +0800 Subject: [PATCH] Add test cases of upsert (#22630) Signed-off-by: nico --- tests/python_client/base/partition_wrapper.py | 11 ++ .../python_client/testcases/test_partition.py | 156 ++++++++++++++++++ tests/python_client/testcases/test_search.py | 21 +-- 3 files changed, 175 insertions(+), 13 deletions(-) diff --git a/tests/python_client/base/partition_wrapper.py b/tests/python_client/base/partition_wrapper.py index 45a9fc561d..c599691eff 100644 --- a/tests/python_client/base/partition_wrapper.py +++ b/tests/python_client/base/partition_wrapper.py @@ -132,6 +132,17 @@ class ApiPartitionWrapper: **kwargs).run() return res, check_result + def upsert(self, data, check_task=None, check_items=None, **kwargs): + timeout = kwargs.get("timeout", TIMEOUT) + kwargs.update({"timeout": timeout}) + + func_name = sys._getframe().f_code.co_name + res, succ = api_request([self.partition.upsert, data], **kwargs) + check_result = ResponseChecker(res, func_name, check_task, + check_items, is_succ=succ, data=data, + **kwargs).run() + return res, check_result + def get_replicas(self, timeout=None, check_task=None, check_items=None, **kwargs): timeout = TIMEOUT if timeout is None else timeout func_name = sys._getframe().f_code.co_name diff --git a/tests/python_client/testcases/test_partition.py b/tests/python_client/testcases/test_partition.py index fe0ba44cd8..e201f22151 100644 --- a/tests/python_client/testcases/test_partition.py +++ b/tests/python_client/testcases/test_partition.py @@ -538,6 +538,40 @@ class TestPartitionParams(TestcaseBase): assert not partition_w.is_empty assert partition_w.num_entities == (nums + nums) + @pytest.mark.tags(CaseLabel.L1) + def test_partition_upsert(self): + """ + target: verify upsert entities multiple times + method: 1. create a collection and a partition + 2. partition.upsert(data) + 3. upsert data again + expected: upsert data successfully + """ + # create collection and a partition + collection_w = self.init_collection_wrap() + partition_name = cf.gen_unique_str(prefix) + partition_w = self.init_partition_wrap(collection_w, partition_name) + + # insert data and load + cf.insert_data(collection_w) + collection_w.create_index(ct.default_float_vec_field_name, ct.default_index) + collection_w.load() + + # upsert data + upsert_nb = 1000 + data, values = cf.gen_default_data_for_upsert(nb=upsert_nb, start=2000) + partition_w.upsert(data) + res = partition_w.query("int64 >= 2000 && int64 < 3000", [ct.default_float_field_name])[0] + assert partition_w.num_entities == upsert_nb + ct.default_nb // 2 + assert [res[i][ct.default_float_field_name] for i in range(upsert_nb)] == values.to_list() + + # upsert data + data, values = cf.gen_default_data_for_upsert(nb=upsert_nb, start=ct.default_nb) + partition_w.upsert(data) + res = partition_w.query("int64 >= 3000 && int64 < 4000", [ct.default_float_field_name])[0] + assert partition_w.num_entities == upsert_nb * 2 + ct.default_nb // 2 + assert [res[i][ct.default_float_field_name] for i in range(upsert_nb)] == values.to_list() + class TestPartitionOperations(TestcaseBase): """ Test case of partition interface in operations """ @@ -1012,6 +1046,128 @@ class TestPartitionOperations(TestcaseBase): res = partition_w.delete(expr) assert len(res) == 2 + @pytest.mark.tags(CaseLabel.L1) + def test_partition_upsert_empty_partition(self): + """ + target: verify upsert data in empty partition + method: 1. create a collection + 2. upsert some data in empty partition + expected: upsert successfully + """ + # create collection + collection_w = self.init_collection_wrap() + + # get the default partition + partition_name = ct.default_partition_name + partition_w = self.init_partition_wrap(collection_w, partition_name) + assert partition_w.num_entities == 0 + + # upsert data to the empty partition + data = cf.gen_default_data_for_upsert()[0] + partition_w.upsert(data) + assert partition_w.num_entities == ct.default_nb + + @pytest.mark.tags(CaseLabel.L1) + def test_partition_upsert_dropped_partition(self): + """ + target: verify upsert data in a dropped partition + method: 1. create a partition and drop + 2. upsert some data into the dropped partition + expected: raise exception + """ + # create partition + partition_w = self.init_partition_wrap() + + # drop partition + partition_w.drop() + + # insert data to partition + partition_w.upsert(cf.gen_default_dataframe_data(), + check_task=CheckTasks.err_res, + check_items={ct.err_code: 1, ct.err_msg: "Partition not exist"}) + + @pytest.mark.tags(CaseLabel.L2) + def test_partition_upsert_mismatched_data(self): + """ + target: test upsert mismatched data in partition + method: 1. create a partition + 2. insert some data + 3. upsert with mismatched data + expected: raise exception + """ + # create a partition + partition_w = self.init_partition_wrap() + + # insert data + data = cf.gen_default_dataframe_data() + partition_w.insert(data) + + # upsert mismatched data + upsert_data = cf.gen_default_data_for_upsert(dim=ct.default_dim-1)[0] + error = {ct.err_code: 1, ct.err_msg: "Collection field dim is 128, but entities field dim is 127"} + partition_w.upsert(upsert_data, check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L2) + def test_partition_upsert_with_auto_id(self): + """ + target: test upsert data in partition when auto_id=True + method: 1. create a partition + 2. insert some data + 3. upsert data + expected: raise exception + """ + # create a partition + schema = cf.gen_default_collection_schema(auto_id=True) + collection_w = self.init_collection_wrap(schema=schema) + partition_w = self.init_partition_wrap(collection_w) + + # insert data + data = cf.gen_default_dataframe_data() + data.drop(ct.default_int64_field_name, axis=1, inplace=True) + partition_w.insert(data) + + # upsert data + upsert_data = cf.gen_default_data_for_upsert()[0] + upsert_data.drop(ct.default_int64_field_name, axis=1, inplace=True) + error = {ct.err_code: 1, ct.err_msg: "Upsert don't support autoid == true"} + partition_w.upsert(upsert_data, check_task=CheckTasks.err_res, check_items=error) + + @pytest.mark.tags(CaseLabel.L2) + def test_partition_upsert_same_pk_in_different_partitions(self): + """ + target: test upsert same pk in different partitions + method: 1. create 2 partitions + 2. insert some data + 3. upsert data + expected: raise exception + """ + # create 2 partitions + collection_w = self.init_collection_wrap() + partition_1 = self.init_partition_wrap(collection_w) + partition_2 = self.init_partition_wrap(collection_w) + + # insert data + nb = 1000 + data = cf.gen_default_dataframe_data(nb) + partition_1.insert(data) + data = cf.gen_default_dataframe_data(nb, start=nb) + partition_2.insert(data) + + # upsert data in 2 partitions + upsert_data, values = cf.gen_default_data_for_upsert(1) + partition_1.upsert(upsert_data) + partition_2.upsert(upsert_data) + + # load + collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index) + collection_w.load() + + # query and check the results + expr = "int64 == 0" + res1 = partition_1.query(expr, [ct.default_float_field_name])[0] + res2 = partition_2.query(expr, [ct.default_float_field_name])[0] + assert res1 == res2 + @pytest.mark.tags(CaseLabel.L0) def test_create_partition_repeat(self): """ diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index 927166d95f..006fda6fad 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -3070,7 +3070,7 @@ class TestSearchBase(TestcaseBase): partition_num=1, dim=dim, is_index=False)[0:5] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create patition + # 2. create partition partition_name = "search_partition_empty" collection_w.create_partition(partition_name=partition_name, description="search partition empty") par = collection_w.partitions @@ -3236,7 +3236,7 @@ class TestSearchBase(TestcaseBase): partition_num=1, dim=dim, is_index=False)[0:5] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create patition + # 2. create partition partition_name = "search_partition_empty" collection_w.create_partition(partition_name=partition_name, description="search partition empty") par = collection_w.partitions @@ -3279,7 +3279,7 @@ class TestSearchBase(TestcaseBase): partition_num=1, dim=dim, is_index=False)[0:5] vectors = [[random.random() for _ in range(dim)] for _ in range(nq)] - # 2. create patition + # 2. create partition par_name = collection_w.partitions[0].name # collection_w.load() # 3. create different index @@ -3399,7 +3399,7 @@ class TestSearchDSL(TestcaseBase): "limit": ct.default_top_k}) -class TestsearchString(TestcaseBase): +class TestsearchString(TestcaseBase): """ ****************************************************************** The following cases are used to test search about string @@ -3455,7 +3455,6 @@ class TestsearchString(TestcaseBase): "limit": default_limit, "_async": _async}) - @pytest.mark.tags(CaseLabel.L2) def test_search_string_field_is_primary_true(self, dim, _async): """ @@ -3505,7 +3504,7 @@ class TestsearchString(TestcaseBase): default_search_params, default_limit, default_search_mix_exp, output_fields=output_fields, - _async=_async, + _async=_async, travel_timestamp=0, check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, @@ -3522,7 +3521,6 @@ class TestsearchString(TestcaseBase): collection search uses invalid string expr expected: Raise exception """ - # 1. initialize with data collection_w, _, _, insert_ids = \ self.init_collection_general(prefix, True, auto_id=auto_id, dim=default_dim)[0:4] @@ -3537,8 +3535,6 @@ class TestsearchString(TestcaseBase): "err_msg": "failed to create query plan: type mismatch"} ) - - @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("expression", cf.gen_normal_string_expressions(ct.default_string_field_name)) def test_search_with_different_string_expr(self, dim, expression, _async): @@ -3631,8 +3627,7 @@ class TestsearchString(TestcaseBase): collection search uses string expr in string field, string field is not primary expected: Search successfully """ - # 1. initialize with binary data - + # 1. initialize with binary data collection_w, _, binary_raw_vector, insert_ids = self.init_collection_general(prefix, True, 2, is_binary=True, auto_id=auto_id, @@ -3680,7 +3675,7 @@ class TestsearchString(TestcaseBase): search_params, default_limit, default_search_mix_exp, output_fields=output_fields, - _async=_async, + _async=_async, travel_timestamp=0, check_task=CheckTasks.check_search_results, check_items={"nq": default_nq, @@ -3786,7 +3781,7 @@ class TestsearchString(TestcaseBase): search_string_exp = "varchar >= \"\"" - limit =1 + limit = 1 # 2. search log.info("test_search_string_field_is_primary_true: searching collection %s" % collection_w.name)