diff --git a/tests/python_client/common/bulk_insert_data.py b/tests/python_client/common/bulk_insert_data.py
index 4c9bfc9bad..df8b962265 100644
--- a/tests/python_client/common/bulk_insert_data.py
+++ b/tests/python_client/common/bulk_insert_data.py
@@ -589,40 +589,39 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
         else:
             data = pd.Series([json.dumps({ gen_unique_str(): None}) for _ in range(start, rows + start)])
+            data = [json.dumps({gen_unique_str(): None}) for _ in range(start, rows + start)]
     elif data_field == DataField.array_bool_field:
         if not nullable:
             data = pd.Series(
                 [np.array([random.choice([True, False]) for _ in range(array_length)], dtype=np.dtype("bool"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
     elif data_field == DataField.array_int_field:
         if not nullable:
             data = pd.Series(
                 [np.array([random.randint(-999999, 9999999) for _ in range(array_length)], dtype=np.dtype("int64"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
     elif data_field == DataField.array_float_field:
         if not nullable:
             data = pd.Series(
                 [np.array([random.random() for _ in range(array_length)], dtype=np.dtype("float32"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
+
     elif data_field == DataField.array_string_field:
         if not nullable:
             data = pd.Series(
                 [np.array([gen_unique_str(str(i)) for _ in range(array_length)], dtype=np.dtype("str"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
     else:
         raise Exception("unsupported field name")
+
     return data
diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py
index 9e72939b80..c500663e76 100644
--- a/tests/python_client/common/common_func.py
+++ b/tests/python_client/common/common_func.py
@@ -676,7 +676,7 @@ def gen_array_collection_schema(description=ct.default_desc, primary_field=ct.de
               gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT,
                               max_capacity=max_capacity),
               gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR,
-                              max_capacity=max_capacity, max_length=max_length)]
+                              max_capacity=max_capacity, max_length=max_length, nullable=True)]
 
     if with_json is False:
         fields.remove(gen_json_field())
@@ -2934,7 +2934,7 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok"):
     return vectors
 
 
-def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
+def gen_vectors_based_on_vector_type(num, dim, vector_data_type=ct.float_type):
     """
     generate float16 vector data
     raw_vectors : the vectors
diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py
index c82d52b46f..e9fed79e82 100644
--- a/tests/python_client/testcases/test_bulk_insert.py
+++ b/tests/python_client/testcases/test_bulk_insert.py
@@ -887,6 +887,10 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             expr_field = df.string_field
             expr = f"{expr_field} >= '0'"
         else:
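+            # when nullable is true, the bulk-inserted varchar values are null, so a
+            # varchar predicate matches nothing; verify that, then filter by pk instead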
+            res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field, df.int_field])
+            assert len(res) == 0
             expr_field = df.pk_field
             expr = f"{expr_field} >= 0"
@@ -925,7 +929,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         if enable_dynamic_field is False and include_meta is True:
             pytest.skip("include_meta only works with enable_dynamic_field")
         if nullable is True:
-            pytest.skip("issue #36241")
+            pytest.skip("bulk insert does not support numpy files for fields with nullable == true")
         float_vec_field_dim = dim
         binary_vec_field_dim = ((dim+random.randint(-16, 32)) // 8) * 8
         bf16_vec_field_dim = dim+random.randint(-16, 32)
@@ -1201,18 +1205,26 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
                 assert "name" in fields_from_search
                 assert "address" in fields_from_search
         # query data
-        res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
-        if nullable is False:
-            assert len(res) == entities
-        query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
-        res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
-        if nullable is False:
-            assert len(res) == len(query_data)
+        if not nullable:
+            expr_field = df.string_field
+            expr = f"{expr_field} >= '0'"
+        else:
+            res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
+            assert len(res) == 0
+            expr_field = df.pk_field
+            expr = f"{expr_field} >= 0"
+
+        res, _ = self.collection_wrap.query(expr=f"{expr}", output_fields=[df.string_field])
+        assert len(res) == entities
+        query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
+        res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
+        assert len(res) == len(query_data)
         res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
-        if nullable is False:
+        if not nullable:
             assert len(res) == entities
         else:
             assert 0 < len(res) < entities
+
         if enable_partition_key:
             assert len(self.collection_wrap.partitions) > 1
diff --git a/tests/python_client/testcases/test_collection.py b/tests/python_client/testcases/test_collection.py
index edb39eebcc..53d219be87 100644
--- a/tests/python_client/testcases/test_collection.py
+++ b/tests/python_client/testcases/test_collection.py
@@ -4664,3 +4664,49 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
         self.field_schema_wrap.init_field_schema(name="int8_null", dtype=DataType.INT8, default_value=None,
                                                  check_task=CheckTasks.err_res, check_items=error)
+
+
+class TestCollectionDefaultValueValid(TestcaseBase):
+    """ Test case of collection interface """
+
+    """
+    ******************************************************************
+    # The following are valid cases
+    ******************************************************************
+    """
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue 36457")
+    def test_create_collection_default_value_twice(self):
+        """
+        target: test create collection with default value field twice
+        method: create collection with the same default value schema twice
+        expected: create collection successfully
+        """
+        self._connect()
+        int_fields = []
+        c_name = cf.gen_unique_str(prefix)
+        # build a schema with a float field that has a default value
+        int_fields.append(cf.gen_int64_field(is_primary=True))
+        int_fields.append(cf.gen_float_field(default_value=numpy.float32(10.0)))
+        int_fields.append(cf.gen_float_vec_field())
+        schema = cf.gen_collection_schema(fields=int_fields)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+
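+    # creating the same collection again with an identical schema is expected to succeed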
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_collection_none_twice(self):
+        """
+        target: test create collection with nullable field twice
+        method: create collection with the same nullable field schema twice
+        expected: create collection successfully
+        """
+        self._connect()
+        int_fields = []
+        c_name = cf.gen_unique_str(prefix)
+        int_fields.append(cf.gen_int64_field(is_primary=True))
+        int_fields.append(cf.gen_float_field(nullable=True))
+        int_fields.append(cf.gen_float_vec_field())
+        schema = cf.gen_collection_schema(fields=int_fields)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+
\ No newline at end of file
diff --git a/tests/python_client/testcases/test_delete.py b/tests/python_client/testcases/test_delete.py
index 56646ceb94..9dcfdb8fad 100644
--- a/tests/python_client/testcases/test_delete.py
+++ b/tests/python_client/testcases/test_delete.py
@@ -2336,3 +2336,86 @@ class TestDeleteComplexExpr(TestcaseBase):
                           check_task=CheckTasks.check_query_results,
                           check_items={'count(*)': nb - len(filter_ids)})
+
+
+class TestDeleteNoneAndDefaultData(TestcaseBase):
+    """
+    Test case of delete interface with None data
+    """
+
+    @pytest.fixture(scope="function", params=[0, 0.5, 1])
+    def null_data_percent(self, request):
+        yield request.param
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_delete_search_with_none_data(self, null_data_percent):
+        """
+        target: test delete and search when there is None data
+        method: search entities after they were deleted
+        expected: deleted entities are not in the search result
+        """
+        # init collection with nb default data
+        collection_w, _, _, ids = self.init_collection_general(prefix, insert_data=True,
+                                                               nullable_fields={ct.default_float_field_name: null_data_percent},
+                                                               default_value_fields={ct.default_string_field_name: "data"})[0:4]
+        entity, _ = collection_w.query(tmp_expr, output_fields=["*"])
+        search_res, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]],
+                                            ct.default_float_vec_field_name,
+                                            ct.default_search_params, ct.default_limit)
+        # assert the search result contains the entity
+        assert 0 in search_res[0].ids
+
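+        # delete the first half of the inserted entities by primary key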
+        expr = f'{ct.default_int64_field_name} in {ids[:ct.default_nb // 2]}'
+        collection_w.delete(expr)
+        search_res_2, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]],
+                                              ct.default_float_vec_field_name,
+                                              ct.default_search_params, ct.default_limit)
+        # assert the deleted entities are no longer in the search result
+        log.debug(f"Second search result ids: {search_res_2[0].ids}")
+        inter = set(ids[:ct.default_nb // 2]
+                    ).intersection(set(search_res_2[0].ids))
+        # Under bounded staleness we could still search the "deleted" entities if the
+        # search request reaches the query nodes before they consume the delete requests.
+        assert len(inter) == 0
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_delete_entities_repeatedly_with_string_none_data(self, null_data_percent):
+        """
+        target: test delete entities twice with string expr
+        method: delete with same expr twice
+        expected: No exception for second deletion
+        """
+        # init collection with nb default data
+        collection_w = \
+            self.init_collection_general(prefix, nb=tmp_nb, insert_data=True, primary_field=ct.default_string_field_name,
+                                         nullable_fields={ct.default_float_field_name: null_data_percent},
+                                         default_value_fields={ct.default_int64_field_name: 100})[0]
+
+        # assert delete successfully and no exception
+        collection_w.delete(expr=default_string_expr)
+        collection_w.num_entities
+        collection_w.query(default_string_expr,
+                           check_task=CheckTasks.check_query_empty)
+        collection_w.delete(expr=default_string_expr)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="waiting for the expr code part to be merged")
+    def test_delete_entities_repeatedly_with_expr_on_none_fields(self, null_data_percent):
+        """
+        target: test delete entities twice with expr on nullable fields
+        method: delete with same expr twice
+        expected: No exception for second deletion
+        """
+        # init collection with nb default data
+        collection_w = \
+            self.init_collection_general(prefix, nb=tmp_nb, insert_data=True, primary_field=ct.default_string_field_name,
+                                         nullable_fields={ct.default_float_field_name: null_data_percent},
+                                         default_value_fields={ct.default_int64_field_name: 100})[0]
+
+        # assert delete successfully and no exception
+        collection_w.delete(expr=default_string_expr)
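+        # accessing num_entities triggers a flush here (a common idiom in these tests)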
+        collection_w.num_entities
+        collection_w.query(default_string_expr,
+                           check_task=CheckTasks.check_query_empty)
+        collection_w.delete(expr=default_string_expr)
diff --git a/tests/python_client/testcases/test_query.py b/tests/python_client/testcases/test_query.py
index a5fbfa9ca5..2f0e475364 100644
--- a/tests/python_client/testcases/test_query.py
+++ b/tests/python_client/testcases/test_query.py
@@ -3965,6 +3965,50 @@ class TestQueryCount(TestcaseBase):
                            check_items={"count": ct.default_nb,
                                         "batch_size": batch_size})
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.repeat(3)
+    @pytest.mark.skip(reason="issue #36538")
+    def test_count_query_search_after_release_partition_load(self):
+        """
+        target: test query count(*) after releasing the collection and loading one partition
+        method: 1. create a collection with 2 partitions
+                2. insert data
+                3. load one partition
+                4. delete half of the data in each partition
+                5. release the collection and load one partition
+                6. query count(*), then search
+        expected: No exception
+        """
+        # insert data
+        collection_w = self.init_collection_general(prefix, True, 200, partition_num=1, is_index=True)[0]
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 200}]})
+        collection_w.release()
+        partition_w1, partition_w2 = collection_w.partitions
+        # load
+        partition_w1.load()
+        # delete data
+        delete_ids = [i for i in range(50, 150)]
+        collection_w.delete(f"int64 in {delete_ids}")
+        # release
+        collection_w.release()
+        # partition_w1.load()
+        collection_w.load(partition_names=[partition_w1.name])
+        # query count(*) on the collection and the loaded partition
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        partition_w1.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
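+        # searching the partition that is not loaded is expected to fail with a 'not loaded' error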
+        vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)]
+        collection_w.search(vectors[:1], ct.default_float_vec_field_name, ct.default_search_params, 200,
+                            partition_names=[partition_w2.name],
+                            check_task=CheckTasks.err_res,
+                            check_items={ct.err_code: 1, ct.err_msg: 'not loaded'})
+
 
 class TestQueryIterator(TestcaseBase):
     """
@@ -4503,6 +4547,51 @@ class TestQueryNoneAndDefaultData(TestcaseBase):
         collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name],
                            check_task=CheckTasks.check_query_results, check_items={exp_res: res})
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue #36538")
+    def test_query_none_count(self, null_data_percent):
+        """
+        target: test query count(*) with None and default data
+        method: 1. create a collection and 2 partitions with nullable and default value fields
+                2. insert data
+                3. load one partition
+                4. delete half of the data in each partition
+                5. release the collection and load one partition
+                6. query count(*), then search
+        expected: No exception
+        """
+        # insert data
+        collection_w = self.init_collection_general(prefix, True, 200, partition_num=1, is_index=True,
+                                                    nullable_fields={ct.default_float_field_name: null_data_percent},
+                                                    default_value_fields={ct.default_string_field_name: "data"})[0]
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 200}]})
+        collection_w.release()
+        partition_w1, partition_w2 = collection_w.partitions
+        # load
+        partition_w1.load()
+        # delete data
+        delete_ids = [i for i in range(50, 150)]
+        collection_w.delete(f"int64 in {delete_ids}")
+        # release
+        collection_w.release()
+        # partition_w1.load()
+        collection_w.load(partition_names=[partition_w1.name])
+        # query count(*) on the collection and the loaded partition, then search the unloaded one
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        partition_w1.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)]
+        collection_w.search(vectors[:1], ct.default_float_vec_field_name, ct.default_search_params, 200,
+                            partition_names=[partition_w2.name],
+                            check_task=CheckTasks.err_res,
+                            check_items={ct.err_code: 1, ct.err_msg: 'not loaded'})
+
+
 class TestQueryTextMatch(TestcaseBase):
     """
     ******************************************************************
diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py
index 3b64ee9cde..3126af59c6 100644
--- a/tests/python_client/testcases/test_search.py
+++ b/tests/python_client/testcases/test_search.py
@@ -13113,6 +13113,40 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
                                          "output_fields": [default_int64_field_name,
                                                            default_float_field_name]})
 
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_none_data_partial_load(self, is_flush, enable_dynamic_field, null_data_percent):
+        """
+        target: test search with partially loaded fields when None data was inserted
+        method: create connection, create a collection with nullable fields, insert data
+                including None, release, load a subset of the fields, and search
+        expected: 1. search successfully with limit(topK)
+        """
+        # 1. initialize with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, is_flush=is_flush,
+                                         enable_dynamic_field=enable_dynamic_field,
+                                         nullable_fields={ct.default_float_field_name: null_data_percent})[0:5]
+        # 2. release and partial load again
+        collection_w.release()
+        loaded_fields = [default_int64_field_name, ct.default_float_vec_field_name]
+        if not enable_dynamic_field:
+            loaded_fields.append(default_float_field_name)
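+        # note: load_fields is expected to include the primary key and vector fields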
+        collection_w.load(load_fields=loaded_fields)
+        # 3. generate search data
+        vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim)
+        # 4. search after partially loading the field with None data
+        output_fields = [default_int64_field_name, default_float_field_name]
+        collection_w.search(vectors[:default_nq], default_search_field,
+                            default_search_params, default_limit,
+                            default_search_exp,
+                            output_fields=output_fields,
+                            check_task=CheckTasks.check_search_results,
+                            check_items={"nq": default_nq,
+                                         "ids": insert_ids,
+                                         "limit": default_limit,
+                                         "output_fields": output_fields})
+
+
 class TestSearchWithTextMatchFilter(TestcaseBase):
     """
     ******************************************************************
@@ -13259,3 +13293,4 @@ class TestSearchWithTextMatchFilter(TestcaseBase):
         for r in res:
             r = r.to_dict()
             assert any([token in r["entity"][field] for token in top_10_tokens])
+