diff --git a/tests/python_client/testcases/test_bulk_insert.py b/tests/python_client/testcases/test_bulk_insert.py index 9e1a4ae0bc..e21b6a1720 100644 --- a/tests/python_client/testcases/test_bulk_insert.py +++ b/tests/python_client/testcases/test_bulk_insert.py @@ -828,7 +828,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert): @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True]) - def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field): + @pytest.mark.parametrize("enable_partition_key", [True, False]) + def test_bulk_insert_all_field_with_new_json_format(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.npy and uid.npy, @@ -841,7 +842,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert): cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), @@ -945,16 +946,23 @@ class TestBulkInsert(TestcaseBaseBulkInsert): if enable_dynamic_field: assert "name" in fields_from_search assert "address" in fields_from_search - - + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("include_meta", [True, False]) - def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, include_meta): + def test_bulk_insert_all_field_with_numpy(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.npy and uid.npy, @@ -970,7 +978,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert): cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), # cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim), @@ -1072,14 +1080,25 @@ class TestBulkInsert(TestcaseBaseBulkInsert): if enable_dynamic_field and include_meta: assert "name" in fields_from_search assert "address" in fields_from_search + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 + + @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("dim", [128]) # 128 @pytest.mark.parametrize("entities", [1000]) # 1000 @pytest.mark.parametrize("enable_dynamic_field", [True, False]) + @pytest.mark.parametrize("enable_partition_key", [True, False]) @pytest.mark.parametrize("include_meta", [True, False]) - def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, include_meta): + def test_bulk_insert_all_field_with_parquet(self, auto_id, dim, entities, enable_dynamic_field, enable_partition_key, include_meta): """ collection schema 1: [pk, int64, float64, string float_vector] data file: vectors.parquet and uid.parquet, @@ -1094,15 +1113,13 @@ class TestBulkInsert(TestcaseBaseBulkInsert): cf.gen_int64_field(name=df.pk_field, is_primary=True, auto_id=auto_id), cf.gen_int64_field(name=df.int_field), cf.gen_float_field(name=df.float_field), - cf.gen_string_field(name=df.string_field), + cf.gen_string_field(name=df.string_field, is_partition_key=enable_partition_key), cf.gen_json_field(name=df.json_field), cf.gen_array_field(name=df.array_int_field, element_type=DataType.INT64), cf.gen_array_field(name=df.array_float_field, element_type=DataType.FLOAT), cf.gen_array_field(name=df.array_string_field, element_type=DataType.VARCHAR, max_length=100), cf.gen_array_field(name=df.array_bool_field, element_type=DataType.BOOL), cf.gen_float_vec_field(name=df.float_vec_field, dim=dim), - # cf.gen_float_vec_field(name=df.image_float_vec_field, dim=dim), - # cf.gen_float_vec_field(name=df.text_float_vec_field, dim=dim), cf.gen_binary_vec_field(name=df.binary_vec_field, dim=dim), cf.gen_bfloat16_vec_field(name=df.bf16_vec_field, dim=dim), cf.gen_float16_vec_field(name=df.fp16_vec_field, dim=dim) @@ -1199,6 +1216,14 @@ class TestBulkInsert(TestcaseBaseBulkInsert): if enable_dynamic_field and include_meta: assert "name" in fields_from_search assert "address" in fields_from_search + # query data + res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field]) + assert len(res) == entities + query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)] + res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field]) + assert len(res) == len(query_data) + if enable_partition_key: + assert len(self.collection_wrap.partitions) > 1 @pytest.mark.tags(CaseLabel.L3) @pytest.mark.parametrize("auto_id", [True])