test: Add part of test cases for default and null support (#36186)

Issue: #36129

Signed-off-by: binbin lv <binbin.lv@zilliz.com>
This commit is contained in:
binbin 2024-09-12 17:29:13 +08:00 committed by GitHub
parent 5e5e87cc2f
commit b5ff348906
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1340,7 +1340,7 @@ class TestCollectionSearch(TestcaseBase):
"""
@pytest.mark.tags(CaseLabel.L0)
def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type, null_data_percent):
def test_search_normal(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type):
"""
target: test search normal case
method: create connection, collection, insert and search
@ -1350,8 +1350,7 @@ class TestCollectionSearch(TestcaseBase):
collection_w, _, _, insert_ids, time_stamp = \
self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
enable_dynamic_field=enable_dynamic_field,
vector_data_type=vector_data_type,
nullable_fields={ct.default_float_field_name: null_data_percent})[0:5]
vector_data_type=vector_data_type)[0:5]
# 2. generate search data
vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
# 3. search after insert
@ -1467,7 +1466,9 @@ class TestCollectionSearch(TestcaseBase):
self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
enable_dynamic_field=enable_dynamic_field,
multiple_dim_array=multiple_dim_array,
vector_data_type=vector_data_type)[0:5]
vector_data_type=vector_data_type,
nullable_fields={ct.default_string_field_name: 1},
default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5]
# 2. generate search data
vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
vector_name_list = cf.extract_vector_field_name_list(collection_w)
@ -1477,10 +1478,13 @@ class TestCollectionSearch(TestcaseBase):
collection_w.search(vectors[:nq], search_field,
default_search_params, default_limit,
default_search_exp,
output_fields = [ct.default_float_field_name, ct.default_string_field_name],
check_task=CheckTasks.check_search_results,
check_items={"nq": nq,
"ids": insert_ids,
"limit": default_limit})
"limit": default_limit,
"output_fields": [ct.default_float_field_name,
ct.default_string_field_name]})
@pytest.mark.tags(CaseLabel.L1)
def test_search_random_primary_key(self, random_primary_key):
@ -1562,8 +1566,7 @@ class TestCollectionSearch(TestcaseBase):
expected: search successfully
"""
# initialize with data
collection_w, insert_data, _, insert_ids = self.init_collection_general(prefix, True)[
0:4]
collection_w, insert_data, _, insert_ids = self.init_collection_general(prefix, True)[0:4]
# search
collection_w.search(vectors[:nq], default_search_field,
search_params, default_limit,
@ -1780,8 +1783,7 @@ class TestCollectionSearch(TestcaseBase):
auto_id = True
enable_dynamic_field = True
collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, nb,
1, auto_id=auto_id,
dim=dim,
1, auto_id=auto_id, dim=dim,
enable_dynamic_field=enable_dynamic_field)[0:5]
# 2. release collection
log.info("test_search_collection_after_release_load: releasing collection %s" %
@ -7016,6 +7018,10 @@ class TestCollectionRangeSearch(TestcaseBase):
def enable_dynamic_field(self, request):
yield request.param
@pytest.fixture(scope="function", params=[0, 0.5, 1])
def null_data_percent(self, request):
yield request.param
"""
******************************************************************
# The followings are valid range search cases
@ -7024,7 +7030,7 @@ class TestCollectionRangeSearch(TestcaseBase):
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("vector_data_type", ct.all_dense_vector_types)
@pytest.mark.parametrize("with_growing", [False, True])
def test_range_search_default(self, index_type, metric, vector_data_type, with_growing):
def test_range_search_default(self, index_type, metric, vector_data_type, with_growing, null_data_percent):
"""
target: verify the range search returns correct results
method: 1. create collection, insert 10k vectors,
@ -7033,7 +7039,8 @@ class TestCollectionRangeSearch(TestcaseBase):
4. verified the range search results is same as the search results in the range
"""
collection_w = self.init_collection_general(prefix, auto_id=True, insert_data=False, is_index=False,
vector_data_type=vector_data_type, with_json=False)[0]
vector_data_type=vector_data_type, with_json=False,
nullable_fields={ct.default_float_field_name: null_data_percent})[0]
nb = 1000
rounds = 10
for i in range(rounds):
@ -10897,7 +10904,8 @@ class TestCollectionHybridSearchValid(TestcaseBase):
self.init_collection_general(prefix, True, dim=dim, is_flush=is_flush,
primary_field=primary_field, enable_dynamic_field=enable_dynamic_field,
multiple_dim_array=multiple_dim_array,
vector_data_type=vector_data_type)[0:5]
vector_data_type=vector_data_type,
nullable_fields={ct.default_float_field_name: 1})[0:5]
# 2. extract vector field name
vector_name_list = cf.extract_vector_field_name_list(collection_w)
vector_name_list.append(ct.default_float_vec_field_name)
@ -12925,3 +12933,432 @@ class TestSparseSearch(TestcaseBase):
ct.default_sparse_search_params, batch_size,
check_task=CheckTasks.check_search_iterator,
check_items={"batch_size": batch_size})
class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
""" Test case of search interface """
# NOTE(review): indentation in this view is flattened by the diff rendering;
# in the real file these definitions are members of the class above.
# The fixtures below parameterize the test matrix: any test requesting one of
# these names runs once per value listed in `params`.
# medium collection size used as default insert count
@pytest.fixture(scope="function", params=[default_nb_medium])
def nb(self, request):
yield request.param
# number of query vectors per search request
@pytest.fixture(scope="function", params=[200])
def nq(self, request):
yield request.param
# vector dimensionality
@pytest.fixture(scope="function", params=[32, 128])
def dim(self, request):
yield request.param
# whether the primary key is auto-generated
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
# run the search synchronously or asynchronously
@pytest.fixture(scope="function", params=[False, True])
def _async(self, request):
yield request.param
# binary-vector metrics (not used by every test in this class)
@pytest.fixture(scope="function", params=["JACCARD", "HAMMING"])
def metrics(self, request):
yield request.param
# whether to flush after insert
@pytest.fixture(scope="function", params=[False, True])
def is_flush(self, request):
yield request.param
# dynamic-field enabled vs. strict schema
@pytest.fixture(scope="function", params=[True, False])
def enable_dynamic_field(self, request):
yield request.param
# float-vector metrics
@pytest.fixture(scope="function", params=["IP", "COSINE", "L2"])
def metric_type(self, request):
yield request.param
@pytest.fixture(scope="function", params=[True, False])
def random_primary_key(self, request):
yield request.param
# dense vector storage types under test
@pytest.fixture(scope="function", params=["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
def vector_data_type(self, request):
yield request.param
# scalar index types for numeric fields
@pytest.fixture(scope="function", params=["STL_SORT", "INVERTED"])
def numeric_scalar_index(self, request):
yield request.param
# scalar index types for varchar fields
@pytest.fixture(scope="function", params=["TRIE", "INVERTED", "BITMAP"])
def varchar_scalar_index(self, request):
yield request.param
# page size for search_iterator tests
@pytest.fixture(scope="function", params=[200, 600])
def batch_size(self, request):
yield request.param
# fraction of rows whose nullable field is inserted as None
@pytest.fixture(scope="function", params=[0, 0.5, 1])
def null_data_percent(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_search_normal_none_data(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type, null_data_percent):
    """
    target: test search normal case with none data inserted
    method: create connection, collection with nullable fields, insert data including none, and search
    expected: 1. search successfully with limit(topK)
    """
    # 1. build a collection whose float field is nullable at the given ratio
    nullable = {ct.default_float_field_name: null_data_percent}
    init_res = self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
                                            enable_dynamic_field=enable_dynamic_field,
                                            vector_data_type=vector_data_type,
                                            nullable_fields=nullable)
    collection_w, _, _, insert_ids, time_stamp = init_res[0:5]
    # 2. generate query vectors of the matching vector type
    query_vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
    # 3. search and verify ids/limit plus the requested output fields
    requested = [default_int64_field_name, default_float_field_name]
    collection_w.search(query_vectors[:nq], default_search_field,
                        default_search_params, default_limit,
                        default_search_exp,
                        output_fields=requested,
                        guarantee_timestamp=0,
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": nq,
                                     "ids": insert_ids,
                                     "limit": default_limit,
                                     "output_fields": requested})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue #36184")
def test_search_after_none_data_all_field_datatype(self, varchar_scalar_index, numeric_scalar_index,
                                                   null_data_percent, _async):
    """
    target: test search after building scalar indexes on nullable fields of every datatype
    method: 1. create a collection where all scalar fields are nullable and insert data including None
            2. build vector indexes plus one scalar index per scalar field
            3. load and search, requesting nullable fields as output
    expected: search successfully with limit(topK)
    """
    # 1. initialize with data: every scalar field may hold None at the given ratio
    nullable_fields = {ct.default_int32_field_name: null_data_percent,
                       ct.default_int16_field_name: null_data_percent,
                       ct.default_int8_field_name: null_data_percent,
                       ct.default_bool_field_name: null_data_percent,
                       ct.default_float_field_name: null_data_percent,
                       ct.default_double_field_name: null_data_percent,
                       ct.default_string_field_name: null_data_percent}
    collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, partition_num=1,
                                                                  is_all_data_type=True, dim=default_dim,
                                                                  is_index=False,
                                                                  nullable_fields=nullable_fields)[0:4]
    # 2. create index on vector fields and load
    index = "HNSW"
    params = cf.get_index_params_params(index)
    default_index = {"index_type": index, "params": params, "metric_type": "COSINE"}
    vector_name_list = cf.extract_vector_field_name_list(collection_w)
    vector_name_list.append(ct.default_float_vec_field_name)
    for vector_name in vector_name_list:
        collection_w.create_index(vector_name, default_index)
    # 3. create index on the varchar scalar field holding None data
    scalar_index_params = {"index_type": varchar_scalar_index, "params": {}}
    collection_w.create_index(ct.default_string_field_name, scalar_index_params)
    # 4. create index on the numeric scalar fields holding None data
    scalar_index_params = {"index_type": numeric_scalar_index, "params": {}}
    collection_w.create_index(ct.default_int64_field_name, scalar_index_params)
    collection_w.create_index(ct.default_int32_field_name, scalar_index_params)
    collection_w.create_index(ct.default_int16_field_name, scalar_index_params)
    collection_w.create_index(ct.default_int8_field_name, scalar_index_params)
    # FIX: guard the bool-field index like the sibling
    # test_search_after_default_data_all_field_datatype does — STL_SORT is
    # skipped for bool there, so apply the same guard here for consistency.
    if numeric_scalar_index != "STL_SORT":
        collection_w.create_index(ct.default_bool_field_name, scalar_index_params)
    collection_w.create_index(ct.default_float_field_name, scalar_index_params)
    collection_w.load()
    # 5. search
    search_params = cf.gen_search_param(index, "COSINE")
    limit = search_params[0]["params"]["ef"]
    log.info("Searching with search params: {}".format(search_params[0]))
    vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
    # BUGFIX: the original passed the undefined name `search_param` (a
    # NameError once the skip is lifted); use the param set logged above.
    collection_w.search(vectors[:default_nq], default_search_field,
                        search_params[0], limit, default_search_exp, _async=_async,
                        output_fields=[ct.default_string_field_name, ct.default_float_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "ids": insert_ids,
                                     "limit": limit,
                                     "_async": _async,
                                     "output_fields": [ct.default_string_field_name,
                                                       ct.default_float_field_name]})
@pytest.mark.tags(CaseLabel.L0)
def test_search_default_value_with_insert(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type):
    """
    target: test search normal case with default value set
    method: create connection, collection with default value set, insert and search
    expected: 1. search successfully with limit(topK)
    """
    # 1. collection whose float field carries a default value of 10.0
    defaults = {ct.default_float_field_name: np.float32(10.0)}
    init_res = self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
                                            enable_dynamic_field=enable_dynamic_field,
                                            vector_data_type=vector_data_type,
                                            default_value_fields=defaults)
    collection_w, _, _, insert_ids, time_stamp = init_res[0:5]
    # 2. generate query vectors of the matching vector type
    query_vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
    # 3. search and verify the defaulted field is returned
    requested = [default_int64_field_name, default_float_field_name]
    collection_w.search(query_vectors[:nq], default_search_field,
                        default_search_params, default_limit,
                        default_search_exp,
                        output_fields=requested,
                        guarantee_timestamp=0,
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": nq,
                                     "ids": insert_ids,
                                     "limit": default_limit,
                                     "output_fields": requested})
@pytest.mark.tags(CaseLabel.L1)
def test_search_default_value_without_insert(self, enable_dynamic_field):
    """
    target: test search normal case with default value set
    method: create connection, collection with default value set, no insert and search
    expected: 1. search successfully with limit(topK)
    """
    # 1. empty collection (no insert) with a nullable + defaulted float field
    collection_w = self.init_collection_general(prefix, False, dim=default_dim,
                                                enable_dynamic_field=enable_dynamic_field,
                                                nullable_fields={ct.default_float_field_name: 0},
                                                default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0]
    # 2. build query vectors
    query_vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, "FLOAT_VECTOR")
    # 3. searching an empty collection must succeed with zero hits
    collection_w.search(query_vectors[:default_nq], default_search_field,
                        default_search_params, default_limit,
                        default_search_exp,
                        guarantee_timestamp=0,
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "limit": 0})
@pytest.mark.tags(CaseLabel.L2)
def test_search_after_default_data_all_field_datatype(self, varchar_scalar_index, numeric_scalar_index, _async):
    """
    target: test search after building scalar indexes on fields of every datatype with default values
    method: 1. create a collection where every scalar field has a default value and insert data
            2. build vector indexes plus one scalar index per scalar field
            3. load and search, requesting all scalar fields as output
    expected: search successfully with limit(topK)
    """
    # 1. initialize with data: each scalar field falls back to a default value
    # NOTE(review): the int16/int8 defaults are built with np.int32 —
    # presumably the server coerces them; confirm, otherwise use
    # np.int16(2) / np.int8(3).
    default_value_fields = {ct.default_int32_field_name: np.int32(1),
                            ct.default_int16_field_name: np.int32(2),
                            ct.default_int8_field_name: np.int32(3),
                            ct.default_bool_field_name: True,
                            ct.default_float_field_name: np.float32(10.0),
                            ct.default_double_field_name: 10.0,
                            ct.default_string_field_name: "1"}
    collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, 5000, partition_num=1,
                                                                  is_all_data_type=True, dim=default_dim,
                                                                  is_index=False,
                                                                  default_value_fields=default_value_fields)[0:4]
    # 2. create index on vector fields and load
    index = "HNSW"
    params = cf.get_index_params_params(index)
    default_index = {"index_type": index, "params": params, "metric_type": "L2"}
    vector_name_list = cf.extract_vector_field_name_list(collection_w)
    vector_name_list.append(ct.default_float_vec_field_name)
    for vector_name in vector_name_list:
        collection_w.create_index(vector_name, default_index)
    # 3. create index on the varchar scalar field with default data
    scalar_index_params = {"index_type": varchar_scalar_index, "params": {}}
    collection_w.create_index(ct.default_string_field_name, scalar_index_params)
    # 4. create index on the numeric scalar fields with default data
    scalar_index_params = {"index_type": numeric_scalar_index, "params": {}}
    collection_w.create_index(ct.default_int64_field_name, scalar_index_params)
    collection_w.create_index(ct.default_int32_field_name, scalar_index_params)
    collection_w.create_index(ct.default_int16_field_name, scalar_index_params)
    collection_w.create_index(ct.default_int8_field_name, scalar_index_params)
    # STL_SORT does not index bool fields, so skip it for that index type
    if numeric_scalar_index != "STL_SORT":
        collection_w.create_index(ct.default_bool_field_name, scalar_index_params)
    collection_w.create_index(ct.default_float_field_name, scalar_index_params)
    collection_w.load()
    # 5. search
    search_params = cf.gen_search_param(index, "L2")
    limit = search_params[0]["params"]["ef"]
    log.info("Searching with search params: {}".format(search_params[0]))
    vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
    output_fields = [ct.default_int64_field_name, ct.default_int32_field_name,
                     ct.default_int16_field_name, ct.default_int8_field_name,
                     ct.default_bool_field_name, ct.default_float_field_name,
                     ct.default_double_field_name, ct.default_string_field_name]
    # BUGFIX: the original passed the undefined name `search_param` (a
    # NameError at runtime); use the param set logged above.
    collection_w.search(vectors[:default_nq], default_search_field,
                        search_params[0], limit, default_search_exp, _async=_async,
                        output_fields=output_fields,
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "ids": insert_ids,
                                     "limit": limit,
                                     "_async": _async,
                                     "output_fields": output_fields})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip(reason="issue #36003")
def test_search_both_default_value_non_data(self, nq, dim, auto_id, is_flush, enable_dynamic_field, vector_data_type):
    """
    target: test search normal case with default value set
    method: create connection, collection with default value set, insert and search
    expected: 1. search successfully with limit(topK)
    """
    # 1. float field is fully nullable (ratio 1) AND carries a default value
    nullable = {ct.default_float_field_name: 1}
    defaults = {ct.default_float_field_name: np.float32(10.0)}
    init_res = self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush,
                                            enable_dynamic_field=enable_dynamic_field,
                                            vector_data_type=vector_data_type,
                                            nullable_fields=nullable,
                                            default_value_fields=defaults)
    collection_w, _, _, insert_ids, time_stamp = init_res[0:5]
    # 2. generate query vectors of the matching vector type
    query_vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
    # 3. search and verify the requested output fields are returned
    requested = [default_int64_field_name, default_float_field_name]
    collection_w.search(query_vectors[:nq], default_search_field,
                        default_search_params, default_limit,
                        default_search_exp,
                        output_fields=requested,
                        guarantee_timestamp=0,
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": nq,
                                     "ids": insert_ids,
                                     "limit": default_limit,
                                     "output_fields": requested})
@pytest.mark.tags(CaseLabel.L1)
def test_search_collection_with_non_default_data_after_release_load(self, nq, _async, null_data_percent):
    """
    target: search the pre-released collection after load
    method: 1. create collection with a nullable string field and a defaulted float field
            2. release collection
            3. load collection
            4. search the pre-released collection
    expected: search successfully
    """
    # 1. initialize with data (PEP 8: "nb = 2000", was "nb= 2000")
    nb = 2000
    dim = 64
    auto_id = True
    collection_w, _, _, insert_ids, time_stamp = \
        self.init_collection_general(prefix, True, nb, 1, auto_id=auto_id, dim=dim,
                                     nullable_fields={ct.default_string_field_name: null_data_percent},
                                     default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:5]
    # 2. release collection
    collection_w.release()
    # 3. Search the pre-released collection after load
    collection_w.load()
    # BUGFIX: log message said "...awith..." and did not match the test name.
    log.info("test_search_collection_with_non_default_data_after_release_load: searching after load")
    vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
    collection_w.search(vectors[:nq], default_search_field, default_search_params,
                        default_limit, default_search_exp, _async=_async,
                        output_fields=[ct.default_float_field_name, ct.default_string_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": nq,
                                     "ids": insert_ids,
                                     "limit": default_limit,
                                     "_async": _async,
                                     "output_fields": [ct.default_float_field_name,
                                                       ct.default_string_field_name]})
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.tags(CaseLabel.GPU)
@pytest.mark.skip(reason="issue #36184")
def test_search_after_different_index_with_params_none_default_data(self, varchar_scalar_index, numeric_scalar_index,
                                                                    null_data_percent, _async):
    """
    target: test search after different index
    method: 1. create a collection with a nullable string field and a defaulted float field
            2. build vector indexes plus scalar indexes on those two fields
            3. load and search with corresponding search params
    expected: search successfully with limit(topK)
    """
    # 1. initialize with data
    collection_w, _, _, insert_ids = \
        self.init_collection_general(prefix, True, 5000, partition_num=1,
                                     is_all_data_type=True, dim=default_dim, is_index=False,
                                     nullable_fields={ct.default_string_field_name: null_data_percent},
                                     default_value_fields={ct.default_float_field_name: np.float32(10.0)})[0:4]
    # 2. create index on vector field and load
    index = "HNSW"
    params = cf.get_index_params_params(index)
    default_index = {"index_type": index, "params": params, "metric_type": "COSINE"}
    vector_name_list = cf.extract_vector_field_name_list(collection_w)
    vector_name_list.append(ct.default_float_vec_field_name)
    for vector_name in vector_name_list:
        collection_w.create_index(vector_name, default_index)
    # 3. create index on scalar field with None data
    scalar_index_params = {"index_type": varchar_scalar_index, "params": {}}
    collection_w.create_index(ct.default_string_field_name, scalar_index_params)
    # 4. create index on scalar field with default data
    scalar_index_params = {"index_type": numeric_scalar_index, "params": {}}
    collection_w.create_index(ct.default_float_field_name, scalar_index_params)
    collection_w.load()
    # 5. search
    search_params = cf.gen_search_param(index, "COSINE")
    limit = search_params[0]["params"]["ef"]
    log.info("Searching with search params: {}".format(search_params[0]))
    vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
    # BUGFIX: the original passed the undefined name `search_param` (a
    # NameError once the skip is lifted); use the param set logged above.
    collection_w.search(vectors[:default_nq], default_search_field,
                        search_params[0], limit, default_search_exp, _async=_async,
                        output_fields=[ct.default_string_field_name, ct.default_float_field_name],
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "ids": insert_ids,
                                     "limit": limit,
                                     "_async": _async,
                                     "output_fields": [ct.default_string_field_name,
                                                       ct.default_float_field_name]})
@pytest.mark.tags(CaseLabel.L1)
def test_search_iterator_with_none_data(self, batch_size, null_data_percent):
    """
    target: test search iterator normal
    method: 1. search iterator
            2. check the result, expect pk
    expected: search successfully
    """
    # 1. collection with a nullable varchar field; index is built manually
    dim = 64
    nullable = {ct.default_string_field_name: null_data_percent}
    collection_w = self.init_collection_general(prefix, True, dim=dim, is_index=False,
                                                nullable_fields=nullable)[0]
    index_params = {"metric_type": "L2"}
    collection_w.create_index(field_name, index_params)
    collection_w.load()
    # 2. iterate over the search results in pages of batch_size
    search_params = {"metric_type": "L2"}
    query_vectors = cf.gen_vectors_based_on_vector_type(1, dim, "FLOAT_VECTOR")
    collection_w.search_iterator(query_vectors[:1], field_name, search_params, batch_size,
                                 check_task=CheckTasks.check_search_iterator,
                                 check_items={"batch_size": batch_size})
@pytest.mark.tags(CaseLabel.L1)
def test_search_normal_none_data_partition_key(self, is_flush, enable_dynamic_field, vector_data_type, null_data_percent):
    """
    target: test search normal case with none data inserted
    method: create connection, collection with nullable fields, insert data including none, and search
    expected: 1. search successfully with limit(topK)
    """
    # NOTE(review): the float field is both nullable and the partition key —
    # confirm the server permits a nullable partition-key field.
    # 1. initialize with data
    nullable = {ct.default_float_field_name: null_data_percent}
    init_res = self.init_collection_general(prefix, True, dim=default_dim, is_flush=is_flush,
                                            enable_dynamic_field=enable_dynamic_field,
                                            vector_data_type=vector_data_type,
                                            nullable_fields=nullable,
                                            is_partition_key=ct.default_float_field_name)
    collection_w, _, _, insert_ids, time_stamp = init_res[0:5]
    # 2. generate query vectors of the matching vector type
    query_vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim, vector_data_type)
    # 3. search and verify ids/limit plus the requested output fields
    requested = [default_int64_field_name, default_float_field_name]
    collection_w.search(query_vectors[:default_nq], default_search_field,
                        default_search_params, default_limit,
                        default_search_exp,
                        output_fields=requested,
                        guarantee_timestamp=0,
                        check_task=CheckTasks.check_search_results,
                        check_items={"nq": default_nq,
                                     "ids": insert_ids,
                                     "limit": default_limit,
                                     "output_fields": requested})