test: add null and default test cases (#36539)

issue: #36129

Signed-off-by: binbin lv <binbin.lv@zilliz.com>
parent 8ed34dce84
commit d1d5a50014
@@ -589,40 +589,39 @@ def gen_data_by_data_field(data_field, rows, start=0, float_vector=True, dim=128
         else:
-            data = pd.Series([json.dumps({
-                gen_unique_str(): None}) for _ in range(start, rows + start)])
+            data = [json.dumps({gen_unique_str(): None}) for _ in range(start, rows + start)]
     elif data_field == DataField.array_bool_field:
         if not nullable:
             data = pd.Series(
                 [np.array([random.choice([True, False]) for _ in range(array_length)], dtype=np.dtype("bool"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
     elif data_field == DataField.array_int_field:
         if not nullable:
             data = pd.Series(
                 [np.array([random.randint(-999999, 9999999) for _ in range(array_length)], dtype=np.dtype("int64"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
     elif data_field == DataField.array_float_field:
         if not nullable:
             data = pd.Series(
                 [np.array([random.random() for _ in range(array_length)], dtype=np.dtype("float32"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]

     elif data_field == DataField.array_string_field:
         if not nullable:
             data = pd.Series(
                 [np.array([gen_unique_str(str(i)) for _ in range(array_length)], dtype=np.dtype("str"))
                  for i in range(start, rows + start)])
         else:
-            data = pd.Series(
-                [None for i in range(start, rows + start)])
+            data = [None for _ in range(start, rows + start)]
     else:
         raise Exception("unsupported field name")

     return data
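The pattern this hunk converges on: non-nullable array fields still produce real numpy arrays, while nullable fields now return a plain Python list of None instead of a pd.Series, so downstream file writers serialize genuine nulls. A minimal standalone sketch of that branch for the int64 case (gen_array_int_data is a hypothetical name, not one from the diff):

import random
import numpy as np

def gen_array_int_data(rows, array_length=10, nullable=False, start=0):
    # Non-nullable: one int64 numpy array per row, as in the original branch.
    if not nullable:
        return [np.array([random.randint(-999999, 9999999) for _ in range(array_length)],
                         dtype=np.dtype("int64"))
                for _ in range(start, rows + start)]
    # Nullable: a plain list of None, so the field is written as null values.
    return [None for _ in range(start, rows + start)]

print(gen_array_int_data(2, nullable=False))
print(gen_array_int_data(2, nullable=True))  # [None, None]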
@@ -676,7 +676,7 @@ def gen_array_collection_schema(description=ct.default_desc, primary_field=ct.de
               gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT,
                               max_capacity=max_capacity),
               gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR,
-                              max_capacity=max_capacity, max_length=max_length)]
+                              max_capacity=max_capacity, max_length=max_length, nullable=True)]
     if with_json is False:
         fields.remove(gen_json_field())
@@ -2934,7 +2934,7 @@ def gen_sparse_vectors(nb, dim=1000, sparse_format="dok"):
     return vectors


-def gen_vectors_based_on_vector_type(num, dim, vector_data_type):
+def gen_vectors_based_on_vector_type(num, dim, vector_data_type=ct.float_type):
     """
     generate float16 vector data
     raw_vectors : the vectors
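Giving vector_data_type a default of ct.float_type keeps existing two-argument call sites working (the partial-load search test later in this commit calls gen_vectors_based_on_vector_type(default_nq, default_dim)). A hedged illustration of the calling pattern, with a stand-in constant instead of the real ct module:

import random

FLOAT_TYPE = "FLOAT_VECTOR"  # stand-in for ct.float_type in the test utils

def gen_vectors(num, dim, vector_data_type=FLOAT_TYPE):
    # The data type only selects the generation path; float here for brevity.
    assert vector_data_type == FLOAT_TYPE, "sketch only covers the default path"
    return [[random.random() for _ in range(dim)] for _ in range(num)]

default_vecs = gen_vectors(2, 8)               # relies on the new default
explicit_vecs = gen_vectors(2, 8, FLOAT_TYPE)  # old-style explicit call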
@@ -887,6 +887,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             expr_field = df.string_field
             expr = f"{expr_field} >= '0'"
         else:
+            res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field, df.int_field])
+            assert len(res) == 0
             expr_field = df.pk_field
             expr = f"{expr_field} >= 0"
@@ -925,7 +927,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         if enable_dynamic_field is False and include_meta is True:
             pytest.skip("include_meta only works with enable_dynamic_field")
         if nullable is True:
-            pytest.skip("issue #36241")
+            pytest.skip("bulk insert from numpy files is not supported for fields with nullable=True")
         float_vec_field_dim = dim
         binary_vec_field_dim = ((dim + random.randint(-16, 32)) // 8) * 8
         bf16_vec_field_dim = dim + random.randint(-16, 32)
@@ -1201,18 +1203,26 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
         assert "name" in fields_from_search
         assert "address" in fields_from_search
         # query data
-        res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
-        if nullable is False:
-            assert len(res) == entities
-        query_data = [r[df.string_field] for r in res][:len(self.collection_wrap.partitions)]
-        res, _ = self.collection_wrap.query(expr=f"{df.string_field} in {query_data}", output_fields=[df.string_field])
-        if nullable is False:
-            assert len(res) == len(query_data)
+        if not nullable:
+            expr_field = df.string_field
+            expr = f"{expr_field} >= '0'"
+        else:
+            res, _ = self.collection_wrap.query(expr=f"{df.string_field} >= '0'", output_fields=[df.string_field])
+            assert len(res) == 0
+            expr_field = df.pk_field
+            expr = f"{expr_field} >= 0"
+
+        res, _ = self.collection_wrap.query(expr=f"{expr}", output_fields=[df.string_field])
+        assert len(res) == entities
+        query_data = [r[expr_field] for r in res][:len(self.collection_wrap.partitions)]
+        res, _ = self.collection_wrap.query(expr=f"{expr_field} in {query_data}", output_fields=[expr_field])
+        assert len(res) == len(query_data)
         res, _ = self.collection_wrap.query(expr=f"TextMatch({df.text_field}, 'milvus')", output_fields=[df.text_field])
-        if nullable is False:
+        if not nullable:
             assert len(res) == entities
         else:
             assert 0 < len(res) < entities

         if enable_partition_key:
             assert len(self.collection_wrap.partitions) > 1
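Both TestBulkInsert hunks apply the same rule: when the string field is nullable, bulk insert leaves it unset, so a range filter on it matches nothing and the queries pivot to the always-populated primary key. Distilled into a hypothetical helper (build_query_expr is not a name from the diff):

def build_query_expr(nullable, string_field="string_field", pk_field="pk_field"):
    # Non-nullable: filter on the string field directly.
    if not nullable:
        return string_field, f"{string_field} >= '0'"
    # Nullable: the string field holds nulls, so filter on the primary key.
    return pk_field, f"{pk_field} >= 0"

assert build_query_expr(False) == ("string_field", "string_field >= '0'")
assert build_query_expr(True) == ("pk_field", "pk_field >= 0")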
@@ -4664,3 +4664,48 @@ class TestCollectionDefaultValueInvalid(TestcaseBase):
         self.field_schema_wrap.init_field_schema(name="int8_null", dtype=DataType.INT8, default_value=None,
                                                  check_task=CheckTasks.err_res, check_items=error)
+
+
+class TestCollectionDefaultValueValid(TestcaseBase):
+    """ Test case of collection interface """
+
+    """
+    ******************************************************************
+    #  The following are valid cases
+    ******************************************************************
+    """
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue 36457")
+    def test_create_collection_default_value_twice(self):
+        """
+        target: test creating a collection with a default-value field twice
+        method: create the collection with the same default-value schema twice
+        expected: both creations succeed
+        """
+        self._connect()
+        int_fields = []
+        c_name = cf.gen_unique_str(prefix)
+        # build the schema: pk, a float field with a default value, and a vector field
+        int_fields.append(cf.gen_int64_field(is_primary=True))
+        int_fields.append(cf.gen_float_field(default_value=numpy.float32(10.0)))
+        int_fields.append(cf.gen_float_vec_field())
+        schema = cf.gen_collection_schema(fields=int_fields)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_create_collection_none_twice(self):
+        """
+        target: test creating a collection with a nullable field twice
+        method: create the collection with the same nullable-field schema twice
+        expected: both creations succeed
+        """
+        self._connect()
+        int_fields = []
+        c_name = cf.gen_unique_str(prefix)
+        int_fields.append(cf.gen_int64_field(is_primary=True))
+        int_fields.append(cf.gen_float_field(nullable=True))
+        int_fields.append(cf.gen_float_vec_field())
+        schema = cf.gen_collection_schema(fields=int_fields)
+        self.collection_wrap.init_collection(c_name, schema=schema)
+        self.collection_wrap.init_collection(c_name, schema=schema)
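For reference, the cf.gen_* helpers above ultimately build a pymilvus schema. Declaring default-value and nullable fields directly looks roughly like the following; this assumes a pymilvus release with null/default support (the feature this commit tests), and the field names are illustrative:

from pymilvus import CollectionSchema, FieldSchema, DataType

fields = [
    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True),
    # Rows that omit this field are stored with the default value 10.0.
    FieldSchema(name="float_with_default", dtype=DataType.FLOAT, default_value=10.0),
    # Rows may carry None for this field.
    FieldSchema(name="nullable_float", dtype=DataType.FLOAT, nullable=True),
    FieldSchema(name="vec", dtype=DataType.FLOAT_VECTOR, dim=8),
]
schema = CollectionSchema(fields=fields, description="null/default demo")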
@@ -2336,3 +2336,84 @@ class TestDeleteComplexExpr(TestcaseBase):
                            check_task=CheckTasks.check_query_results,
                            check_items={'count(*)': nb - len(filter_ids)})
+
+
+class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
+    """
+    Test case of delete interface with None data
+    """
+
+    @pytest.fixture(scope="function", params=[0, 0.5, 1])
+    def null_data_percent(self, request):
+        yield request.param
+
+    @pytest.mark.tags(CaseLabel.L1)
+    def test_delete_search_with_none_data(self, null_data_percent):
+        """
+        target: test delete and search when there is None data
+        method: search entities after they were deleted
+        expected: deleted entities are not in the search result
+        """
+        # init collection with nb default data
+        collection_w, _, _, ids = self.init_collection_general(prefix, insert_data=True,
+                                                               nullable_fields={ct.default_float_field_name: null_data_percent},
+                                                               default_value_fields={ct.default_string_field_name: "data"})[0:4]
+        entity, _ = collection_w.query(tmp_expr, output_fields=["*"])
+        search_res, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]],
+                                            ct.default_float_vec_field_name,
+                                            ct.default_search_params, ct.default_limit)
+        # assert the search result contains the entity
+        assert 0 in search_res[0].ids
+
+        expr = f'{ct.default_int64_field_name} in {ids[:ct.default_nb // 2]}'
+        collection_w.delete(expr)
+        search_res_2, _ = collection_w.search([entity[0][ct.default_float_vec_field_name]],
+                                              ct.default_float_vec_field_name,
+                                              ct.default_search_params, ct.default_limit)
+        # assert the deleted entities are absent from the second search result
+        log.debug(f"Second search result ids: {search_res_2[0].ids}")
+        inter = set(ids[:ct.default_nb // 2]).intersection(set(search_res_2[0].ids))
+        # With bounded staleness we could still search the "deleted" entities,
+        # since the search request may reach the query nodes before they
+        # consume the delete requests.
+        assert len(inter) == 0
+
+    @pytest.mark.tags(CaseLabel.L2)
+    def test_delete_entities_repeatedly_with_string_none_data(self, null_data_percent):
+        """
+        target: test delete entities twice with string expr
+        method: delete with the same expr twice
+        expected: no exception for the second deletion
+        """
+        # init collection with nb default data
+        collection_w = \
+            self.init_collection_general(prefix, nb=tmp_nb, insert_data=True, primary_field=ct.default_string_field_name,
+                                         nullable_fields={ct.default_float_field_name: null_data_percent},
+                                         default_value_fields={ct.default_int64_field_name: 100})[0]
+
+        # assert delete succeeds without exception
+        collection_w.delete(expr=default_string_expr)
+        collection_w.num_entities
+        collection_w.query(default_string_expr,
+                           check_task=CheckTasks.check_query_empty)
+        collection_w.delete(expr=default_string_expr)
+
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="waiting for the expr code part to be merged")
+    def test_delete_entities_repeatedly_with_expr_on_none_fields(self, null_data_percent):
+        """
+        target: test delete entities twice with expr on nullable fields
+        method: delete with the same expr twice
+        expected: no exception for the second deletion
+        """
+        # init collection with nb default data
+        collection_w = \
+            self.init_collection_general(prefix, nb=tmp_nb, insert_data=True, primary_field=ct.default_string_field_name,
+                                         nullable_fields={ct.default_float_field_name: null_data_percent},
+                                         default_value_fields={ct.default_int64_field_name: 100})[0]
+
+        # assert delete succeeds without exception
+        collection_w.delete(expr=default_string_expr)
+        collection_w.num_entities
+        collection_w.query(default_string_expr,
+                           check_task=CheckTasks.check_query_empty)
+        collection_w.delete(expr=default_string_expr)
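The core assertion in test_delete_search_with_none_data reduces to an intersection check between the deleted ids and a fresh search result. A self-contained sketch (assert_no_deleted_hits is a hypothetical name):

def assert_no_deleted_hits(deleted_ids, result_ids):
    # Under bounded staleness a search issued right after the delete may still
    # see the deleted entities; the test nevertheless expects an empty overlap.
    leaked = set(deleted_ids).intersection(result_ids)
    assert len(leaked) == 0, f"deleted ids leaked into search result: {leaked}"

assert_no_deleted_hits(deleted_ids=[0, 1, 2], result_ids=[3, 4, 5])  # passes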
@@ -3965,6 +3965,49 @@ class TestQueryCount(TestcaseBase):
                            check_items={"count": ct.default_nb,
                                         "batch_size": batch_size})

+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.repeat(3)
+    @pytest.mark.skip(reason="issue #36538")
+    def test_count_query_search_after_release_partition_load(self):
+        """
+        target: test query count(*) after releasing the collection and loading one partition
+        method: 1. create a collection and 2 partitions with nullable and default value fields
+                2. insert data
+                3. load one partition
+                4. delete half of the data in each partition
+                5. release the collection and load one partition
+                6. search
+        expected: no exception
+        """
+        # insert data
+        collection_w = self.init_collection_general(prefix, True, 200, partition_num=1, is_index=True)[0]
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 200}]})
+        collection_w.release()
+        partition_w1, partition_w2 = collection_w.partitions
+        # load one partition
+        partition_w1.load()
+        # delete data
+        delete_ids = [i for i in range(50, 150)]
+        collection_w.delete(f"int64 in {delete_ids}")
+        # release, then load only partition 1
+        collection_w.release()
+        collection_w.load(partition_names=[partition_w1.name])
+        # query count on the collection and partition 1; search on the unloaded partition 2
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        partition_w1.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)]
+        collection_w.search(vectors[:1], ct.default_float_vec_field_name, ct.default_search_params, 200,
+                            partition_names=[partition_w2.name],
+                            check_task=CheckTasks.err_res,
+                            check_items={ct.err_code: 1, ct.err_msg: 'not loaded'})
+
+
 class TestQueryIterator(TestcaseBase):
     """
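Why the expected count(*) drops from 200 to 50 in this test (and the near-identical one in the next hunk): the 200 rows are split across the two partitions, the delete removes ids 50-149, and after the final release only partition_w1 is loaded. Assuming, as the expected counts imply, that ids 0-99 land in the first partition, a quick arithmetic check:

# Assumed distribution: 100 ids per partition, inferred from the expected counts.
partitions = {"p1": set(range(0, 100)), "p2": set(range(100, 200))}
deleted = set(range(50, 150))

visible_after_partial_load = partitions["p1"] - deleted
assert len(visible_after_partial_load) == 50  # matches the expected count(*)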
@@ -4503,6 +4546,51 @@ class TestQueryNoneAndDefaultData(TestcaseBase):
         collection_w.query(term_expr, output_fields=[ct.default_int64_field_name, default_float_field_name],
                            check_task=CheckTasks.check_query_results, check_items={exp_res: res})

+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.skip(reason="issue #36538")
+    def test_query_none_count(self, null_data_percent):
+        """
+        target: test query count(*) with None and default data
+        method: 1. create a collection and 2 partitions with nullable and default value fields
+                2. insert data
+                3. load one partition
+                4. delete half of the data in each partition
+                5. release the collection and load one partition
+                6. search
+        expected: no exception
+        """
+        # insert data
+        collection_w = self.init_collection_general(prefix, True, 200, partition_num=1, is_index=True,
+                                                    nullable_fields={ct.default_float_field_name: null_data_percent},
+                                                    default_value_fields={ct.default_string_field_name: "data"})[0]
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 200}]})
+        collection_w.release()
+        partition_w1, partition_w2 = collection_w.partitions
+        # load one partition
+        partition_w1.load()
+        # delete data
+        delete_ids = [i for i in range(50, 150)]
+        collection_w.delete(f"int64 in {delete_ids}")
+        # release, then load only partition 1
+        collection_w.release()
+        collection_w.load(partition_names=[partition_w1.name])
+        # query count on the collection and partition 1; search on the unloaded partition 2
+        collection_w.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        partition_w1.query(expr='', output_fields=[ct.default_count_output],
+                           check_task=CheckTasks.check_query_results,
+                           check_items={"exp_res": [{ct.default_count_output: 50}]})
+        vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(ct.default_nq)]
+        collection_w.search(vectors[:1], ct.default_float_vec_field_name, ct.default_search_params, 200,
+                            partition_names=[partition_w2.name],
+                            check_task=CheckTasks.err_res,
+                            check_items={ct.err_code: 1, ct.err_msg: 'not loaded'})
+
+
 class TestQueryTextMatch(TestcaseBase):
     """
     ******************************************************************
@@ -13113,6 +13113,39 @@ class TestCollectionSearchNoneAndDefaultData(TestcaseBase):
                                          "output_fields": [default_int64_field_name,
                                                            default_float_field_name]})

+    @pytest.mark.tags(CaseLabel.L2)
+    def test_search_none_data_partial_load(self, is_flush, enable_dynamic_field, null_data_percent):
+        """
+        target: test search normal case with None data inserted
+        method: create connection and a collection with nullable fields, insert data including None, and search
+        expected: search succeeds with limit (topK) results
+        """
+        # 1. initialize with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, is_flush=is_flush,
+                                         enable_dynamic_field=enable_dynamic_field,
+                                         nullable_fields={ct.default_float_field_name: null_data_percent})[0:5]
+        # 2. release and partially load again
+        collection_w.release()
+        loaded_fields = [default_int64_field_name, ct.default_float_vec_field_name]
+        if not enable_dynamic_field:
+            loaded_fields.append(default_float_field_name)
+        collection_w.load(load_fields=loaded_fields)
+        # 3. generate search data
+        vectors = cf.gen_vectors_based_on_vector_type(default_nq, default_dim)
+        # 4. search after partial load of the field with None data
+        output_fields = [default_int64_field_name, default_float_field_name]
+        collection_w.search(vectors[:default_nq], default_search_field,
+                            default_search_params, default_limit,
+                            default_search_exp,
+                            output_fields=output_fields,
+                            check_task=CheckTasks.check_search_results,
+                            check_items={"nq": default_nq,
+                                         "ids": insert_ids,
+                                         "limit": default_limit,
+                                         "output_fields": output_fields})
+
+
 class TestSearchWithTextMatchFilter(TestcaseBase):
     """
     ******************************************************************
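test_search_none_data_partial_load relies on field-level partial loading: after release, load(load_fields=[...]) brings only the listed fields into memory, and only those fields can be requested later. A hedged client-side sketch; it assumes a running Milvus with partial-load support and an existing collection, and the names are illustrative:

from pymilvus import Collection, connections

connections.connect(host="localhost", port="19530")  # assumed local server
collection = Collection("demo_collection")           # hypothetical collection

collection.release()
# Load only the primary key and the vector field; the nullable float field
# stays on disk and cannot be used as an output field afterwards.
collection.load(load_fields=["int64", "float_vector"])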
@@ -13259,3 +13292,4 @@ class TestSearchWithTextMatchFilter(TestcaseBase):
         for r in res:
             r = r.to_dict()
             assert any([token in r["entity"][field] for token in top_10_tokens])
+