mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-11-30 02:48:45 +08:00
Add test cases of array (#27603)
Signed-off-by: nico <cheng.yuan@zilliz.com>
This commit is contained in:
parent
2446aa0cd7
commit
a693af014f
@ -96,6 +96,14 @@ def gen_json_field(name=ct.default_json_field_name, description=ct.default_desc,
|
|||||||
return json_field
|
return json_field
|
||||||
|
|
||||||
|
|
||||||
|
def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64, max_capacity=ct.default_max_capacity,
                    description=ct.default_desc, is_primary=False, **kwargs):
    """Return an ARRAY-typed field schema built through ApiFieldSchemaWrapper.

    ``element_type`` is the scalar type of the array elements and
    ``max_capacity`` the maximum number of elements per cell; extra kwargs
    (e.g. ``max_length`` for VARCHAR elements) are forwarded unchanged.
    """
    field_kwargs = dict(name=name, dtype=DataType.ARRAY, element_type=element_type,
                        max_capacity=max_capacity, description=description,
                        is_primary=is_primary, **kwargs)
    field, _ = ApiFieldSchemaWrapper().init_field_schema(**field_kwargs)
    return field
|
||||||
|
|
||||||
|
|
||||||
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
|
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
|
||||||
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
|
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
|
||||||
is_primary=is_primary, **kwargs)
|
is_primary=is_primary, **kwargs)
|
||||||
@ -170,6 +178,34 @@ def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.
|
|||||||
return schema
|
return schema
|
||||||
|
|
||||||
|
|
||||||
|
def gen_array_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, auto_id=False,
                                dim=ct.default_dim, enable_dynamic_field=False, max_capacity=ct.default_max_capacity,
                                max_length=100, with_json=False, **kwargs):
    """Build a collection schema containing int32/float/string array fields.

    :param primary_field: name of the primary key field; only the default
        int64 or varchar field names are supported.
    :param enable_dynamic_field: when True only the pk + vector fields are
        declared explicitly (other values go through the dynamic field).
    :param max_capacity: max element count for every array field.
    :param max_length: max length of each VARCHAR array element.
    :param with_json: include a json field in the static-schema case.
    :return: an initialized CollectionSchema wrapper result.
    """
    if enable_dynamic_field:
        # Fix: compare field names by value, not identity -- `is` on strings
        # only works through CPython interning and is not a reliable test.
        if primary_field == ct.default_int64_field_name:
            fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
        elif primary_field == ct.default_string_field_name:
            fields = [gen_string_field(), gen_float_vec_field(dim=dim)]
        else:
            log.error("Primary key only support int or varchar")
            assert False
    else:
        fields = [gen_int64_field(), gen_float_vec_field(dim=dim),
                  gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32,
                                  max_capacity=max_capacity),
                  gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT,
                                  max_capacity=max_capacity),
                  gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR,
                                  max_capacity=max_capacity, max_length=max_length)]
        if with_json:
            # Insert right after the vector field to keep the original field
            # order (pk, vector, json, arrays...). Building conditionally is
            # safer than fields.remove(gen_json_field()), which depended on
            # FieldSchema equality of a freshly constructed object.
            fields.insert(2, gen_json_field())

    schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                    primary_field=primary_field, auto_id=auto_id,
                                                                    enable_dynamic_field=enable_dynamic_field, **kwargs)
    return schema
|
||||||
|
|
||||||
|
|
||||||
def gen_bulk_insert_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, with_varchar_field=True,
|
def gen_bulk_insert_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, with_varchar_field=True,
|
||||||
auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=False):
|
auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=False):
|
||||||
if enable_dynamic_field:
|
if enable_dynamic_field:
|
||||||
@ -359,6 +395,33 @@ def gen_default_data_for_upsert(nb=ct.default_nb, dim=ct.default_dim, start=0, s
|
|||||||
return df, float_values
|
return df, float_values
|
||||||
|
|
||||||
|
|
||||||
|
def gen_array_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
                             array_length=ct.default_max_capacity, with_json=False, random_primary_key=False):
    """Build a DataFrame with pk, vector, json and int32/float/string array columns.

    For row ``i`` each array cell holds ``array_length`` consecutive values
    starting at ``i``. Primary keys cover ``[start, start + nb)``, shuffled
    when ``random_primary_key`` is True. The json column is dropped unless
    ``with_json`` is True.
    """
    pk_range = range(start, start + nb)
    if random_primary_key:
        pk_data = random.sample(pk_range, nb)
    else:
        pk_data = list(pk_range)
    int_values = pd.Series(data=pk_data)
    float_vec_values = gen_vectors(nb, dim)
    json_values = [{"number": i, "float": i * 1.0} for i in pk_range]

    int32_values = pd.Series(data=[[np.int32(j) for j in range(i, i + array_length)] for i in pk_range])
    float_values = pd.Series(data=[[np.float32(j) for j in range(i, i + array_length)] for i in pk_range])
    string_values = pd.Series(data=[[str(j) for j in range(i, i + array_length)] for i in pk_range])

    columns = {
        ct.default_int64_field_name: int_values,
        ct.default_float_vec_field_name: float_vec_values,
        ct.default_json_field_name: json_values,
        ct.default_int32_array_field_name: int32_values,
        ct.default_float_array_field_name: float_values,
        ct.default_string_array_field_name: string_values,
    }
    # Keep the exact original predicate: only the literal False drops the column.
    if with_json is False:
        del columns[ct.default_json_field_name]

    return pd.DataFrame(columns)
|
||||||
|
|
||||||
|
|
||||||
def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
|
def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
|
||||||
"""
|
"""
|
||||||
gen dataframe data for fields: int64, float, float_vec and vec_fields
|
gen dataframe data for fields: int64, float, float_vec and vec_fields
|
||||||
@ -683,6 +746,25 @@ def gen_data_by_type(field, nb=None, start=None):
|
|||||||
if nb is None:
|
if nb is None:
|
||||||
return [random.random() for i in range(dim)]
|
return [random.random() for i in range(dim)]
|
||||||
return [[random.random() for i in range(dim)] for _ in range(nb)]
|
return [[random.random() for i in range(dim)] for _ in range(nb)]
|
||||||
|
if data_type == DataType.ARRAY:
|
||||||
|
max_capacity = field.params['max_capacity']
|
||||||
|
element_type = field.element_type
|
||||||
|
if element_type == DataType.INT32:
|
||||||
|
if nb is None:
|
||||||
|
return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
|
||||||
|
return [[random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] for _ in range(nb)]
|
||||||
|
if element_type == DataType.FLOAT:
|
||||||
|
if nb is None:
|
||||||
|
return [np.float32(random.random()) for _ in range(max_capacity)]
|
||||||
|
return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
|
||||||
|
if element_type == DataType.VARCHAR:
|
||||||
|
max_length = field.params['max_length']
|
||||||
|
max_length = min(20, max_length - 1)
|
||||||
|
length = random.randint(0, max_length)
|
||||||
|
if nb is None:
|
||||||
|
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
|
||||||
|
return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@ -986,6 +1068,21 @@ def gen_json_field_expressions():
|
|||||||
return expressions
|
return expressions
|
||||||
|
|
||||||
|
|
||||||
|
def gen_array_field_expressions():
    """Return sample boolean filter expressions over the array test fields.

    Expressions reference the int32_array / float_array / string_array
    columns and mix Python-style (`and`, `or`, `not in`) with Milvus-style
    (`&&`, `||`) operators, plus arithmetic on array elements.
    """
    sample_expressions = (
        "int32_array[0] > 0",
        "0 <= int32_array[0] < 400 or 1000 > float_array[1] >= 500",
        "int32_array[1] not in [1, 2, 3]",
        "int32_array[1] in [1, 2, 3] and string_array[1] != '2'",
        "int32_array == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]",
        "int32_array[1] + 1 == 3 && int32_array[0] - 1 != 1",
        "int32_array[1] % 100 == 0 && string_array[1] in ['1', '2']",
        "int32_array[1] in [300/2, -10*30+800, (200-100)*2] "
        "or (float_array[1] <= -4**5/2 || 100 <= int32_array[1] < 200)",
    )
    return list(sample_expressions)
|
||||||
|
|
||||||
|
|
||||||
def gen_field_compare_expressions(fields1=None, fields2=None):
|
def gen_field_compare_expressions(fields1=None, fields2=None):
|
||||||
if fields1 is None:
|
if fields1 is None:
|
||||||
fields1 = ["int64_1"]
|
fields1 = ["int64_1"]
|
||||||
@ -1240,6 +1337,35 @@ def assert_json_contains(expr, list_data):
|
|||||||
return result_ids
|
return result_ids
|
||||||
|
|
||||||
|
|
||||||
|
def assert_array_contains(expr, list_data):
    """Evaluate an array_contains-style expression locally and return the
    row indices expected to match.

    :param expr: expression string such as
        ``"array_contains_any(int32_array, [1, 2])"`` or a negated form
        prefixed with ``not``.
    :param list_data: per-row array values; ``list_data[i]`` is row i's array.
    :return: list of matching row indices (complement when negated).
    """
    nb = len(list_data)
    result_ids = []
    # Extract the literal operand: text after the first ", " and before the
    # closing ")". NOTE: eval of a test-generated literal, not external input.
    exp_ids = eval(expr.split(', ', 1)[1].split(')', 1)[0])
    # BUG FIX: the original `True if "not array" or "not ARRAY" in expr` was
    # always True (a non-empty literal before `or` is truthy); test each
    # substring against expr explicitly.
    reverse = "not array" in expr or "not ARRAY" in expr
    expr_prefix = expr.split('(', 1)[0]
    # BUG FIX: likewise `"x" or "Y" in expr_prefix` was always truthy, so only
    # the first branch ever ran regardless of the operator in expr.
    if "array_contains_any" in expr_prefix or "ARRAY_CONTAINS_ANY" in expr_prefix:
        # Match rows sharing at least one element with exp_ids.
        for i in range(nb):
            set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
            if set(exp_ids) & set_list_data:
                result_ids.append(i)
    elif "array_contains_all" in expr_prefix or "ARRAY_CONTAINS_ALL" in expr_prefix:
        # Match rows containing every element of exp_ids.
        for i in range(nb):
            set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
            if set(exp_ids).issubset(set_list_data):
                result_ids.append(i)
    elif "array_contains" in expr_prefix or "ARRAY_CONTAINS" in expr_prefix:
        # Match rows containing the single element exp_ids.
        for i in range(nb):
            if exp_ids in list_data[i]:
                result_ids.append(i)
    else:
        log.warning("unknown expr: %s" % expr)

    if reverse:
        # BUG FIX: a negated expression matches the complement of result_ids;
        # the original `[x for x in result_ids if x not in range(nb)]` always
        # produced an empty list.
        result_ids = [x for x in range(nb) if x not in result_ids]

    return result_ids
|
||||||
|
|
||||||
|
|
||||||
def assert_equal_index(index_1, index_2):
|
def assert_equal_index(index_1, index_2):
|
||||||
return index_to_dict(index_1) == index_to_dict(index_2)
|
return index_to_dict(index_1) == index_to_dict(index_2)
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ default_drop_interval = 3
|
|||||||
default_dim = 128
|
default_dim = 128
|
||||||
default_nb = 2000
|
default_nb = 2000
|
||||||
default_nb_medium = 5000
|
default_nb_medium = 5000
|
||||||
|
default_max_capacity = 100
|
||||||
default_top_k = 10
|
default_top_k = 10
|
||||||
default_nq = 2
|
default_nq = 2
|
||||||
default_limit = 10
|
default_limit = 10
|
||||||
@ -38,6 +39,10 @@ default_float_field_name = "float"
|
|||||||
default_double_field_name = "double"
|
default_double_field_name = "double"
|
||||||
default_string_field_name = "varchar"
|
default_string_field_name = "varchar"
|
||||||
default_json_field_name = "json_field"
|
default_json_field_name = "json_field"
|
||||||
|
default_array_field_name = "int_array"
|
||||||
|
default_int32_array_field_name = "int32_array"
|
||||||
|
default_float_array_field_name = "float_array"
|
||||||
|
default_string_array_field_name = "string_array"
|
||||||
default_float_vec_field_name = "float_vector"
|
default_float_vec_field_name = "float_vector"
|
||||||
another_float_vec_field_name = "float_vector1"
|
another_float_vec_field_name = "float_vector1"
|
||||||
default_binary_vec_field_name = "binary_vector"
|
default_binary_vec_field_name = "binary_vector"
|
||||||
|
@ -4,6 +4,7 @@ import numpy
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from pymilvus import DataType
|
||||||
from base.client_base import TestcaseBase
|
from base.client_base import TestcaseBase
|
||||||
from common import common_func as cf
|
from common import common_func as cf
|
||||||
from common import common_type as ct
|
from common import common_type as ct
|
||||||
@ -3820,7 +3821,7 @@ class TestCollectionString(TestcaseBase):
|
|||||||
class TestCollectionJSON(TestcaseBase):
|
class TestCollectionJSON(TestcaseBase):
|
||||||
"""
|
"""
|
||||||
******************************************************************
|
******************************************************************
|
||||||
The following cases are used to test about string
|
The following cases are used to test about json
|
||||||
******************************************************************
|
******************************************************************
|
||||||
"""
|
"""
|
||||||
@pytest.mark.tags(CaseLabel.L1)
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
@ -3895,3 +3896,189 @@ class TestCollectionJSON(TestcaseBase):
|
|||||||
self.collection_wrap.init_collection(name=c_name, schema=schema,
|
self.collection_wrap.init_collection(name=c_name, schema=schema,
|
||||||
check_task=CheckTasks.check_collection_property,
|
check_task=CheckTasks.check_collection_property,
|
||||||
check_items={exp_name: c_name, exp_schema: schema})
|
check_items={exp_name: c_name, exp_schema: schema})
|
||||||
|
|
||||||
|
|
||||||
|
class TestCollectionARRAY(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test about array
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L2)
    def test_collection_array_field_element_type_not_exist(self):
        """
        target: test create collection with ARRAY field without element type
        method: create collection with one array field without element type
        expected: Raise exception
        """
        int_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        array_field = cf.gen_array_field(element_type=None)
        array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
        self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 65535, ct.err_msg: "element data type None is not valid"})

    @pytest.mark.tags(CaseLabel.L2)
    # @pytest.mark.skip("issue #27522")
    @pytest.mark.parametrize("element_type", [1001, 'a', [], (), {1}, DataType.BINARY_VECTOR,
                                              DataType.FLOAT_VECTOR, DataType.JSON, DataType.ARRAY])
    def test_collection_array_field_element_type_invalid(self, element_type):
        """
        target: Create a field with invalid element_type
        method: Create a field with invalid element_type
                1. Type not in DataType: 1, 'a', ...
                2. Type in DataType: binary_vector, float_vector, json_field, array_field
        expected: Raise exception
        """
        int_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        array_field = cf.gen_array_field(element_type=element_type)
        array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
        # Non-DataType values that cannot be coerced fail earlier with a
        # client-side "Unexpected error" instead of the server error.
        error = {ct.err_code: 65535, ct.err_msg: "element data type None is not valid"}
        if element_type in ['a', {1}]:
            error = {ct.err_code: 1, ct.err_msg: "Unexpected error"}
        self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_collection_array_field_no_capacity(self):
        """
        target: Create a field without giving max_capacity
        method: Create a field without giving max_capacity
        expected: Raise exception
        """
        int_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        array_field = cf.gen_array_field(max_capacity=None)
        array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
        self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 65535,
                                               ct.err_msg: "the value of max_capacity must be an integer"})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("max_capacity", [[], 'a', (), -1, 4097])
    def test_collection_array_field_invalid_capacity(self, max_capacity):
        """
        target: Create a field with invalid max_capacity
        method: Create a field with invalid max_capacity
                1. Type invalid: [], 'a', ()
                2. Value invalid: <0, >max_capacity(4096)
        expected: Raise exception
        """
        int_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        array_field = cf.gen_array_field(max_capacity=max_capacity)
        array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
        self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 65535,
                                               ct.err_msg: "the maximum capacity specified for a "
                                                           "Array should be in (0, 4096]"})

    @pytest.mark.tags(CaseLabel.L2)
    def test_collection_string_array_without_max_length(self):
        """
        target: Create string array without giving max length
        method: Create string array without giving max length
        expected: Raise exception
        """
        int_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        array_field = cf.gen_array_field(element_type=DataType.VARCHAR)
        array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
        self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 65535,
                                               ct.err_msg: "type param(max_length) should be specified for "
                                                           "varChar field of collection"})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("max_length", [[], 'a', (), -1, 65536])
    def test_collection_string_array_max_length_invalid(self, max_length):
        """
        target: Create string array with invalid max length
        method: Create string array with invalid max length
                1. Type invalid: [], 'a', ()
                2. Value invalid: <0, >max_length(65535)
        expected: Raise exception
        """
        int_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        array_field = cf.gen_array_field(element_type=DataType.VARCHAR, max_length=max_length)
        array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
        self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
                                  check_items={ct.err_code: 65535,
                                               ct.err_msg: "the maximum length specified for a VarChar "
                                                           "should be in (0, 65535]"})

    @pytest.mark.tags(CaseLabel.L2)
    def test_collection_array_field_all_datatype(self):
        """
        target: test create collection with ARRAY field all data type
        method: 1. Create field respectively: int8, int16, int32, int64, varchar, bool, float, double
                2. Insert data respectively: int8, int16, int32, int64, varchar, bool, float, double
        expected: Raise exception
        """
        # Create field respectively
        nb = ct.default_nb
        pk_field = cf.gen_int64_field(is_primary=True)
        vec_field = cf.gen_float_vec_field()
        int8_array = cf.gen_array_field(name="int8_array", element_type=DataType.INT8, max_capacity=nb)
        int16_array = cf.gen_array_field(name="int16_array", element_type=DataType.INT16, max_capacity=nb)
        int32_array = cf.gen_array_field(name="int32_array", element_type=DataType.INT32, max_capacity=nb)
        int64_array = cf.gen_array_field(name="int64_array", element_type=DataType.INT64, max_capacity=nb)
        bool_array = cf.gen_array_field(name="bool_array", element_type=DataType.BOOL, max_capacity=nb)
        float_array = cf.gen_array_field(name="float_array", element_type=DataType.FLOAT, max_capacity=nb)
        double_array = cf.gen_array_field(name="double_array", element_type=DataType.DOUBLE, max_capacity=nb)
        string_array = cf.gen_array_field(name="string_array", element_type=DataType.VARCHAR, max_capacity=nb,
                                          max_length=100)
        array_schema = cf.gen_collection_schema([pk_field, vec_field, int8_array, int16_array, int32_array,
                                                 int64_array, bool_array, float_array, double_array, string_array])
        collection_w = self.init_collection_wrap(schema=array_schema,
                                                 check_task=CheckTasks.check_collection_property,
                                                 check_items={exp_schema: array_schema})

        # check array in collection.describe()
        res = collection_w.describe()[0]
        log.info(res)
        # Expected describe() output: type 22 is ARRAY; element_type values
        # are the DataType codes of the element types declared above.
        fields = [
            {"field_id": 100, "name": "int64", "description": "", "type": 5, "params": {},
             "element_type": 0, "is_primary": True},
            {"field_id": 101, "name": "float_vector", "description": "", "type": 101,
             "params": {"dim": ct.default_dim}, "element_type": 0},
            {"field_id": 102, "name": "int8_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 2},
            {"field_id": 103, "name": "int16_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 3},
            {"field_id": 104, "name": "int32_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 4},
            {"field_id": 105, "name": "int64_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 5},
            {"field_id": 106, "name": "bool_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 1},
            {"field_id": 107, "name": "float_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 10},
            {"field_id": 108, "name": "double_array", "description": "", "type": 22,
             "params": {"max_capacity": "2000"}, "element_type": 11},
            {"field_id": 109, "name": "string_array", "description": "", "type": 22,
             "params": {"max_length": "100", "max_capacity": "2000"}, "element_type": 21}
        ]
        assert res["fields"] == fields

        # Insert data respectively
        nb = 10
        pk_values = [i for i in range(nb)]
        float_vec = cf.gen_vectors(nb, ct.default_dim)
        int8_values = [[numpy.int8(j) for j in range(nb)] for i in range(nb)]
        int16_values = [[numpy.int16(j) for j in range(nb)] for i in range(nb)]
        int32_values = [[numpy.int32(j) for j in range(nb)] for i in range(nb)]
        int64_values = [[numpy.int64(j) for j in range(nb)] for i in range(nb)]
        bool_values = [[numpy.bool_(j) for j in range(nb)] for i in range(nb)]
        float_values = [[numpy.float32(j) for j in range(nb)] for i in range(nb)]
        double_values = [[numpy.double(j) for j in range(nb)] for i in range(nb)]
        string_values = [[str(j) for j in range(nb)] for i in range(nb)]
        data = [pk_values, float_vec, int8_values, int16_values, int32_values, int64_values,
                bool_values, float_values, double_values, string_values]
        collection_w.insert(data)

        # check insert successfully
        collection_w.flush()
        # BUG FIX: the original line was a bare comparison with no `assert`,
        # so an insert-count mismatch could never fail the test.
        assert collection_w.num_entities == nb
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import random
|
import random
|
||||||
import time
|
import time
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from base.client_base import TestcaseBase
|
from base.client_base import TestcaseBase
|
||||||
@ -1892,6 +1893,52 @@ class TestDeleteComplexExpr(TestcaseBase):
|
|||||||
# query to check
|
# query to check
|
||||||
collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
|
collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
|
||||||
|
|
||||||
|
    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("expression", cf.gen_array_field_expressions())
    @pytest.mark.parametrize("enable_dynamic_field", [True, False])
    def test_delete_array_expressions(self, expression, enable_dynamic_field):
        """
        target: test delete entities using normal expression
        method: delete using normal expression
        expected: delete successfully
        """
        # 1. create a collection
        nb = ct.default_nb
        schema = cf.gen_array_collection_schema()
        # NOTE(review): enable_dynamic_field is passed to the wrapper here but
        # not to gen_array_collection_schema() -- confirm the wrapper applies
        # it to the schema, otherwise the dynamic-field variant is a no-op.
        collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)

        # 2. insert data: each array cell of row i holds 0..array_length-1
        # (the comprehension variable i shadows the row index only inside the
        # comprehension's own scope, so the pk/row values are unaffected).
        array_length = 100
        data = []
        for i in range(nb):
            arr = {ct.default_int64_field_name: i,
                   ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
                   ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)],
                   ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)],
                   ct.default_string_array_field_name: [str(i) for i in range(array_length)]}
            data.append(arr)
        collection_w.insert(data)
        collection_w.flush()

        # 3. filter result with expression in collection: translate Milvus
        # operators to Python and eval() the expression against the local
        # int32_array/float_array/string_array bindings for each row.
        expression = expression.replace("&&", "and").replace("||", "or")
        filter_ids = []
        for i in range(nb):
            int32_array = data[i][ct.default_int32_array_field_name]
            float_array = data[i][ct.default_float_array_field_name]
            string_array = data[i][ct.default_string_array_field_name]
            if not expression or eval(expression):
                filter_ids.append(i)

        # 4. delete by array expression; server-side delete count must agree
        # with the locally computed expectation.
        collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
        collection_w.load()
        res = collection_w.delete(expression)[0]
        assert res.delete_count == len(filter_ids)

        # 5. query to check: no entity matching the expression should remain.
        collection_w.query(expression, check_task=CheckTasks.check_query_empty)
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L1)
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
@pytest.mark.parametrize("field_name", ["varchar", "json_field['string']", "NewStr"])
|
@pytest.mark.parametrize("field_name", ["varchar", "json_field['string']", "NewStr"])
|
||||||
@pytest.mark.parametrize("like", ["like", "LIKE"])
|
@pytest.mark.parametrize("like", ["like", "LIKE"])
|
||||||
@ -1981,7 +2028,7 @@ class TestDeleteComplexExpr(TestcaseBase):
|
|||||||
collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
|
collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
|
||||||
|
|
||||||
# insert
|
# insert
|
||||||
listMix = [[i, i + 2] for i in range(ct.default_nb)] # only int
|
listMix = [[i, i + 2] for i in range(ct.default_nb)] # only int
|
||||||
if enable_dynamic_field:
|
if enable_dynamic_field:
|
||||||
data = cf.gen_default_rows_data()
|
data = cf.gen_default_rows_data()
|
||||||
for i in range(ct.default_nb):
|
for i in range(ct.default_nb):
|
||||||
|
@ -42,6 +42,8 @@ default_vector_field_name = "vector"
|
|||||||
default_float_field_name = ct.default_float_field_name
|
default_float_field_name = ct.default_float_field_name
|
||||||
default_bool_field_name = ct.default_bool_field_name
|
default_bool_field_name = ct.default_bool_field_name
|
||||||
default_string_field_name = ct.default_string_field_name
|
default_string_field_name = ct.default_string_field_name
|
||||||
|
default_int32_array_field_name = ct.default_int32_array_field_name
|
||||||
|
default_string_array_field_name = ct.default_string_array_field_name
|
||||||
|
|
||||||
|
|
||||||
class TestHighLevelApi(TestcaseBase):
|
class TestHighLevelApi(TestcaseBase):
|
||||||
@ -195,6 +197,41 @@ class TestHighLevelApi(TestcaseBase):
|
|||||||
"primary_field": default_primary_key_field_name})
|
"primary_field": default_primary_key_field_name})
|
||||||
client_w.drop_collection(client, collection_name)
|
client_w.drop_collection(client, collection_name)
|
||||||
|
|
||||||
|
    @pytest.mark.tags(CaseLabel.L1)
    def test_high_level_array_insert_search(self):
        """
        target: test search (high level api) normal case
        method: create connection, collection, insert and search
        expected: search/query successfully
        """
        client = self._connect(enable_high_level_api=True)
        collection_name = cf.gen_unique_str(prefix)
        # 1. create collection (quick-setup path: only the dimension is given,
        # extra array columns are presumably accepted as dynamic fields --
        # TODO confirm against the high-level API behavior)
        client_w.create_collection(client, collection_name, default_dim)
        collections = client_w.list_collections(client)[0]
        assert collection_name in collections
        # 2. insert rows carrying int32/string array values alongside the
        # pk/vector/float columns; fixed seed keeps the data deterministic.
        rng = np.random.default_rng(seed=19530)
        rows = [{
            default_primary_key_field_name: i,
            default_vector_field_name: list(rng.random((1, default_dim))[0]),
            default_float_field_name: i * 1.0,
            default_int32_array_field_name: [i, i+1, i+2],
            default_string_array_field_name: [str(i), str(i + 1), str(i + 2)]
        } for i in range(default_nb)]
        client_w.insert(client, collection_name, rows)
        client_w.flush(client, collection_name)
        assert client_w.num_entities(client, collection_name)[0] == default_nb
        # 3. search a single random vector and validate the result set shape
        # (nq, ids, limit) through the standard check task.
        vectors_to_search = rng.random((1, default_dim))
        insert_ids = [i for i in range(default_nb)]
        client_w.search(client, collection_name, vectors_to_search,
                        check_task=CheckTasks.check_search_results,
                        check_items={"enable_high_level_api": True,
                                     "nq": len(vectors_to_search),
                                     "ids": insert_ids,
                                     "limit": default_limit})
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L2)
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
@pytest.mark.skip(reason="issue 25110")
|
@pytest.mark.skip(reason="issue 25110")
|
||||||
def test_high_level_search_query_string(self):
|
def test_high_level_search_query_string(self):
|
||||||
|
@ -247,6 +247,21 @@ class TestIndexOperation(TestcaseBase):
|
|||||||
ct.err_msg: f"there is no vector index on collection: {collection_w.name}, "
|
ct.err_msg: f"there is no vector index on collection: {collection_w.name}, "
|
||||||
f"please create index firstly"})
|
f"please create index firstly"})
|
||||||
|
|
||||||
|
    @pytest.mark.tags(CaseLabel.L2)
    def test_index_create_on_array_field(self):
        """
        target: Test create index on array field
        method: create index on array field
        expected: raise exception
        """
        schema = cf.gen_array_collection_schema()
        collection_w = self.init_collection_wrap(schema=schema)
        # Expected server error: scalar indexes are not supported on Array
        # fields (the message text mentions "json field" -- that wording comes
        # from the server, presumably a server-side message quirk).
        error = {ct.err_code: 1100,
                 ct.err_msg: "create index on json field is not supported: expected=supported field, "
                             "actual=create index on Array field: invalid parameter"}
        collection_w.create_index(ct.default_string_array_field_name, {},
                                  check_task=CheckTasks.err_res, check_items=error)
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L1)
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
def test_index_collection_empty(self):
|
def test_index_collection_empty(self):
|
||||||
"""
|
"""
|
||||||
|
@ -58,8 +58,7 @@ class TestInsertParams(TestcaseBase):
|
|||||||
df = cf.gen_default_dataframe_data(ct.default_nb)
|
df = cf.gen_default_dataframe_data(ct.default_nb)
|
||||||
mutation_res, _ = collection_w.insert(data=df)
|
mutation_res, _ = collection_w.insert(data=df)
|
||||||
assert mutation_res.insert_count == ct.default_nb
|
assert mutation_res.insert_count == ct.default_nb
|
||||||
assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist(
|
assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
|
||||||
)
|
|
||||||
assert collection_w.num_entities == ct.default_nb
|
assert collection_w.num_entities == ct.default_nb
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L0)
|
@pytest.mark.tags(CaseLabel.L0)
|
||||||
@ -204,8 +203,7 @@ class TestInsertParams(TestcaseBase):
|
|||||||
df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
|
df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
|
||||||
mutation_res, _ = collection_w.insert(data=df)
|
mutation_res, _ = collection_w.insert(data=df)
|
||||||
assert mutation_res.insert_count == ct.default_nb
|
assert mutation_res.insert_count == ct.default_nb
|
||||||
assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist(
|
assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
|
||||||
)
|
|
||||||
assert collection_w.num_entities == ct.default_nb
|
assert collection_w.num_entities == ct.default_nb
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L0)
|
@pytest.mark.tags(CaseLabel.L0)
|
||||||
@ -2221,3 +2219,172 @@ class TestUpsertInvalid(TestcaseBase):
|
|||||||
data = (int_values, default_value, string_values, vectors)
|
data = (int_values, default_value, string_values, vectors)
|
||||||
collection_w.upsert(data, check_task=CheckTasks.err_res,
|
collection_w.upsert(data, check_task=CheckTasks.err_res,
|
||||||
check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
|
check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
|
||||||
|
|
||||||
|
|
||||||
|
class TestInsertArray(TestcaseBase):
|
||||||
|
""" Test case of Insert array """
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
|
@pytest.mark.parametrize("auto_id", [True, False])
|
||||||
|
def test_insert_array_dataframe(self, auto_id):
|
||||||
|
"""
|
||||||
|
target: test insert DataFrame data
|
||||||
|
method: Insert data in the form of dataframe
|
||||||
|
expected: assert num entities
|
||||||
|
"""
|
||||||
|
schema = cf.gen_array_collection_schema(auto_id=auto_id)
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
data = cf.gen_array_dataframe_data()
|
||||||
|
if auto_id:
|
||||||
|
data = data.drop(ct.default_int64_field_name, axis=1)
|
||||||
|
collection_w.insert(data=data)
|
||||||
|
collection_w.flush()
|
||||||
|
assert collection_w.num_entities == ct.default_nb
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
|
@pytest.mark.parametrize("auto_id", [True, False])
|
||||||
|
def test_insert_array_list(self, auto_id):
|
||||||
|
"""
|
||||||
|
target: test insert list data
|
||||||
|
method: Insert data in the form of a list
|
||||||
|
expected: assert num entities
|
||||||
|
"""
|
||||||
|
schema = cf.gen_array_collection_schema(auto_id=auto_id)
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
|
||||||
|
nb = ct.default_nb
|
||||||
|
arr_len = ct.default_max_capacity
|
||||||
|
pk_values = [i for i in range(nb)]
|
||||||
|
float_vec = cf.gen_vectors(nb, ct.default_dim)
|
||||||
|
int32_values = [[np.int32(j) for j in range(i, i+arr_len)] for i in range(nb)]
|
||||||
|
float_values = [[np.float32(j) for j in range(i, i+arr_len)] for i in range(nb)]
|
||||||
|
string_values = [[str(j) for j in range(i, i+arr_len)] for i in range(nb)]
|
||||||
|
|
||||||
|
data = [pk_values, float_vec, int32_values, float_values, string_values]
|
||||||
|
if auto_id:
|
||||||
|
del data[0]
|
||||||
|
# log.info(data[0][1])
|
||||||
|
collection_w.insert(data=data)
|
||||||
|
assert collection_w.num_entities == nb
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
|
def test_insert_array_rows(self):
|
||||||
|
"""
|
||||||
|
target: test insert row data
|
||||||
|
method: Insert data in the form of rows
|
||||||
|
expected: assert num entities
|
||||||
|
"""
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
collection_w.insert(data=data)
|
||||||
|
assert collection_w.num_entities == ct.default_nb
|
||||||
|
|
||||||
|
collection_w.upsert(data[:2])
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
def test_insert_array_empty_list(self):
|
||||||
|
"""
|
||||||
|
target: test insert DataFrame data
|
||||||
|
method: Insert data with the length of array = 0
|
||||||
|
expected: assert num entities
|
||||||
|
"""
|
||||||
|
nb = ct.default_nb
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
data = cf.gen_array_dataframe_data()
|
||||||
|
data[ct.default_int32_array_field_name] = [[] for _ in range(nb)]
|
||||||
|
collection_w.insert(data=data)
|
||||||
|
assert collection_w.num_entities == ct.default_nb
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
def test_insert_array_length_differ(self):
|
||||||
|
"""
|
||||||
|
target: test insert row data
|
||||||
|
method: Insert data with every row's array length differ
|
||||||
|
expected: assert num entities
|
||||||
|
"""
|
||||||
|
nb = ct.default_nb
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
array = []
|
||||||
|
for i in range(nb):
|
||||||
|
arr_len1 = random.randint(0, ct.default_max_capacity)
|
||||||
|
arr_len2 = random.randint(0, ct.default_max_capacity)
|
||||||
|
arr = {
|
||||||
|
ct.default_int64_field_name: i,
|
||||||
|
ct.default_float_vec_field_name: [random.random() for _ in range(ct.default_dim)],
|
||||||
|
ct.default_int32_array_field_name: [np.int32(j) for j in range(arr_len1)],
|
||||||
|
ct.default_float_array_field_name: [np.float32(j) for j in range(arr_len2)],
|
||||||
|
ct.default_string_array_field_name: [str(j) for j in range(ct.default_max_capacity)],
|
||||||
|
}
|
||||||
|
array.append(arr)
|
||||||
|
|
||||||
|
collection_w.insert(array)
|
||||||
|
assert collection_w.num_entities == nb
|
||||||
|
|
||||||
|
data = cf.get_row_data_by_schema(nb=2, schema=schema)
|
||||||
|
collection_w.upsert(data)
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
def test_insert_array_length_invalid(self):
|
||||||
|
"""
|
||||||
|
target: Insert actual array length > max_capacity
|
||||||
|
method: Insert actual array length > max_capacity
|
||||||
|
expected: raise error
|
||||||
|
"""
|
||||||
|
# init collection
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
# Insert actual array length > max_capacity
|
||||||
|
arr_len = ct.default_max_capacity + 1
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
data[1][ct.default_float_array_field_name] = [np.float32(i) for i in range(arr_len)]
|
||||||
|
err_msg = (f"the length (101) of 1th array exceeds max capacity ({ct.default_max_capacity}): "
|
||||||
|
f"expected=valid length array, actual=array length exceeds max capacity: invalid parameter")
|
||||||
|
collection_w.insert(data=data, check_task=CheckTasks.err_res,
|
||||||
|
check_items={ct.err_code: 1100, ct.err_msg: err_msg})
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
def test_insert_array_type_invalid(self):
|
||||||
|
"""
|
||||||
|
target: Insert array type invalid
|
||||||
|
method: 1. Insert string values to an int array
|
||||||
|
2. upsert float values to a string array
|
||||||
|
expected: raise error
|
||||||
|
"""
|
||||||
|
# init collection
|
||||||
|
arr_len = 10
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
|
||||||
|
# 1. Insert string values to an int array
|
||||||
|
data[1][ct.default_int32_array_field_name] = [str(i) for i in range(arr_len)]
|
||||||
|
err_msg = "The data in the same column must be of the same type."
|
||||||
|
collection_w.insert(data=data, check_task=CheckTasks.err_res,
|
||||||
|
check_items={ct.err_code: 1, ct.err_msg: err_msg})
|
||||||
|
|
||||||
|
# 2. upsert float values to a string array
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
data[1][ct.default_string_array_field_name] = [np.float32(i) for i in range(arr_len)]
|
||||||
|
collection_w.upsert(data=data, check_task=CheckTasks.err_res,
|
||||||
|
check_items={ct.err_code: 1, ct.err_msg: err_msg})
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
def test_insert_array_mixed_value(self):
|
||||||
|
"""
|
||||||
|
target: Insert array consisting of mixed values
|
||||||
|
method: Insert array consisting of mixed values
|
||||||
|
expected: raise error
|
||||||
|
"""
|
||||||
|
# init collection
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
# Insert array consisting of mixed values
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
data[1][ct.default_string_array_field_name] = ["a", 1, [2.0, 3.0], False]
|
||||||
|
collection_w.insert(data=data, check_task=CheckTasks.err_res,
|
||||||
|
check_items={ct.err_code: 1,
|
||||||
|
ct.err_msg: "The data in the same column must be of the same type."})
|
||||||
|
@ -1018,6 +1018,50 @@ class TestQueryParams(TestcaseBase):
|
|||||||
res = collection_w.query(expression, limit=limit, offset=offset)[0]
|
res = collection_w.query(expression, limit=limit, offset=offset)[0]
|
||||||
assert len(res) == limit - offset
|
assert len(res) == limit - offset
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
@pytest.mark.skip("Too many are not supported")
|
||||||
|
@pytest.mark.parametrize("expression", cf.gen_normal_expressions_field("array_length(float_array)")[1:])
|
||||||
|
def test_query_expr_array_length(self, expression, enable_dynamic_field):
|
||||||
|
"""
|
||||||
|
target: test query with expression using json_contains_any
|
||||||
|
method: query with expression using json_contains_any
|
||||||
|
expected: succeed
|
||||||
|
"""
|
||||||
|
# 1. create a collection
|
||||||
|
nb = ct.default_nb
|
||||||
|
max_capacity = 1000
|
||||||
|
schema = cf.gen_array_collection_schema(max_capacity=max_capacity)
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)
|
||||||
|
|
||||||
|
# 2. insert data
|
||||||
|
data = []
|
||||||
|
length = []
|
||||||
|
for i in range(nb):
|
||||||
|
array_length = random.randint(0, max_capacity)
|
||||||
|
length.append(array_length)
|
||||||
|
arr = {ct.default_int64_field_name: i,
|
||||||
|
ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
|
||||||
|
ct.default_int32_array_field_name: [],
|
||||||
|
ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)],
|
||||||
|
ct.default_string_array_field_name: []}
|
||||||
|
data.append(arr)
|
||||||
|
collection_w.insert(data)
|
||||||
|
|
||||||
|
# 3. load and query
|
||||||
|
collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
|
||||||
|
collection_w.load()
|
||||||
|
res = collection_w.query(expression)[0]
|
||||||
|
|
||||||
|
# 4. check
|
||||||
|
expression = expression.replace("&&", "and").replace("||", "or")
|
||||||
|
expression = expression.replace("array_length(float_array)", "array_length")
|
||||||
|
filter_ids = []
|
||||||
|
for i in range(nb):
|
||||||
|
array_length = length[i]
|
||||||
|
if not expression or eval(expression):
|
||||||
|
filter_ids.append(i)
|
||||||
|
assert len(res) == len(filter_ids)
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L1)
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
def test_query_expr_empty_without_limit(self):
|
def test_query_expr_empty_without_limit(self):
|
||||||
"""
|
"""
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import numpy as np
|
||||||
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
|
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
|
||||||
from common.constants import *
|
from common.constants import *
|
||||||
from utils.util_pymilvus import *
|
from utils.util_pymilvus import *
|
||||||
@ -550,6 +551,63 @@ class TestCollectionSearchInvalid(TestcaseBase):
|
|||||||
"err_msg": "failed to create query plan: cannot parse "
|
"err_msg": "failed to create query plan: cannot parse "
|
||||||
"expression: %s" % expression})
|
"expression: %s" % expression})
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
|
def test_search_with_expression_invalid_array_one(self):
|
||||||
|
"""
|
||||||
|
target: test search with invalid array expressions
|
||||||
|
method: test search with invalid array expressions:
|
||||||
|
the order of array > the length of array
|
||||||
|
expected: searched successfully with correct limit(topK)
|
||||||
|
"""
|
||||||
|
# 1. create a collection
|
||||||
|
nb = ct.default_nb
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
data[1][ct.default_int32_array_field_name] = [1]
|
||||||
|
collection_w.insert(data)
|
||||||
|
collection_w.create_index("float_vector", ct.default_index)
|
||||||
|
collection_w.load()
|
||||||
|
|
||||||
|
# 2. search
|
||||||
|
expression = "int32_array[101] > 0"
|
||||||
|
msg = ("failed to search: attempt #0: failed to search/query delegator 1 for channel "
|
||||||
|
"by-dev-rootcoord-dml_: fail to Search, QueryNode ID=1, reason=worker(1) query"
|
||||||
|
" failed: UnknownError: Assert \")index >= 0 && index < length_\" at /go/src/"
|
||||||
|
"github.com/milvus-io/milvus/internal/core/src/common/Array.h:454 => index out"
|
||||||
|
" of range, index=101, length=100: attempt #1: no available shard delegator "
|
||||||
|
"found: service unavailable")
|
||||||
|
collection_w.search(vectors[:default_nq], default_search_field,
|
||||||
|
default_search_params, nb, expression,
|
||||||
|
check_task=CheckTasks.err_res,
|
||||||
|
check_items={ct.err_code: 65538,
|
||||||
|
ct.err_msg: msg})
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L1)
|
||||||
|
def test_search_with_expression_invalid_array_two(self):
|
||||||
|
"""
|
||||||
|
target: test search with invalid array expressions
|
||||||
|
method: test search with invalid array expressions
|
||||||
|
expected: searched successfully with correct limit(topK)
|
||||||
|
"""
|
||||||
|
# 1. create a collection
|
||||||
|
nb = ct.default_nb
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema)
|
||||||
|
data = cf.get_row_data_by_schema(schema=schema)
|
||||||
|
collection_w.insert(data)
|
||||||
|
collection_w.create_index("float_vector", ct.default_index)
|
||||||
|
collection_w.load()
|
||||||
|
|
||||||
|
# 2. search
|
||||||
|
expression = "int32_array[0] - 1 < 1"
|
||||||
|
error = {ct.err_code: 65535,
|
||||||
|
ct.err_msg: f"failed to create query plan: cannot parse expression: {expression}, "
|
||||||
|
f"error: LessThan is not supported in execution backend"}
|
||||||
|
collection_w.search(vectors[:default_nq], default_search_field,
|
||||||
|
default_search_params, nb, expression,
|
||||||
|
check_task=CheckTasks.err_res, check_items=error)
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L2)
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
def test_search_partition_invalid_type(self, get_invalid_partition):
|
def test_search_partition_invalid_type(self, get_invalid_partition):
|
||||||
"""
|
"""
|
||||||
@ -3019,6 +3077,57 @@ class TestCollectionSearch(TestcaseBase):
|
|||||||
ids = hits.ids
|
ids = hits.ids
|
||||||
assert set(ids).issubset(filter_ids_set)
|
assert set(ids).issubset(filter_ids_set)
|
||||||
|
|
||||||
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
|
@pytest.mark.parametrize("expression", cf.gen_array_field_expressions())
|
||||||
|
def test_search_with_expression_array(self, expression, _async, enable_dynamic_field):
|
||||||
|
"""
|
||||||
|
target: test search with different expressions
|
||||||
|
method: test search with different expressions
|
||||||
|
expected: searched successfully with correct limit(topK)
|
||||||
|
"""
|
||||||
|
# 1. create a collection
|
||||||
|
nb = ct.default_nb
|
||||||
|
schema = cf.gen_array_collection_schema()
|
||||||
|
collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)
|
||||||
|
|
||||||
|
# 2. insert data
|
||||||
|
array_length = 10
|
||||||
|
data = []
|
||||||
|
for i in range(nb):
|
||||||
|
arr = {ct.default_int64_field_name: i,
|
||||||
|
ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
|
||||||
|
ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)],
|
||||||
|
ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)],
|
||||||
|
ct.default_string_array_field_name: [str(i) for i in range(array_length)]}
|
||||||
|
data.append(arr)
|
||||||
|
collection_w.insert(data)
|
||||||
|
|
||||||
|
# 3. filter result with expression in collection
|
||||||
|
expression = expression.replace("&&", "and").replace("||", "or")
|
||||||
|
filter_ids = []
|
||||||
|
for i in range(nb):
|
||||||
|
int32_array = data[i][ct.default_int32_array_field_name]
|
||||||
|
float_array = data[i][ct.default_float_array_field_name]
|
||||||
|
string_array = data[i][ct.default_string_array_field_name]
|
||||||
|
if not expression or eval(expression):
|
||||||
|
filter_ids.append(i)
|
||||||
|
|
||||||
|
# 4. create index
|
||||||
|
collection_w.create_index("float_vector", ct.default_index)
|
||||||
|
collection_w.load()
|
||||||
|
|
||||||
|
# 5. search with expression
|
||||||
|
log.info("test_search_with_expression: searching with expression: %s" % expression)
|
||||||
|
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
|
||||||
|
default_search_params, nb, expression, _async=_async)
|
||||||
|
if _async:
|
||||||
|
search_res.done()
|
||||||
|
search_res = search_res.result()
|
||||||
|
|
||||||
|
for hits in search_res:
|
||||||
|
ids = hits.ids
|
||||||
|
assert set(ids) == set(filter_ids)
|
||||||
|
|
||||||
@pytest.mark.tags(CaseLabel.L2)
|
@pytest.mark.tags(CaseLabel.L2)
|
||||||
@pytest.mark.xfail(reason="issue 24514")
|
@pytest.mark.xfail(reason="issue 24514")
|
||||||
@pytest.mark.parametrize("expression", cf.gen_normal_expressions_field(default_float_field_name))
|
@pytest.mark.parametrize("expression", cf.gen_normal_expressions_field(default_float_field_name))
|
||||||
|
Loading…
Reference in New Issue
Block a user