Add test cases for array fields (#27603)

Signed-off-by: nico <cheng.yuan@zilliz.com>
nico 2023-10-24 09:26:31 +08:00 committed by GitHub
parent 2446aa0cd7
commit a693af014f
9 changed files with 743 additions and 6 deletions

View File

@@ -96,6 +96,14 @@ def gen_json_field(name=ct.default_json_field_name, description=ct.default_desc,
return json_field
def gen_array_field(name=ct.default_array_field_name, element_type=DataType.INT64, max_capacity=ct.default_max_capacity,
description=ct.default_desc, is_primary=False, **kwargs):
array_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.ARRAY,
element_type=element_type, max_capacity=max_capacity,
description=description, is_primary=is_primary, **kwargs)
return array_field
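For reference, gen_array_field is a thin wrapper over the pymilvus FieldSchema API; a minimal sketch of the equivalent direct construction (assuming pymilvus 2.3+, where ARRAY fields take element_type and max_capacity, and VARCHAR elements additionally need max_length):
from pymilvus import DataType, FieldSchema
# an array column whose rows hold at most 100 INT64 elements
int_array = FieldSchema(name="int_array", dtype=DataType.ARRAY,
                        element_type=DataType.INT64, max_capacity=100)
# VARCHAR elements additionally require a per-element max_length
str_array = FieldSchema(name="string_array", dtype=DataType.ARRAY,
                        element_type=DataType.VARCHAR, max_capacity=100, max_length=65535)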
def gen_int8_field(name=ct.default_int8_field_name, description=ct.default_desc, is_primary=False, **kwargs):
int8_field, _ = ApiFieldSchemaWrapper().init_field_schema(name=name, dtype=DataType.INT8, description=description,
is_primary=is_primary, **kwargs)
@@ -170,6 +178,34 @@ def gen_default_collection_schema(description=ct.default_desc, primary_field=ct.
return schema
def gen_array_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, auto_id=False,
dim=ct.default_dim, enable_dynamic_field=False, max_capacity=ct.default_max_capacity,
max_length=100, with_json=False, **kwargs):
if enable_dynamic_field:
if primary_field is ct.default_int64_field_name:
fields = [gen_int64_field(), gen_float_vec_field(dim=dim)]
elif primary_field is ct.default_string_field_name:
fields = [gen_string_field(), gen_float_vec_field(dim=dim)]
else:
log.error("Primary key only support int or varchar")
assert False
else:
fields = [gen_int64_field(), gen_float_vec_field(dim=dim), gen_json_field(),
gen_array_field(name=ct.default_int32_array_field_name, element_type=DataType.INT32,
max_capacity=max_capacity),
gen_array_field(name=ct.default_float_array_field_name, element_type=DataType.FLOAT,
max_capacity=max_capacity),
gen_array_field(name=ct.default_string_array_field_name, element_type=DataType.VARCHAR,
max_capacity=max_capacity, max_length=max_length)]
if with_json is False:
fields.remove(gen_json_field())
schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
primary_field=primary_field, auto_id=auto_id,
enable_dynamic_field=enable_dynamic_field, **kwargs)
return schema
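A usage sketch of the new schema helper as the test classes below consume it (the collection name is illustrative):
# int64 pk + float_vector + int32/float/string array fields (json_field only when with_json=True)
schema = cf.gen_array_collection_schema(max_capacity=100, with_json=False)
collection_w = self.init_collection_wrap(name=cf.gen_unique_str("array"), schema=schema)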
def gen_bulk_insert_collection_schema(description=ct.default_desc, primary_field=ct.default_int64_field_name, with_varchar_field=True,
auto_id=False, dim=ct.default_dim, enable_dynamic_field=False, with_json=False):
if enable_dynamic_field:
@@ -359,6 +395,33 @@ def gen_default_data_for_upsert(nb=ct.default_nb, dim=ct.default_dim, start=0, s
return df, float_values
def gen_array_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
array_length=ct.default_max_capacity, with_json=False, random_primary_key=False):
if not random_primary_key:
int_values = pd.Series(data=[i for i in range(start, start + nb)])
else:
int_values = pd.Series(data=random.sample(range(start, start + nb), nb))
float_vec_values = gen_vectors(nb, dim)
json_values = [{"number": i, "float": i * 1.0} for i in range(start, start + nb)]
int32_values = pd.Series(data=[[np.int32(j) for j in range(i, i + array_length)] for i in range(start, start + nb)])
float_values = pd.Series(data=[[np.float32(j) for j in range(i, i + array_length)] for i in range(start, start + nb)])
string_values = pd.Series(data=[[str(j) for j in range(i, i + array_length)] for i in range(start, start + nb)])
df = pd.DataFrame({
ct.default_int64_field_name: int_values,
ct.default_float_vec_field_name: float_vec_values,
ct.default_json_field_name: json_values,
ct.default_int32_array_field_name: int32_values,
ct.default_float_array_field_name: float_values,
ct.default_string_array_field_name: string_values,
})
if with_json is False:
df.drop(ct.default_json_field_name, axis=1, inplace=True)
return df
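For illustration, a sketch of the frame this generator returns (column names come from common_type as above):
df = cf.gen_array_dataframe_data(nb=10, array_length=5)
# columns: int64, float_vector, int32_array, float_array, string_array
# row 0 holds [0, 1, 2, 3, 4] in int32_array and ['0', '1', '2', '3', '4'] in string_array
collection_w.insert(data=df)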
def gen_dataframe_multi_vec_fields(vec_fields, nb=ct.default_nb):
"""
gen dataframe data for fields: int64, float, float_vec and vec_fields
@@ -683,6 +746,25 @@ def gen_data_by_type(field, nb=None, start=None):
if nb is None:
return [random.random() for i in range(dim)]
return [[random.random() for i in range(dim)] for _ in range(nb)]
if data_type == DataType.ARRAY:
max_capacity = field.params['max_capacity']
element_type = field.element_type
if element_type == DataType.INT32:
if nb is None:
return [random.randint(-2147483648, 2147483647) for _ in range(max_capacity)]
return [[random.randint(-2147483648, 2147483647) for _ in range(max_capacity)] for _ in range(nb)]
if element_type == DataType.FLOAT:
if nb is None:
return [np.float32(random.random()) for _ in range(max_capacity)]
return [[np.float32(random.random()) for _ in range(max_capacity)] for _ in range(nb)]
if element_type == DataType.VARCHAR:
max_length = field.params['max_length']
max_length = min(20, max_length - 1)
length = random.randint(0, max_length)
if nb is None:
return ["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)]
return [["".join([chr(random.randint(97, 122)) for _ in range(length)]) for _ in range(max_capacity)] for _ in range(nb)]
return None
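A sketch of what the new ARRAY branch yields, assuming the field object returned by gen_array_field exposes element_type and params the way the branch reads them:
field = cf.gen_array_field(name="int32_array", element_type=DataType.INT32, max_capacity=3)
one_row = cf.gen_data_by_type(field)          # e.g. [1047, -208, 9], three random int32 values
many_rows = cf.gen_data_by_type(field, nb=2)  # a list of two such arrays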
@@ -986,6 +1068,21 @@ def gen_json_field_expressions():
return expressions
def gen_array_field_expressions():
expressions = [
"int32_array[0] > 0",
"0 <= int32_array[0] < 400 or 1000 > float_array[1] >= 500",
"int32_array[1] not in [1, 2, 3]",
"int32_array[1] in [1, 2, 3] and string_array[1] != '2'",
"int32_array == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]",
"int32_array[1] + 1 == 3 && int32_array[0] - 1 != 1",
"int32_array[1] % 100 == 0 && string_array[1] in ['1', '2']",
"int32_array[1] in [300/2, -10*30+800, (200-100)*2] "
"or (float_array[1] <= -4**5/2 || 100 <= int32_array[1] < 200)"
]
return expressions
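The tests below verify these filters client-side by translating the Milvus boolean operators to Python and evaluating the expression against each inserted row; a minimal sketch of that pattern:
expr = "int32_array[1] + 1 == 3 && int32_array[0] - 1 != 1"
int32_array = [0, 2, 4]  # one row's array value, bound as a local for eval
py_expr = expr.replace("&&", "and").replace("||", "or")
assert eval(py_expr)  # 2 + 1 == 3 and 0 - 1 != 1 -> True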
def gen_field_compare_expressions(fields1=None, fields2=None):
if fields1 is None:
fields1 = ["int64_1"]
@@ -1240,6 +1337,35 @@ def assert_json_contains(expr, list_data):
return result_ids
def assert_array_contains(expr, list_data):
nb = len(list_data)
result_ids = []
exp_ids = eval(expr.split(', ', 1)[1].split(')', 1)[0])
reverse = "not array" in expr or "not ARRAY" in expr
expr_prefix = expr.split('(', 1)[0]
if "array_contains_any" or "ARRAY_CONTAINS_ANY" in expr_prefix:
for i in range(nb):
set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
if set(exp_ids) & set_list_data:
result_ids.append(i)
elif "array_contains_all" or "ARRAY_CONTAINS_ALL" in expr_prefix:
for i in range(nb):
set_list_data = set(tuple(element) if isinstance(element, list) else element for element in list_data[i])
if set(exp_ids).issubset(set_list_data):
result_ids.append(i)
elif "array_contains" or "ARRAY_CONTAINS" in expr_prefix:
for i in range(nb):
if exp_ids in list_data[i]:
result_ids.append(i)
else:
log.warning("unknown expr: %s" % expr)
if reverse:
result_ids = [i for i in range(nb) if i not in result_ids]
return result_ids
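A usage sketch of the helper above; the expression must keep the name(field, value) shape the string parsing assumes:
rows = [[1, 2, 3], [4, 5, 6]]  # one array value per entity
# entity 0 is the only one whose array intersects [2, 9]
assert assert_array_contains("array_contains_any(int32_array, [2, 9])", rows) == [0]
# entity 1 is the only one whose array holds the scalar 5
assert assert_array_contains("array_contains(int32_array, 5)", rows) == [1]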
def assert_equal_index(index_1, index_2):
return index_to_dict(index_1) == index_to_dict(index_2)

View File

@@ -8,6 +8,7 @@ default_drop_interval = 3
default_dim = 128
default_nb = 2000
default_nb_medium = 5000
default_max_capacity = 100
default_top_k = 10
default_nq = 2
default_limit = 10
@@ -38,6 +39,10 @@ default_float_field_name = "float"
default_double_field_name = "double"
default_string_field_name = "varchar"
default_json_field_name = "json_field"
default_array_field_name = "int_array"
default_int32_array_field_name = "int32_array"
default_float_array_field_name = "float_array"
default_string_array_field_name = "string_array"
default_float_vec_field_name = "float_vector"
another_float_vec_field_name = "float_vector1"
default_binary_vec_field_name = "binary_vector"

View File

@@ -4,6 +4,7 @@ import numpy
import pandas as pd
import pytest
from pymilvus import DataType
from base.client_base import TestcaseBase
from common import common_func as cf
from common import common_type as ct
@@ -3820,7 +3821,7 @@
class TestCollectionJSON(TestcaseBase):
"""
******************************************************************
The following cases are used to test about json
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
@@ -3895,3 +3896,189 @@
self.collection_wrap.init_collection(name=c_name, schema=schema,
check_task=CheckTasks.check_collection_property,
check_items={exp_name: c_name, exp_schema: schema})
class TestCollectionARRAY(TestcaseBase):
"""
******************************************************************
The following cases are used to test about array
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L2)
def test_collection_array_field_element_type_not_exist(self):
"""
target: test create collection with ARRAY field without element type
method: create collection with one array field without element type
expected: Raise exception
"""
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
array_field = cf.gen_array_field(element_type=None)
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535, ct.err_msg: "element data type None is not valid"})
@pytest.mark.tags(CaseLabel.L2)
# @pytest.mark.skip("issue #27522")
@pytest.mark.parametrize("element_type", [1001, 'a', [], (), {1}, DataType.BINARY_VECTOR,
DataType.FLOAT_VECTOR, DataType.JSON, DataType.ARRAY])
def test_collection_array_field_element_type_invalid(self, element_type):
"""
target: Create a field with invalid element_type
method: Create a field with invalid element_type
1. Type not in DataType: 1, 'a', ...
2. Type in DataType: binary_vector, float_vector, json_field, array_field
expected: Raise exception
"""
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
array_field = cf.gen_array_field(element_type=element_type)
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
error = {ct.err_code: 65535, ct.err_msg: "element data type None is not valid"}
if element_type in ['a', {1}]:
error = {ct.err_code: 1, ct.err_msg: "Unexpected error"}
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_collection_array_field_no_capacity(self):
"""
target: Create a field without giving max_capacity
method: Create a field without giving max_capacity
expected: Raise exception
"""
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
array_field = cf.gen_array_field(max_capacity=None)
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "the value of max_capacity must be an integer"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("max_capacity", [[], 'a', (), -1, 4097])
def test_collection_array_field_invalid_capacity(self, max_capacity):
"""
target: Create a field with invalid max_capacity
method: Create a field with invalid max_capacity
1. Type invalid: [], 'a', ()
2. Value invalid: <0, >max_capacity(4096)
expected: Raise exception
"""
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
array_field = cf.gen_array_field(max_capacity=max_capacity)
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "the maximum capacity specified for a "
"Array should be in (0, 4096]"})
@pytest.mark.tags(CaseLabel.L2)
def test_collection_string_array_without_max_length(self):
"""
target: Create string array without giving max length
method: Create string array without giving max length
expected: Raise exception
"""
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
array_field = cf.gen_array_field(element_type=DataType.VARCHAR)
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "type param(max_length) should be specified for "
"varChar field of collection"})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("max_length", [[], 'a', (), -1, 65536])
def test_collection_string_array_max_length_invalid(self, max_length):
"""
target: Create string array with invalid max length
method: Create string array with invalid max length
1. Type invalid: [], 'a', ()
2. Value invalid: <0, >max_length(65535)
expected: Raise exception
"""
int_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
array_field = cf.gen_array_field(element_type=DataType.VARCHAR, max_length=max_length)
array_schema = cf.gen_collection_schema([int_field, vec_field, array_field])
self.init_collection_wrap(schema=array_schema, check_task=CheckTasks.err_res,
check_items={ct.err_code: 65535,
ct.err_msg: "the maximum length specified for a VarChar "
"should be in (0, 65535]"})
@pytest.mark.tags(CaseLabel.L2)
def test_collection_array_field_all_datatype(self):
"""
target: test create collection with ARRAY fields of every element type
method: 1. Create fields respectively: int8, int16, int32, int64, varchar, bool, float, double
2. Insert data respectively: int8, int16, int32, int64, varchar, bool, float, double
expected: create collection and insert data successfully
"""
# Create field respectively
nb = ct.default_nb
pk_field = cf.gen_int64_field(is_primary=True)
vec_field = cf.gen_float_vec_field()
int8_array = cf.gen_array_field(name="int8_array", element_type=DataType.INT8, max_capacity=nb)
int16_array = cf.gen_array_field(name="int16_array", element_type=DataType.INT16, max_capacity=nb)
int32_array = cf.gen_array_field(name="int32_array", element_type=DataType.INT32, max_capacity=nb)
int64_array = cf.gen_array_field(name="int64_array", element_type=DataType.INT64, max_capacity=nb)
bool_array = cf.gen_array_field(name="bool_array", element_type=DataType.BOOL, max_capacity=nb)
float_array = cf.gen_array_field(name="float_array", element_type=DataType.FLOAT, max_capacity=nb)
double_array = cf.gen_array_field(name="double_array", element_type=DataType.DOUBLE, max_capacity=nb)
string_array = cf.gen_array_field(name="string_array", element_type=DataType.VARCHAR, max_capacity=nb,
max_length=100)
array_schema = cf.gen_collection_schema([pk_field, vec_field, int8_array, int16_array, int32_array,
int64_array, bool_array, float_array, double_array, string_array])
collection_w = self.init_collection_wrap(schema=array_schema,
check_task=CheckTasks.check_collection_property,
check_items={exp_schema: array_schema})
# check array in collection.describe()
res = collection_w.describe()[0]
log.info(res)
fields = [
{"field_id": 100, "name": "int64", "description": "", "type": 5, "params": {},
"element_type": 0, "is_primary": True},
{"field_id": 101, "name": "float_vector", "description": "", "type": 101,
"params": {"dim": ct.default_dim}, "element_type": 0},
{"field_id": 102, "name": "int8_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 2},
{"field_id": 103, "name": "int16_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 3},
{"field_id": 104, "name": "int32_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 4},
{"field_id": 105, "name": "int64_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 5},
{"field_id": 106, "name": "bool_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 1},
{"field_id": 107, "name": "float_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 10},
{"field_id": 108, "name": "double_array", "description": "", "type": 22,
"params": {"max_capacity": "2000"}, "element_type": 11},
{"field_id": 109, "name": "string_array", "description": "", "type": 22,
"params": {"max_length": "100", "max_capacity": "2000"}, "element_type": 21}
]
assert res["fields"] == fields
# Insert data respectively
nb = 10
pk_values = [i for i in range(nb)]
float_vec = cf.gen_vectors(nb, ct.default_dim)
int8_values = [[numpy.int8(j) for j in range(nb)] for i in range(nb)]
int16_values = [[numpy.int16(j) for j in range(nb)] for i in range(nb)]
int32_values = [[numpy.int32(j) for j in range(nb)] for i in range(nb)]
int64_values = [[numpy.int64(j) for j in range(nb)] for i in range(nb)]
bool_values = [[numpy.bool_(j) for j in range(nb)] for i in range(nb)]
float_values = [[numpy.float32(j) for j in range(nb)] for i in range(nb)]
double_values = [[numpy.double(j) for j in range(nb)] for i in range(nb)]
string_values = [[str(j) for j in range(nb)] for i in range(nb)]
data = [pk_values, float_vec, int8_values, int16_values, int32_values, int64_values,
bool_values, float_values, double_values, string_values]
collection_w.insert(data)
# check insert successfully
collection_w.flush()
assert collection_w.num_entities == nb

View File

@@ -1,6 +1,7 @@
import random
import time
import pandas as pd
import numpy as np
import pytest
from base.client_base import TestcaseBase
@@ -1892,6 +1893,52 @@
# query to check
collection_w.query(f"int64 in {filter_ids}", check_task=CheckTasks.check_query_empty)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expression", cf.gen_array_field_expressions())
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_delete_array_expressions(self, expression, enable_dynamic_field):
"""
target: test delete entities using array expressions
method: delete entities using array expressions
expected: delete successfully
"""
# 1. create a collection
nb = ct.default_nb
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)
# 2. insert data
array_length = 100
data = []
for i in range(nb):
arr = {ct.default_int64_field_name: i,
ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)],
ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)],
ct.default_string_array_field_name: [str(i) for i in range(array_length)]}
data.append(arr)
collection_w.insert(data)
collection_w.flush()
# 3. filter result with expression in collection
expression = expression.replace("&&", "and").replace("||", "or")
filter_ids = []
for i in range(nb):
int32_array = data[i][ct.default_int32_array_field_name]
float_array = data[i][ct.default_float_array_field_name]
string_array = data[i][ct.default_string_array_field_name]
if not expression or eval(expression):
filter_ids.append(i)
# 4. delete by array expression
collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
collection_w.load()
res = collection_w.delete(expression)[0]
assert res.delete_count == len(filter_ids)
# 5. query to check
collection_w.query(expression, check_task=CheckTasks.check_query_empty)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("field_name", ["varchar", "json_field['string']", "NewStr"])
@pytest.mark.parametrize("like", ["like", "LIKE"])
@@ -1981,7 +2028,7 @@
collection_w = self.init_collection_general(prefix, False, enable_dynamic_field=enable_dynamic_field)[0]
# insert
listMix = [[i, i + 2] for i in range(ct.default_nb)]  # only int
if enable_dynamic_field:
data = cf.gen_default_rows_data()
for i in range(ct.default_nb):

View File

@@ -42,6 +42,8 @@ default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
class TestHighLevelApi(TestcaseBase):
@@ -195,6 +197,41 @@
"primary_field": default_primary_key_field_name})
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_high_level_array_insert_search(self):
"""
target: test insert and search with array fields (high level api)
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._connect(enable_high_level_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
collections = client_w.list_collections(client)[0]
assert collection_name in collections
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{
default_primary_key_field_name: i,
default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0,
default_int32_array_field_name: [i, i+1, i+2],
default_string_array_field_name: [str(i), str(i + 1), str(i + 2)]
} for i in range(default_nb)]
client_w.insert(client, collection_name, rows)
client_w.flush(client, collection_name)
assert client_w.num_entities(client, collection_name)[0] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_high_level_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip(reason="issue 25110")
def test_high_level_search_query_string(self):

View File

@@ -247,6 +247,21 @@
ct.err_msg: f"there is no vector index on collection: {collection_w.name}, "
f"please create index firstly"})
@pytest.mark.tags(CaseLabel.L2)
def test_index_create_on_array_field(self):
"""
target: Test create index on array field
method: create index on array field
expected: raise exception
"""
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
error = {ct.err_code: 1100,
ct.err_msg: "create index on json field is not supported: expected=supported field, "
"actual=create index on Array field: invalid parameter"}
collection_w.create_index(ct.default_string_array_field_name, {},
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_index_collection_empty(self):
"""

View File

@@ -58,8 +58,7 @@
df = cf.gen_default_dataframe_data(ct.default_nb)
mutation_res, _ = collection_w.insert(data=df)
assert mutation_res.insert_count == ct.default_nb
assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L0)
@@ -204,8 +203,7 @@
df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
mutation_res, _ = collection_w.insert(data=df)
assert mutation_res.insert_count == ct.default_nb
assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L0)
@@ -2221,3 +2219,172 @@
data = (int_values, default_value, string_values, vectors)
collection_w.upsert(data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1, ct.err_msg: "Field varchar don't match in entities[0]"})
class TestInsertArray(TestcaseBase):
""" Test case of Insert array """
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_insert_array_dataframe(self, auto_id):
"""
target: test insert DataFrame data
method: Insert data in the form of dataframe
expected: assert num entities
"""
schema = cf.gen_array_collection_schema(auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
data = cf.gen_array_dataframe_data()
if auto_id:
data = data.drop(ct.default_int64_field_name, axis=1)
collection_w.insert(data=data)
collection_w.flush()
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_insert_array_list(self, auto_id):
"""
target: test insert list data
method: Insert data in the form of a list
expected: assert num entities
"""
schema = cf.gen_array_collection_schema(auto_id=auto_id)
collection_w = self.init_collection_wrap(schema=schema)
nb = ct.default_nb
arr_len = ct.default_max_capacity
pk_values = [i for i in range(nb)]
float_vec = cf.gen_vectors(nb, ct.default_dim)
int32_values = [[np.int32(j) for j in range(i, i+arr_len)] for i in range(nb)]
float_values = [[np.float32(j) for j in range(i, i+arr_len)] for i in range(nb)]
string_values = [[str(j) for j in range(i, i+arr_len)] for i in range(nb)]
data = [pk_values, float_vec, int32_values, float_values, string_values]
if auto_id:
del data[0]
# log.info(data[0][1])
collection_w.insert(data=data)
assert collection_w.num_entities == nb
@pytest.mark.tags(CaseLabel.L1)
def test_insert_array_rows(self):
"""
target: test insert row data
method: Insert data in the form of rows
expected: assert num entities
"""
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
data = cf.get_row_data_by_schema(schema=schema)
collection_w.insert(data=data)
assert collection_w.num_entities == ct.default_nb
collection_w.upsert(data[:2])
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_empty_list(self):
"""
target: test insert array fields with empty lists
method: Insert data with the length of array = 0
expected: assert num entities
"""
nb = ct.default_nb
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
data = cf.gen_array_dataframe_data()
data[ct.default_int32_array_field_name] = [[] for _ in range(nb)]
collection_w.insert(data=data)
assert collection_w.num_entities == ct.default_nb
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_length_differ(self):
"""
target: test insert rows whose array lengths differ
method: insert data where every row's array length differs
expected: assert num entities
"""
nb = ct.default_nb
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
array = []
for i in range(nb):
arr_len1 = random.randint(0, ct.default_max_capacity)
arr_len2 = random.randint(0, ct.default_max_capacity)
arr = {
ct.default_int64_field_name: i,
ct.default_float_vec_field_name: [random.random() for _ in range(ct.default_dim)],
ct.default_int32_array_field_name: [np.int32(j) for j in range(arr_len1)],
ct.default_float_array_field_name: [np.float32(j) for j in range(arr_len2)],
ct.default_string_array_field_name: [str(j) for j in range(ct.default_max_capacity)],
}
array.append(arr)
collection_w.insert(array)
assert collection_w.num_entities == nb
data = cf.get_row_data_by_schema(nb=2, schema=schema)
collection_w.upsert(data)
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_length_invalid(self):
"""
target: Insert actual array length > max_capacity
method: Insert actual array length > max_capacity
expected: raise error
"""
# init collection
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
# Insert actual array length > max_capacity
arr_len = ct.default_max_capacity + 1
data = cf.get_row_data_by_schema(schema=schema)
data[1][ct.default_float_array_field_name] = [np.float32(i) for i in range(arr_len)]
err_msg = (f"the length (101) of 1th array exceeds max capacity ({ct.default_max_capacity}): "
f"expected=valid length array, actual=array length exceeds max capacity: invalid parameter")
collection_w.insert(data=data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1100, ct.err_msg: err_msg})
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_type_invalid(self):
"""
target: Insert array type invalid
method: 1. Insert string values to an int array
2. upsert float values to a string array
expected: raise error
"""
# init collection
arr_len = 10
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
data = cf.get_row_data_by_schema(schema=schema)
# 1. Insert string values to an int array
data[1][ct.default_int32_array_field_name] = [str(i) for i in range(arr_len)]
err_msg = "The data in the same column must be of the same type."
collection_w.insert(data=data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1, ct.err_msg: err_msg})
# 2. upsert float values to a string array
data = cf.get_row_data_by_schema(schema=schema)
data[1][ct.default_string_array_field_name] = [np.float32(i) for i in range(arr_len)]
collection_w.upsert(data=data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1, ct.err_msg: err_msg})
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_mixed_value(self):
"""
target: Insert array consisting of mixed values
method: Insert array consisting of mixed values
expected: raise error
"""
# init collection
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
# Insert array consisting of mixed values
data = cf.get_row_data_by_schema(schema=schema)
data[1][ct.default_string_array_field_name] = ["a", 1, [2.0, 3.0], False]
collection_w.insert(data=data, check_task=CheckTasks.err_res,
check_items={ct.err_code: 1,
ct.err_msg: "The data in the same column must be of the same type."})

View File

@@ -1018,6 +1018,50 @@
res = collection_w.query(expression, limit=limit, offset=offset)[0]
assert len(res) == limit - offset
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("Too many are not supported")
@pytest.mark.parametrize("expression", cf.gen_normal_expressions_field("array_length(float_array)")[1:])
def test_query_expr_array_length(self, expression, enable_dynamic_field):
"""
target: test query with expressions using array_length
method: query with array_length expressions
expected: succeed
"""
# 1. create a collection
nb = ct.default_nb
max_capacity = 1000
schema = cf.gen_array_collection_schema(max_capacity=max_capacity)
collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)
# 2. insert data
data = []
length = []
for i in range(nb):
array_length = random.randint(0, max_capacity)
length.append(array_length)
arr = {ct.default_int64_field_name: i,
ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
ct.default_int32_array_field_name: [],
ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)],
ct.default_string_array_field_name: []}
data.append(arr)
collection_w.insert(data)
# 3. load and query
collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
collection_w.load()
res = collection_w.query(expression)[0]
# 4. check
expression = expression.replace("&&", "and").replace("||", "or")
expression = expression.replace("array_length(float_array)", "array_length")
filter_ids = []
for i in range(nb):
array_length = length[i]
if not expression or eval(expression):
filter_ids.append(i)
assert len(res) == len(filter_ids)
@pytest.mark.tags(CaseLabel.L1)
def test_query_expr_empty_without_limit(self):
"""

View File

@@ -1,3 +1,4 @@
import numpy as np
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
from common.constants import *
from utils.util_pymilvus import *
@@ -550,6 +551,63 @@
"err_msg": "failed to create query plan: cannot parse "
"expression: %s" % expression})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_expression_invalid_array_one(self):
"""
target: test search with invalid array expressions
method: search with an array expression whose index exceeds the array length
expected: raise exception
"""
# 1. create a collection
nb = ct.default_nb
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
data = cf.get_row_data_by_schema(schema=schema)
data[1][ct.default_int32_array_field_name] = [1]
collection_w.insert(data)
collection_w.create_index("float_vector", ct.default_index)
collection_w.load()
# 2. search
expression = "int32_array[101] > 0"
msg = ("failed to search: attempt #0: failed to search/query delegator 1 for channel "
"by-dev-rootcoord-dml_: fail to Search, QueryNode ID=1, reason=worker(1) query"
" failed: UnknownError: Assert \")index >= 0 && index < length_\" at /go/src/"
"github.com/milvus-io/milvus/internal/core/src/common/Array.h:454 => index out"
" of range, index=101, length=100: attempt #1: no available shard delegator "
"found: service unavailable")
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, nb, expression,
check_task=CheckTasks.err_res,
check_items={ct.err_code: 65538,
ct.err_msg: msg})
@pytest.mark.tags(CaseLabel.L1)
def test_search_with_expression_invalid_array_two(self):
"""
target: test search with invalid array expressions
method: test search with invalid array expressions
expected: raise exception
"""
# 1. create a collection
nb = ct.default_nb
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema)
data = cf.get_row_data_by_schema(schema=schema)
collection_w.insert(data)
collection_w.create_index("float_vector", ct.default_index)
collection_w.load()
# 2. search
expression = "int32_array[0] - 1 < 1"
error = {ct.err_code: 65535,
ct.err_msg: f"failed to create query plan: cannot parse expression: {expression}, "
f"error: LessThan is not supported in execution backend"}
collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, nb, expression,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_search_partition_invalid_type(self, get_invalid_partition):
"""
@@ -3019,6 +3077,57 @@
ids = hits.ids
assert set(ids).issubset(filter_ids_set)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("expression", cf.gen_array_field_expressions())
def test_search_with_expression_array(self, expression, _async, enable_dynamic_field):
"""
target: test search with array expressions
method: search with array expressions and verify the hit ids client-side
expected: searched successfully with correct limit(topK)
"""
# 1. create a collection
nb = ct.default_nb
schema = cf.gen_array_collection_schema()
collection_w = self.init_collection_wrap(schema=schema, enable_dynamic_field=enable_dynamic_field)
# 2. insert data
array_length = 10
data = []
for i in range(nb):
arr = {ct.default_int64_field_name: i,
ct.default_float_vec_field_name: cf.gen_vectors(1, ct.default_dim)[0],
ct.default_int32_array_field_name: [np.int32(i) for i in range(array_length)],
ct.default_float_array_field_name: [np.float32(i) for i in range(array_length)],
ct.default_string_array_field_name: [str(i) for i in range(array_length)]}
data.append(arr)
collection_w.insert(data)
# 3. filter result with expression in collection
expression = expression.replace("&&", "and").replace("||", "or")
filter_ids = []
for i in range(nb):
int32_array = data[i][ct.default_int32_array_field_name]
float_array = data[i][ct.default_float_array_field_name]
string_array = data[i][ct.default_string_array_field_name]
if not expression or eval(expression):
filter_ids.append(i)
# 4. create index
collection_w.create_index("float_vector", ct.default_index)
collection_w.load()
# 5. search with expression
log.info("test_search_with_expression: searching with expression: %s" % expression)
search_res, _ = collection_w.search(vectors[:default_nq], default_search_field,
default_search_params, nb, expression, _async=_async)
if _async:
search_res.done()
search_res = search_res.result()
for hits in search_res:
ids = hits.ids
assert set(ids) == set(filter_ids)
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.xfail(reason="issue 24514")
@pytest.mark.parametrize("expression", cf.gen_normal_expressions_field(default_float_field_name))