mirror of https://gitee.com/milvus-io/milvus.git
synced 2024-11-29 18:38:44 +08:00

test: support enable or disable multiple vector fields in all data type tests (#31201)

issue: #29799

Signed-off-by: binbin lv <binbin.lv@zilliz.com>

This commit is contained in:
parent c2aad513c0
commit 3b5209c460
@@ -263,7 +263,8 @@ class TestcaseBase(Base):
         default_schema = cf.gen_collection_schema_all_datatype(auto_id=auto_id, dim=dim,
                                                                primary_field=primary_field,
                                                                enable_dynamic_field=enable_dynamic_field,
-                                                               with_json=with_json)
+                                                               with_json=with_json,
+                                                               multiple_dim_array=multiple_dim_array)
         log.info("init_collection_general: collection creation")
         collection_w = self.init_collection_wrap(name=collection_name, schema=default_schema, **kwargs)
         vector_name_list = cf.extract_vector_field_name_list(collection_w)
@@ -273,8 +274,8 @@ class TestcaseBase(Base):
         # 3 insert data if specified
         if insert_data:
             collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp = \
-                cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id,
-                               dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json,
+                cf.insert_data(collection_w, nb, is_binary, is_all_data_type, auto_id=auto_id,
+                               dim=dim, enable_dynamic_field=enable_dynamic_field, with_json=with_json,
+                               random_primary_key=random_primary_key, multiple_dim_array=multiple_dim_array,
                                primary_field=primary_field, vector_data_type=vector_data_type)
         if is_flush:
@@ -286,10 +287,11 @@ class TestcaseBase(Base):
             if is_binary:
                 collection_w.create_index(ct.default_binary_vec_field_name, ct.default_bin_flat_index)
             else:
-                collection_w.create_index(ct.default_float_vec_field_name, ct.default_flat_index)
-                if len(multiple_dim_array) != 0 or is_all_data_type:
-                    for vector_name in vector_name_list:
-                        collection_w.create_index(vector_name, ct.default_flat_index)
+                if len(multiple_dim_array) == 0 or is_all_data_type == False:
+                    vector_name_list.append(ct.default_float_vec_field_name)
+                for vector_name in vector_name_list:
+                    collection_w.create_index(vector_name, ct.default_flat_index)

             collection_w.load()

         return collection_w, vectors, binary_raw_vectors, insert_ids, time_stamp
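Taken together, the client_base.py hunks above let a test opt into extra vector fields by passing multiple_dim_array down through init_collection_general. A minimal sketch of the resulting call pattern, assuming a TestcaseBase subclass and the helpers shown in this diff (the "multi_vec" prefix and the dims are illustrative, not from the commit):

    # Hypothetical usage inside a TestcaseBase subclass.
    def test_multi_vector_smoke(self):
        # Two extra 128-d vector fields alongside the defaults; the helper
        # forwards multiple_dim_array into the schema and insert paths, then
        # builds a FLAT index on every vector field it finds.
        collection_w, _, _, insert_ids, _ = self.init_collection_general(
            "multi_vec", True, 3000, is_all_data_type=True, dim=128,
            multiple_dim_array=[128, 128])[0:5]
        vector_name_list = cf.extract_vector_field_name_list(collection_w)
        assert len(vector_name_list) >= 2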
@@ -19,6 +19,8 @@ from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrappe
 from common import common_type as ct
 from utils.util_log import test_log as log
 from customize.milvus_operator import MilvusOperator
+import pickle
+import tensorflow as tf
 
 fake = Faker()
 """" Methods of processing data """
@@ -337,19 +339,25 @@ def gen_multiple_json_default_collection_schema(description=ct.default_desc, pri
 def gen_collection_schema_all_datatype(description=ct.default_desc,
                                        primary_field=ct.default_int64_field_name,
                                        auto_id=False, dim=ct.default_dim,
-                                       enable_dynamic_field=False, with_json=True, **kwargs):
+                                       enable_dynamic_field=False, with_json=True, multiple_dim_array=[], **kwargs):
     if enable_dynamic_field:
-        fields = [gen_int64_field(), gen_float_vec_field(dim=dim),
-                  gen_float_vec_field(name=ct.default_float16_vec_field_name, dim=dim, vector_data_type="FLOAT16_VECTOR"),
-                  gen_float_vec_field(name=ct.default_bfloat16_vec_field_name, dim=dim, vector_data_type="BFLOAT16_VECTOR")]
+        fields = [gen_int64_field()]
     else:
         fields = [gen_int64_field(), gen_int32_field(), gen_int16_field(), gen_int8_field(),
                   gen_bool_field(), gen_float_field(), gen_double_field(), gen_string_field(),
-                  gen_json_field(), gen_float_vec_field(dim=dim),
-                  gen_float_vec_field(name=ct.default_float16_vec_field_name, dim=dim, vector_data_type="FLOAT16_VECTOR"),
-                  gen_float_vec_field(name=ct.default_bfloat16_vec_field_name, dim=dim, vector_data_type="BFLOAT16_VECTOR")]
+                  gen_json_field()]
         if with_json is False:
             fields.remove(gen_json_field())
+
+    if len(multiple_dim_array) == 0:
+        fields.append(gen_float_vec_field(dim=dim))
+    else:
+        multiple_dim_array.insert(0, dim)
+        for i in range(len(multiple_dim_array)):
+            fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.vector_data_type_all[i%3]}",
+                                              dim=multiple_dim_array[i],
+                                              vector_data_type=ct.vector_data_type_all[i%3]))
+
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id,
                                                                     enable_dynamic_field=enable_dynamic_field, **kwargs)
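Note that the new multiple_dim_array branch prepends the base dim (insert(0, dim)), so the list the caller passes in is mutated, and each resulting field is named by the f-string multiple_vector_<type>, cycling through ct.vector_data_type_all. A short sketch under those assumptions (dims are arbitrary; schema.fields assumes the wrapper returns a pymilvus CollectionSchema):

    schema = cf.gen_collection_schema_all_datatype(dim=128, multiple_dim_array=[64, 32])
    # multiple_dim_array is now [128, 64, 32], yielding three vector fields:
    #   multiple_vector_FLOAT_VECTOR    (dim=128)
    #   multiple_vector_FLOAT16_VECTOR  (dim=64)
    #   multiple_vector_BFLOAT16_VECTOR (dim=32)
    field_names = [field.name for field in schema.fields]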
@@ -391,11 +399,20 @@ def gen_schema_multi_string_fields(string_fields):
 
 def gen_vectors(nb, dim, vector_data_type="FLOAT_VECTOR"):
     if vector_data_type == "FLOAT_VECTOR":
+        start = time.time()
         vectors = [[random.random() for _ in range(dim)] for _ in range(nb)]
+        end = time.time() - start
+        log.debug(f'FLOAT_VECTOR: {end}')
     elif vector_data_type == "FLOAT16_VECTOR":
+        start = time.time()
         vectors = gen_fp16_vectors(nb, dim)[1]
+        end = time.time() - start
+        log.debug(f'FLOAT16_VECTOR: {end}')
     elif vector_data_type == "BFLOAT16_VECTOR":
+        start = time.time()
         vectors = gen_bf16_vectors(nb, dim)[1]
+        end = time.time() - start
+        log.debug(f'BFLOAT16_VECTOR: {end}')
 
     if dim > 1:
         if vector_data_type=="FLOAT_VECTOR":
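The timing instrumentation added here exists because fp16/bf16 generation is noticeably slower than plain float lists (a search test further down is skipped for exactly this reason). An illustrative loop over the three supported types, with arbitrary nb and dim:

    # gen_vectors returns plain Python lists for FLOAT_VECTOR and byte-packed
    # vectors (via gen_fp16_vectors / gen_bf16_vectors) for the 16-bit types.
    for vdt in ct.vector_data_type_all:
        vecs = cf.gen_vectors(nb=10, dim=64, vector_data_type=vdt)
        log.debug(f"{vdt}: generated {len(vecs)} vectors")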
@@ -470,6 +487,7 @@ def gen_default_rows_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_js
         for i in range(len(multiple_dim_array)):
             dict[multiple_vector_field_name[i]] = gen_vectors(1, multiple_dim_array[i],
                                                               vector_data_type=vector_data_type)[0]
+    log.debug("generated default row data")
 
     return array
 
@@ -594,7 +612,8 @@ def gen_dataframe_multi_string_fields(string_fields, nb=ct.default_nb):
 
 
 def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
-                                auto_id=False, random_primary_key=False):
+                                auto_id=False, random_primary_key=False, multiple_dim_array=[],
+                                multiple_vector_field_name=[]):
     if not random_primary_key:
         int64_values = pd.Series(data=[i for i in range(start, start + nb)])
     else:
@@ -609,8 +628,6 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
     json_values = [{"number": i, "string": str(i), "bool": bool(i),
                     "list": [j for j in range(i, i + ct.default_json_list_length)]} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim)
-    float16_vec_values = gen_vectors(nb, dim, "FLOAT16_VECTOR")
-    bfloat16_vec_values = gen_vectors(nb, dim, "BFLOAT16_VECTOR")
     df = pd.DataFrame({
         ct.default_int64_field_name: int64_values,
         ct.default_int32_field_name: int32_values,
@@ -620,20 +637,27 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
         ct.default_float_field_name: float_values,
         ct.default_double_field_name: double_values,
         ct.default_string_field_name: string_values,
-        ct.default_json_field_name: json_values,
-        ct.default_float_vec_field_name: float_vec_values,
-        ct.default_float16_vec_field_name: float16_vec_values,
-        ct.default_bfloat16_vec_field_name: bfloat16_vec_values
+        ct.default_json_field_name: json_values
     })
+
+    if len(multiple_dim_array) == 0:
+        df[ct.default_float_vec_field_name] = float_vec_values
+    else:
+        for i in range(len(multiple_dim_array)):
+            df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3])
+
     if with_json is False:
         df.drop(ct.default_json_field_name, axis=1, inplace=True)
     if auto_id:
         df.drop(ct.default_int64_field_name, axis=1, inplace=True)
+    log.debug("generated data completed")
 
     return df
 
 
-def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True):
+def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
+                                        multiple_dim_array=[],
+                                        multiple_vector_field_name=[], partition_id=0):
     array = []
     for i in range(start, start + nb):
         dict = {ct.default_int64_field_name: i,
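With multiple_dim_array set, the DataFrame's vector columns now come from the caller-supplied multiple_vector_field_name list (type cycling by i%3) instead of the three fixed columns removed above. A sketch with illustrative names matching the schema helper's f-string:

    names = ["multiple_vector_FLOAT_VECTOR",
             "multiple_vector_FLOAT16_VECTOR",
             "multiple_vector_BFLOAT16_VECTOR"]
    df = cf.gen_dataframe_all_data_type(nb=100, dim=64,
                                        multiple_dim_array=[64, 64, 64],
                                        multiple_vector_field_name=names)
    assert all(name in df.columns for name in names)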
@@ -645,14 +669,21 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
                 ct.default_double_field_name: i * 1.0,
                 ct.default_string_field_name: str(i),
                 ct.default_json_field_name: {"number": i, "string": str(i), "bool": bool(i),
-                                             "list": [j for j in range(i, i + ct.default_json_list_length)]},
-                ct.default_float_vec_field_name: gen_vectors(1, dim)[0],
-                ct.default_float16_vec_field_name: gen_vectors(1, dim, "FLOAT16_VECTOR")[0],
-                ct.default_bfloat16_vec_field_name: gen_vectors(1, dim, "BFLOAT16_VECTOR")[0]
+                                             "list": [j for j in range(i, i + ct.default_json_list_length)]}
                 }
         if with_json is False:
             dict.pop(ct.default_json_field_name, None)
         array.append(dict)
+        if len(multiple_dim_array) == 0:
+            dict[ct.default_float_vec_field_name] = gen_vectors(1, dim)[0]
+        else:
+            for i in range(len(multiple_dim_array)):
+                dict[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i],
+                                                                  ct.vector_data_type_all[i])[0]
+
+    with open(ct.rows_all_data_type_file_path + f'_{partition_id}' + '.txt', 'wb') as json_file:
+        pickle.dump(array, json_file)
+    log.info("generated rows data")
 
     return array
 
@@ -1590,25 +1621,40 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
     # prepare data
     for i in range(num):
-        if not enable_dynamic_field:
-            default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
-                                                      random_primary_key=random_primary_key,
-                                                      multiple_dim_array=multiple_dim_array,
-                                                      multiple_vector_field_name=vector_name_list,
-                                                      vector_data_type=vector_data_type)
-        else:
-            default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json,
-                                                 multiple_dim_array=multiple_dim_array,
-                                                 multiple_vector_field_name=vector_name_list,
-                                                 vector_data_type=vector_data_type)
+        log.debug("Dynamic field is enabled: %s" % enable_dynamic_field)
+        if not is_binary:
+            if not is_all_data_type:
+                if not enable_dynamic_field:
+                    default_data = gen_default_dataframe_data(nb // num, dim=dim, start=start, with_json=with_json,
+                                                              random_primary_key=random_primary_key,
+                                                              multiple_dim_array=multiple_dim_array,
+                                                              multiple_vector_field_name=vector_name_list,
+                                                              vector_data_type=vector_data_type)
+                else:
+                    default_data = gen_default_rows_data(nb // num, dim=dim, start=start, with_json=with_json,
+                                                         multiple_dim_array=multiple_dim_array,
+                                                         multiple_vector_field_name=vector_name_list,
+                                                         vector_data_type=vector_data_type)
+
+            else:
+                if not enable_dynamic_field:
+                    default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
+                                                               random_primary_key=random_primary_key,
+                                                               multiple_dim_array=multiple_dim_array,
+                                                               multiple_vector_field_name=vector_name_list)
+                else:
+                    if os.path.exists(ct.rows_all_data_type_file_path + f'_{i}' + '.txt'):
+                        with open(ct.rows_all_data_type_file_path + f'_{i}' + '.txt', 'rb') as f:
+                            default_data = pickle.load(f)
+                    else:
+                        default_data = gen_default_rows_data_all_data_type(nb // num, dim=dim, start=start,
+                                                                           with_json=with_json,
+                                                                           multiple_dim_array=multiple_dim_array,
+                                                                           multiple_vector_field_name=vector_name_list,
+                                                                           partition_id=i)
         if is_binary:
             default_data, binary_raw_data = gen_default_binary_dataframe_data(nb // num, dim=dim, start=start)
             binary_raw_vectors.extend(binary_raw_data)
-        if is_all_data_type:
-            default_data = gen_dataframe_all_data_type(nb // num, dim=dim, start=start, with_json=with_json,
-                                                       random_primary_key=random_primary_key)
-            if enable_dynamic_field:
-                default_data = gen_default_rows_data_all_data_type(nb // num, dim=dim, start=start, with_json=with_json)
 
         if auto_id:
             if enable_dynamic_field:
                 for data in default_data:
@@ -1623,7 +1669,7 @@ def insert_data(collection_w, nb=ct.default_nb, is_binary=False, is_all_data_typ
             default_data.drop(ct.default_string_field_name, axis=1, inplace=True)
         # insert
         insert_res = collection_w.insert(default_data, par[i].name)[0]
-        log.info(f"inserted {nb} data into collection {collection_w.name}")
+        log.info(f"inserted {nb // num} data into collection {collection_w.name}")
         time_stamp = insert_res.timestamp
         insert_ids.extend(insert_res.primary_keys)
         vectors.append(default_data)
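The os.path.exists branch above is a simple file cache: all-data-type row generation is expensive (bf16 in particular), so each partition's rows are pickled once under ct.rows_all_data_type_file_path and reloaded on later runs. A standalone sketch of the same cache-or-generate pattern, with generate_rows as a hypothetical stand-in for gen_default_rows_data_all_data_type:

    import os
    import pickle

    def load_or_generate(path_prefix, partition_id, generate_rows):
        # Path scheme mirrors the diff: <prefix>_<partition_id>.txt
        path = path_prefix + f'_{partition_id}' + '.txt'
        if os.path.exists(path):
            with open(path, 'rb') as f:
                return pickle.load(f)
        rows = generate_rows()  # hypothetical zero-arg callable
        with open(path, 'wb') as f:
            pickle.dump(rows, f)
        return rows

One caveat worth noting: the cache is keyed only by partition id, so runs that change nb, dim, or multiple_dim_array will keep reading stale rows until the /tmp files are removed by hand.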
@@ -1831,8 +1877,9 @@ def gen_bf16_vectors(num, dim):
     for _ in range(num):
         raw_vector = [random.random() for _ in range(dim)]
         raw_vectors.append(raw_vector)
-        bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).view(np.uint8).tolist()
+        bf16_vector = tf.cast(raw_vector, dtype=tf.bfloat16).numpy().view(np.uint8).tolist()
        bf16_vectors.append(bytes(bf16_vector))
 
     return raw_vectors, bf16_vectors
 
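The jax-to-tensorflow swap keeps the same byte layout: floats are cast to bfloat16 and the 2-byte elements are reinterpreted as raw uint8 for transport. A minimal encode/decode sketch using only numpy and tensorflow (the decode is just a sanity check; bfloat16 retains roughly 3 significant digits):

    import random
    import numpy as np
    import tensorflow as tf

    dim = 8
    raw = [random.random() for _ in range(dim)]
    # Cast to bfloat16, then view each 2-byte element as raw uint8 bytes.
    packed = bytes(tf.cast(raw, dtype=tf.bfloat16).numpy().view(np.uint8).tolist())
    assert len(packed) == dim * 2  # bfloat16 is 2 bytes per component
    decoded = tf.io.decode_raw(packed, tf.bfloat16).numpy().astype(np.float32)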
@@ -48,6 +48,10 @@ default_float16_vec_field_name = "float16_vector"
 default_bfloat16_vec_field_name = "bfloat16_vector"
 another_float_vec_field_name = "float_vector1"
 default_binary_vec_field_name = "binary_vector"
+float_type = "FLOAT_VECTOR"
+float16_type = "FLOAT16_VECTOR"
+bfloat16_type = "BFLOAT16_VECTOR"
+vector_data_type_all = [float_type, float16_type, bfloat16_type]
 default_partition_name = "_default"
 default_resource_group_name = '__default_resource_group'
 default_resource_group_capacity = 1000000
@@ -105,6 +109,8 @@ default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE
 default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"}
 default_count_output = "count(*)"
 
+rows_all_data_type_file_path = "/tmp/rows_all_data_type"
+
 """" List of parameters used to pass """
 get_invalid_strs = [
     [],
@@ -57,4 +57,5 @@ fastparquet==2023.7.0
 # for bf16 datatype
 jax==0.4.13
 jaxlib==0.4.13
+tensorflow==2.13.1
 
@@ -2142,7 +2142,7 @@ class TestIndexDiskann(TestcaseBase):
         """
         target: test drop diskann index normal
         method: 1.create collection and insert data
-                2.create diskann index and uses collection.drop_index () drop index
+                2.create diskann index and uses collection.drop_index () drop index
         expected: drop index successfully
         """
         c_name = cf.gen_unique_str(prefix)
@@ -2272,6 +2272,66 @@ class TestCollectionSearch(TestcaseBase):
                                          "limit": limit,
                                          "_async": _async})
 
+    @pytest.mark.tags(CaseLabel.L2)
+    @pytest.mark.tags(CaseLabel.GPU)
+    @pytest.mark.skip(reason="waiting for the address of bf16 data generation slow problem")
+    @pytest.mark.parametrize("index, params",
+                             zip(ct.all_index_types[:7],
+                                 ct.default_index_params[:7]))
+    def test_search_after_different_index_with_params_all_vector_type_multiple_vectors(self, index, params, auto_id,
+                                                                                       _async, enable_dynamic_field,
+                                                                                       scalar_index):
+        """
+        target: test search after different index
+        method: test search after different index and corresponding search params
+        expected: search successfully with limit(topK)
+        """
+        if index == "DISKANN":
+            pytest.skip("https://github.com/milvus-io/milvus/issues/30793")
+        # 1. initialize with data
+        collection_w, _, _, insert_ids, time_stamp = self.init_collection_general(prefix, True, 5000,
+                                                                                  partition_num=1,
+                                                                                  is_all_data_type=True,
+                                                                                  auto_id=auto_id,
+                                                                                  dim=default_dim, is_index=False,
+                                                                                  enable_dynamic_field=enable_dynamic_field,
+                                                                                  multiple_dim_array=[default_dim, default_dim])[0:5]
+        # 2. create index on vector field and load
+        if params.get("m"):
+            if (default_dim % params["m"]) != 0:
+                params["m"] = default_dim // 4
+        if params.get("PQM"):
+            if (default_dim % params["PQM"]) != 0:
+                params["PQM"] = default_dim // 4
+        default_index = {"index_type": index, "params": params, "metric_type": "COSINE"}
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        for vector_name in vector_name_list:
+            collection_w.create_index(vector_name, default_index)
+        # 3. create index on scalar field
+        scalar_index_params = {"index_type": scalar_index, "params": {}}
+        collection_w.create_index(ct.default_int64_field_name, scalar_index_params)
+        collection_w.load()
+        # 4. search
+        search_params = cf.gen_search_param(index, "COSINE")
+        vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nq)]
+        for search_param in search_params:
+            log.info("Searching with search params: {}".format(search_param))
+            limit = default_limit
+            if index == "HNSW":
+                limit = search_param["params"]["ef"]
+                if limit > max_limit:
+                    limit = default_nb
+            if index == "DISKANN":
+                limit = search_param["params"]["search_list"]
+            collection_w.search(vectors[:default_nq], vector_name_list[0],
+                                search_param, limit,
+                                default_search_exp, _async=_async,
+                                check_task=CheckTasks.check_search_results,
+                                check_items={"nq": default_nq,
+                                             "ids": insert_ids,
+                                             "limit": limit,
+                                             "_async": _async})
+
     @pytest.mark.tags(CaseLabel.GPU)
     @pytest.mark.parametrize("index, params",
                              zip(ct.all_index_types[9:11],
@@ -3331,25 +3391,32 @@ class TestCollectionSearch(TestcaseBase):
         expected: search success
         """
         # 1. initialize with data
-        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb, is_all_data_type=True,
-                                                                      auto_id=auto_id, dim=dim)[0:4]
+        collection_w, _, _, insert_ids = self.init_collection_general(prefix, True, nb,
+                                                                      is_all_data_type=True,
+                                                                      auto_id=auto_id,
+                                                                      dim=dim,
+                                                                      enable_dynamic_field=enable_dynamic_field,
+                                                                      multiple_dim_array=[dim, dim])[0:4]
         # 2. search
         log.info("test_search_expression_all_data_type: Searching collection %s" %
                  collection_w.name)
-        vectors = [[random.random() for _ in range(dim)] for _ in range(nq)]
         search_exp = "int64 >= 0 && int32 >= 0 && int16 >= 0 " \
                      "&& int8 >= 0 && float >= 0 && double >= 0"
-        res = collection_w.search(vectors[:nq], default_search_field,
-                                  default_search_params, default_limit,
-                                  search_exp, _async=_async,
-                                  output_fields=[default_int64_field_name,
-                                                 default_float_field_name,
-                                                 default_bool_field_name],
-                                  check_task=CheckTasks.check_search_results,
-                                  check_items={"nq": nq,
-                                               "ids": insert_ids,
-                                               "limit": default_limit,
-                                               "_async": _async})[0]
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        for search_field in vector_name_list:
+            vector_data_type = search_field[:-9].lstrip("multiple_vector_")
+            vectors = cf.gen_vectors_based_on_vector_type(nq, dim, vector_data_type)
+            res = collection_w.search(vectors[:nq], search_field,
+                                      default_search_params, default_limit,
+                                      search_exp, _async=_async,
+                                      output_fields=[default_int64_field_name,
+                                                     default_float_field_name,
+                                                     default_bool_field_name],
+                                      check_task=CheckTasks.check_search_results,
+                                      check_items={"nq": nq,
+                                                   "ids": insert_ids,
+                                                   "limit": default_limit,
+                                                   "_async": _async})[0]
         if _async:
             res.done()
             res = res.result()
@@ -10642,6 +10709,64 @@ class TestCollectionHybridSearchValid(TestcaseBase):
                                          "ids": insert_ids,
                                          "limit": default_limit})
 
+    @pytest.mark.tags(CaseLabel.L1)
+    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
+    def test_hybrid_search_different_metric_type_each_field(self, primary_field, dim, auto_id, is_flush,
+                                                            enable_dynamic_field, metric_type):
+        """
+        target: test hybrid search for fields with different metric type
+        method: create connection, collection, insert and search
+        expected: hybrid search successfully with limit(topK)
+        """
+        # 1. initialize collection with data
+        collection_w, _, _, insert_ids, time_stamp = \
+            self.init_collection_general(prefix, True, auto_id=auto_id, dim=dim, is_flush=is_flush, is_index=False,
+                                         primary_field=primary_field,
+                                         enable_dynamic_field=False, multiple_dim_array=[dim, dim])[0:5]
+        # 2. extract vector field name
+        vector_name_list = cf.extract_vector_field_name_list(collection_w)
+        vector_name_list.append(ct.default_float_vec_field_name)
+        log.debug(vector_name_list)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "L2"}
+        collection_w.create_index(vector_name_list[0], flat_index)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "IP"}
+        collection_w.create_index(vector_name_list[1], flat_index)
+        flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"}
+        collection_w.create_index(vector_name_list[2], flat_index)
+        collection_w.load()
+        # 3. prepare search params
+        req_list = []
+        search_param = {
+            "data": [[random.random() for _ in range(dim)] for _ in range(1)],
+            "anns_field": vector_name_list[0],
+            "param": {"metric_type": "L2", "offset": 0},
+            "limit": default_limit,
+            "expr": "int64 > 0"}
+        req = AnnSearchRequest(**search_param)
+        req_list.append(req)
+        search_param = {
+            "data": [[random.random() for _ in range(dim)] for _ in range(1)],
+            "anns_field": vector_name_list[1],
+            "param": {"metric_type": "IP", "offset": 0},
+            "limit": default_limit,
+            "expr": "int64 > 0"}
+        req = AnnSearchRequest(**search_param)
+        req_list.append(req)
+        search_param = {
+            "data": [[random.random() for _ in range(dim)] for _ in range(1)],
+            "anns_field": vector_name_list[2],
+            "param": {"metric_type": "COSINE", "offset": 0},
+            "limit": default_limit,
+            "expr": "int64 > 0"}
+        req = AnnSearchRequest(**search_param)
+        req_list.append(req)
+        # 4. hybrid search
+        hybrid_search = collection_w.hybrid_search(req_list, WeightedRanker(0.1, 0.9, 1), default_limit,
+                                                   check_task=CheckTasks.check_search_results,
+                                                   check_items={"nq": 1,
+                                                                "ids": insert_ids,
+                                                                "limit": default_limit})[0]
+
     @pytest.mark.tags(CaseLabel.L1)
     @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
     @pytest.mark.xfail(reason="issue 29923")
@@ -41,7 +41,7 @@ export PIP_TRUSTED_HOST="nexus-nexus-repository-manager.nexus"
 export PIP_INDEX_URL="http://nexus-nexus-repository-manager.nexus:8081/repository/pypi-all/simple"
 export PIP_INDEX="http://nexus-nexus-repository-manager.nexus:8081/repository/pypi-all/pypi"
 export PIP_FIND_LINKS="http://nexus-nexus-repository-manager.nexus:8081/repository/pypi-all/pypi"
-python3 -m pip install --no-cache-dir -r requirements.txt --timeout 30 --retries 6
+python3 -m pip install --no-cache-dir -r requirements.txt --timeout 300 --retries 6
 }
 
 # Login in ci docker registry