import pytest

from base.client_base import TestcaseBase
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *


class TestPartitionKeyParams(TestcaseBase):
    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
    def test_partition_key_on_field_schema(self, par_key_field):
        """
        Method:
        1. create a collection with the partition key enabled in the field schema
        2. verify that insert, index build, load and search succeed
        3. drop the collection
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field(is_partition_key=(par_key_field == ct.default_int64_field_name))
        string_field = cf.gen_string_field(is_partition_key=(par_key_field == ct.default_string_field_name))
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        assert len(collection_w.partitions) == ct.default_partition_num
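        # NOTE: with the partition key enabled and no explicit num_partitions, the server creates
        # a default number of internal partitions (exposed to these tests as ct.default_partition_num)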

        # insert
        nb = 1000
        string_prefix = cf.gen_str_by_length(length=6)
        entities_per_parkey = 10
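        # Each loop iteration reuses the same int64/string values, so every distinct partition key
        # value ends up with entities_per_parkey entities; the server routes each row to an
        # internal partition by hashing its partition key value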
        for _ in range(entities_per_parkey):
            int64_values = [i for i in range(0, nb)]
            string_values = [string_prefix + str(i) for i in range(0, nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [int64_values, string_values, float_vec_values]
            collection_w.insert(data)

        # flush
        collection_w.flush()
        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # load
        collection_w.load()
        # search
        nq = 10
        search_vectors = gen_vectors(nq, ct.default_dim)
        # search with a mixed filter on both scalar fields
        res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                   param=ct.default_search_params, limit=entities_per_parkey,
                                   expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                   output_fields=[int64_field.name, string_field.name],
                                   check_task=CheckTasks.check_search_results,
                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]
        # search with the int64 filter only (partition key in one parametrization, non-key in the other)
        res2 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                   param=ct.default_search_params, limit=entities_per_parkey,
                                   expr=f'{int64_field.name} in [1,3,5]',
                                   output_fields=[int64_field.name, string_field.name],
                                   check_task=CheckTasks.check_search_results,
                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]
        # search with the string filter only (partition key in one parametrization, non-key in the other)
        res3 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                   param=ct.default_search_params, limit=entities_per_parkey,
                                   expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                   output_fields=[int64_field.name, string_field.name],
                                   check_task=CheckTasks.check_search_results,
                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]
        # the three searches must return the same ids: partition-key filtering only prunes
        # partitions, it must not change the results
        for i in range(nq):
            assert res1[i].ids == res2[i].ids == res3[i].ids

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("par_key_field", [ct.default_int64_field_name, ct.default_string_field_name])
    @pytest.mark.parametrize("index_on_par_key_field", [True, False])
    def test_partition_key_on_collection_schema(self, par_key_field, index_on_par_key_field):
        """
        Method:
        1. create a collection with the partition key set on the collection schema and a customized num_partitions
        2. verify that insert, index build, load and search succeed
        3. drop the collection
        """
        self._connect()
        pk_field = cf.gen_string_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          auto_id=False, partition_key_field=par_key_field)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=9)
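        # sanity check: the customized num_partitions should be honored; this mirrors the
        # assertions in the sibling tests and assumes init_collection_wrap forwards num_partitions
        assert len(collection_w.partitions) == 9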

        # insert
        nb = 1000
        string_prefix = cf.gen_str_by_length(length=6)
        entities_per_parkey = 20
        for n in range(entities_per_parkey):
            pk_values = [str(i) for i in range(n * nb, (n + 1) * nb)]
            int64_values = [i for i in range(0, nb)]
            string_values = [string_prefix + str(i) for i in range(0, nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [pk_values, int64_values, string_values, float_vec_values]
            collection_w.insert(data)

        # flush
        collection_w.flush()
        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        if index_on_par_key_field:
            collection_w.create_index(field_name=par_key_field, index_params={})
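            # empty index_params builds a scalar index on the partition key field, leaving the
            # concrete index type to the server's default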

        # load
        collection_w.load()
        # search
        nq = 10
        search_vectors = gen_vectors(nq, ct.default_dim)
        # search with a mixed filter on both scalar fields
        res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                   param=ct.default_search_params, limit=entities_per_parkey,
                                   expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                   output_fields=[int64_field.name, string_field.name],
                                   check_task=CheckTasks.check_search_results,
                                   check_items={"nq": nq, "limit": entities_per_parkey})[0]

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_key_off_in_field_but_enable_in_schema(self):
        """
        Method:
        1. create a collection with the partition key disabled in the field schema but enabled in the collection schema
        2. verify the collection is created successfully
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field(is_partition_key=False)
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          partition_key_field=int64_field.name, auto_id=True)

        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=10)
        assert len(collection_w.partitions) == 10

    @pytest.mark.skip("need more investigation")
    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_key_bulk_insert(self):
        """
        Method:
        1. create a collection with partition key on
        2. bulk insert data
        3. verify the bulk-inserted data can be searched successfully
        """
        pass
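        # A plausible flow once re-enabled (sketch only): prepare import files, call
        # self.utility_wrap.do_bulk_insert(...) as in the commented example at the end of this
        # module, then search and verify each entity landed in the internal partition derived
        # from its partition key value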


class TestPartitionKeyInvalidParams(TestcaseBase):
    @pytest.mark.tags(CaseLabel.L1)
    def test_max_partitions(self):
        """
        Method:
        1. create a collection with the max number of partitions
        2. insert, build, load and search
        3. drop collection
        4. create a collection with max partitions + 1
        5. verify the error raised
        """
        max_partition = ct.max_partition_num
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field(is_partition_key=True)
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          auto_id=True)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=max_partition)
        assert len(collection_w.partitions) == max_partition
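        # ct.max_partition_num mirrors the server-side cap on partitions per collection;
        # requesting more than this is expected to be rejected below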

        # insert
        nb = 100
        string_prefix = cf.gen_str_by_length(length=6)
        for _ in range(5):
            int64_values = [i for i in range(0, nb)]
            string_values = [string_prefix + str(i) for i in range(0, nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [int64_values, string_values, float_vec_values]
            collection_w.insert(data)

        # drop collection
        collection_w.drop()

        # create a collection with max partitions + 1
        num_partitions = max_partition + 1
        err_msg = f"partition number ({num_partitions}) exceeds max configuration ({max_partition})"
        c_name = cf.gen_unique_str("par_key")
        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 1100, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L1)
    def test_min_partitions(self):
        """
        Method:
        1. create a collection with the min number of partitions
        2. insert, build, load and search
        3. drop collection
        4. create a collection with min partitions - 1
        5. verify the error raised
        """
        min_partition = 1
        self._connect()
        pk_field = cf.gen_string_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          partition_key_field=int64_field.name)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition)
        assert len(collection_w.partitions) == min_partition

        # insert
        nb = 100
        string_prefix = cf.gen_str_by_length(length=6)
        for _ in range(5):
            pk_values = [str(i) for i in range(0, nb)]
            int64_values = [i for i in range(0, nb)]
            string_values = [string_prefix + str(i) for i in range(0, nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [pk_values, int64_values, string_values, float_vec_values]
            collection_w.insert(data)
        collection_w.flush()

        # drop collection
        collection_w.drop()

        # create a collection with min partitions - 1
        err_msg = "The specified num_partitions should be greater than or equal to 1"
        c_name = cf.gen_unique_str("par_key")
        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 1,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 2, "err_msg": err_msg})
        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=min_partition - 3,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("is_par_key", [None, "", "invalid", 0.1, [], {}, ()])
    def test_invalid_partition_key_values(self, is_par_key):
        """
        Method:
        1. create a field with an invalid is_partition_key value
        2. verify the error raised
        """
        self._connect()
        err_msg = "Param is_partition_key must be bool type"
        cf.gen_int64_field(is_partition_key=is_par_key,
                           check_task=CheckTasks.err_res,
                           check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("num_partitions", [True, False, "", "invalid", 0.1, [], {}, ()])
    def test_invalid_partitions_values(self, num_partitions):
        """
        Method:
        1. create a collection with an invalid num_partitions
        2. verify the error raised
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field(is_partition_key=True)
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field])

        err_msg = "invalid num_partitions type"
        c_name = cf.gen_unique_str("par_key")
        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=num_partitions,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L0)
    def test_partition_key_on_multi_fields(self):
        """
        Method:
        1. create a collection with the partition key set on multiple fields
        2. verify the error raised
        """
        self._connect()
        # both defined in the field schema
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field(is_partition_key=True)
        string_field = cf.gen_string_field(is_partition_key=True)
        vector_field = cf.gen_float_vec_field()
        err_msg = "Expected only one partition key field"
        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

        # both defined in the collection schema
        err_msg = "Param partition_key_field must be str type"
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                 partition_key_field=[int64_field.name, string_field.name],
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

        # one defined in the field schema, one in the collection schema
        err_msg = "Expected only one partition key field"
        int64_field = cf.gen_int64_field(is_partition_key=True)
        string_field = cf.gen_string_field()
        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                 partition_key_field=string_field.name,
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("is_int64_primary", [True, False])
    def test_partition_key_on_primary_key(self, is_int64_primary):
        """
        Method:
        1. create a collection with the partition key set on the primary key field
        2. verify the error raised
        """
        self._connect()
        if is_int64_primary:
            pk_field = cf.gen_int64_field(name='pk', is_primary=True, is_partition_key=True)
        else:
            pk_field = cf.gen_string_field(name='pk', is_primary=True, is_partition_key=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          auto_id=False)

        err_msg = "the partition key field must not be primary field"
        c_name = cf.gen_unique_str("par_key")
        self.init_collection_wrap(name=c_name, schema=schema,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 2, "err_msg": err_msg})

        # if set on the collection schema
        if is_int64_primary:
            pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        else:
            pk_field = cf.gen_string_field(name='pk', is_primary=True)
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          partition_key_field=pk_field.name,
                                          auto_id=False)

        err_msg = "the partition key field must not be primary field"
        c_name = cf.gen_unique_str("par_key")
        self.init_collection_wrap(name=c_name, schema=schema,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L0)
    def test_partition_key_on_and_off(self):
        """
        Method:
        1. create a collection with the partition key set on one field in the field schema
           and on another field in the collection schema
        2. verify the error raised
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field(is_partition_key=True)
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        err_msg = "Expected only one partition key field"
        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                 partition_key_field=vector_field.name,
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

        # two fields of the same type
        string_field = cf.gen_string_field(name="string1", is_partition_key=True)
        string_field2 = cf.gen_string_field(name="string2")
        err_msg = "Expected only one partition key field"
        cf.gen_collection_schema(fields=[pk_field, string_field, string_field2, vector_field],
                                 partition_key_field=string_field2.name,
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("field_type", [DataType.FLOAT_VECTOR, DataType.BINARY_VECTOR, DataType.FLOAT,
                                            DataType.DOUBLE, DataType.BOOL, DataType.INT8,
                                            DataType.INT16, DataType.INT32, DataType.JSON])
    def test_partition_key_on_invalid_type_fields(self, field_type):
        """
        Method:
        1. create a collection with the partition key on a field of an unsupported type
        2. verify the error raised
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int8_field = cf.gen_int8_field(is_partition_key=(field_type == DataType.INT8))
        int16_field = cf.gen_int16_field(is_partition_key=(field_type == DataType.INT16))
        int32_field = cf.gen_int32_field(is_partition_key=(field_type == DataType.INT32))
        bool_field = cf.gen_bool_field(is_partition_key=(field_type == DataType.BOOL))
        float_field = cf.gen_float_field(is_partition_key=(field_type == DataType.FLOAT))
        double_field = cf.gen_double_field(is_partition_key=(field_type == DataType.DOUBLE))
        json_field = cf.gen_json_field(is_partition_key=(field_type == DataType.JSON))
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field(is_partition_key=(field_type == DataType.FLOAT_VECTOR))
        if field_type == DataType.BINARY_VECTOR:
            vector_field = cf.gen_binary_vec_field(is_partition_key=True)

        err_msg = "Partition key field type must be DataType.INT64 or DataType.VARCHAR"
        cf.gen_collection_schema(fields=[pk_field, int8_field, int16_field, int32_field,
                                         bool_field, float_field, double_field, json_field,
                                         int64_field, string_field, vector_field],
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_key_on_not_existed_fields(self):
        """
        Method:
        1. create a collection with the partition key on a non-existent field
        2. verify the error raised
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        err_msg = "the specified partition key field {non_existing_field} not exist"
        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                 partition_key_field="non_existing_field",
                                 auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L1)
    def test_partition_key_on_empty_and_num_partitions_set(self):
        """
        Method:
        1. create a collection with an empty partition key field, and with num_partitions set
           while the partition key is disabled
        2. verify the error raised
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        err_msg = "the specified partition key field {} not exist"
        cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                 partition_key_field="", auto_id=True,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

        schema = cf.gen_default_collection_schema()
        err_msg = "num_partitions should only be specified with partition key field enabled"
        c_name = cf.gen_unique_str("par_key")
        self.init_collection_wrap(name=c_name, schema=schema, num_partitions=200,
                                  check_task=CheckTasks.err_res,
                                  check_items={"err_code": 2, "err_msg": err_msg})

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("invalid_data", [99, True, None, [], {}, ()])
    def test_partition_key_insert_invalid_data(self, invalid_data):
        """
        Method:
        1. create a collection with the partition key on a varchar field
        2. insert entities with an invalid partition key value
        3. verify the error raised
        """
        self._connect()
        pk_field = cf.gen_string_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field()
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field],
                                          partition_key_field=string_field.name, auto_id=False)

        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)

        # insert
        nb = 10
        string_prefix = cf.gen_str_by_length(length=6)
        pk_values = [str(i) for i in range(0, nb)]
        int64_values = [i for i in range(0, nb)]
        string_values = [string_prefix + str(i) for i in range(0, nb)]
        string_values[1] = invalid_data  # inject invalid data
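        # a single bad value is expected to get the whole insert batch rejected
        # (inferred from the err_res check below; the column type check appears to be client-side)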
        float_vec_values = gen_vectors(nb, ct.default_dim)
        data = [pk_values, int64_values, string_values, float_vec_values]

        err_msg = "expect string input"
        collection_w.insert(data, check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})


class TestPartitionApiForbidden(TestcaseBase):
    @pytest.mark.tags(CaseLabel.L1)
    def test_create_partition(self):
        """
        Method:
        1. return error if creating a partition when partition key is on
        2. return error if inserting into a partition when partition key is on
        3. return error if dropping a partition when partition key is on
        4. return success if showing partitions when partition key is on
        5. return error if loading a partition when partition key is on
        6. return error if releasing a partition when partition key is on
        Expected: raise exception
        """
        self._connect()
        pk_field = cf.gen_int64_field(name='pk', is_primary=True)
        int64_field = cf.gen_int64_field()
        string_field = cf.gen_string_field(is_partition_key=True)
        vector_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(fields=[pk_field, int64_field, string_field, vector_field], auto_id=True)
        c_name = cf.gen_unique_str("par_key")
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)

        # create partition is not allowed
        err_msg = "disable create partition if partition key mode is used"
        partition_name = cf.gen_unique_str("partition")
        collection_w.create_partition(partition_name,
                                      check_task=CheckTasks.err_res,
                                      check_items={"err_code": 2, "err_msg": err_msg})
        self.init_partition_wrap(collection_w, partition_name,
                                 check_task=CheckTasks.err_res,
                                 check_items={"err_code": 2, "err_msg": err_msg})

        # get partition is allowed
        partitions = collection_w.partitions
        collection_w.partition(partitions[0].name)
        partition_w = self.init_partition_wrap(collection_w, partitions[0].name)
        assert partition_w.name == partitions[0].name
        # has partition is allowed
        assert collection_w.has_partition(partitions[0].name)
        assert self.utility_wrap.has_partition(collection_w.name, partitions[0].name)
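        # read-only partition APIs (list/get/has, loading progress, flush) remain usable in
        # partition key mode; the mutating ones (create/drop/insert/load/release by partition)
        # are rejected, as exercised below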

        # insert
        nb = 100
        string_prefix = cf.gen_str_by_length(length=6)
        entities_per_parkey = 10
        for _ in range(entities_per_parkey):
            int64_values = [i for i in range(0, nb)]
            string_values = [string_prefix + str(i) for i in range(0, nb)]
            float_vec_values = gen_vectors(nb, ct.default_dim)
            data = [int64_values, string_values, float_vec_values]
            collection_w.insert(data)

        err_msg = "not support manually specifying the partition names if partition key mode is used"
        partition_w.insert(data, check_task=CheckTasks.err_res,
                           check_items={"err_code": 2, "err_msg": err_msg})
        collection_w.insert(data, partition_name=partitions[0].name,
                            check_task=CheckTasks.err_res,
                            check_items={"err_code": 2, "err_msg": err_msg})

        err_msg = "disable load partitions if partition key mode is used"
        partition_w.load(check_task=CheckTasks.err_res,
                         check_items={"err_code": 2, "err_msg": err_msg})
        collection_w.load(partition_names=[partitions[0].name],
                          check_task=CheckTasks.err_res,
                          check_items={"err_code": 2, "err_msg": err_msg})

        # flush
        collection_w.flush()

        # build index
        collection_w.create_index(field_name=vector_field.name, index_params=ct.default_index)
        # load
        collection_w.load()
        # search
        nq = 10
        search_vectors = gen_vectors(nq, ct.default_dim)
        # search with a mixed filter
        res1 = collection_w.search(data=search_vectors, anns_field=vector_field.name,
                                   param=ct.default_search_params, limit=entities_per_parkey,
                                   expr=f'{int64_field.name} in [1,3,5] && {string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                                   output_fields=[int64_field.name, string_field.name],
                                   check_task=CheckTasks.check_search_results,
                                   check_items={"nq": nq, "limit": ct.default_limit})[0]
        pks = res1[0].ids[:3]

        err_msg = "not support manually specifying the partition names if partition key mode is used"
        collection_w.search(data=search_vectors, anns_field=vector_field.name, partition_names=[partitions[0].name],
                            param=ct.default_search_params, limit=entities_per_parkey,
                            expr=f'{int64_field.name} in [1,3,5]',
                            output_fields=[int64_field.name, string_field.name],
                            check_task=CheckTasks.err_res,
                            check_items={"err_code": 2, "err_msg": err_msg})
        partition_w.search(data=search_vectors, anns_field=vector_field.name,
                           param=ct.default_search_params, limit=entities_per_parkey,
                           expr=f'{string_field.name} in ["{string_prefix}1","{string_prefix}3","{string_prefix}5"]',
                           output_fields=[int64_field.name, string_field.name],
                           check_task=CheckTasks.err_res,
                           check_items={"err_code": 2, "err_msg": err_msg})

        # partition loading progress is allowed
        self.utility_wrap.loading_progress(collection_name=collection_w.name)
        self.utility_wrap.loading_progress(collection_name=collection_w.name,
                                           partition_names=[partitions[0].name])

        # partition wait for loading complete is allowed
        self.utility_wrap.wait_for_loading_complete(collection_name=collection_w.name)
        self.utility_wrap.wait_for_loading_complete(collection_name=collection_w.name,
                                                    partition_names=[partitions[0].name])
        # partition flush is allowed: #24165
        partition_w.flush()

        # partition delete is not allowed
        partition_w.delete(expr=f'{pk_field.name} in {pks}',
                           check_task=CheckTasks.err_res,
                           check_items={"err_code": 2, "err_msg": err_msg})
        collection_w.delete(expr=f'{pk_field.name} in {pks}', partition_name=partitions[0].name,
                            check_task=CheckTasks.err_res,
                            check_items={"err_code": 2, "err_msg": err_msg})

        # partition query is not allowed
        partition_w.query(expr=f'{pk_field.name} in {pks}',
                          check_task=CheckTasks.err_res,
                          check_items={"err_code": 2, "err_msg": err_msg})
        collection_w.query(expr=f'{pk_field.name} in {pks}', partition_names=[partitions[0].name],
                           check_task=CheckTasks.err_res,
                           check_items={"err_code": 2, "err_msg": err_msg})

        # partition upsert is not allowed
        # self.partition_wrap.upsert(data=data, check_task=CheckTasks.err_res,
        #                            check_items={"err_code": 2, "err_msg": err_msg})
        # self.collection_wrap.upsert(data=data, partition_name=partitions[0].name,
        #                             check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})

        # partition release is not allowed
        err_msg = "disable release partitions if partition key mode is used"
        partition_w.release(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})

        # partition drop is not allowed
        err_msg = "disable drop partition if partition key mode is used"
        partition_w.drop(check_task=CheckTasks.err_res, check_items={"err_code": 2, "err_msg": err_msg})

        # partition bulk insert is not allowed
        # self.utility_wrap.do_bulk_insert(collection_w.name, files, partition_names=[partitions[0].name],
        #                                  check_task=CheckTasks.err_res,
        #                                  check_items={"err_code": 2, "err_msg": err_msg})