milvus/tests/python_client/milvus_client/test_milvus_client_insert.py

991 lines
52 KiB
Python
Raw Normal View History

import multiprocessing
import numbers
import random
import numpy
import threading
import pytest
import pandas as pd
import decimal
from decimal import Decimal, getcontext
from time import sleep
import heapq
from base.client_base import TestcaseBase
from utils.util_log import test_log as log
from common import common_func as cf
from common import common_type as ct
from common.common_type import CaseLabel, CheckTasks
from utils.util_pymilvus import *
from common.constants import *
from pymilvus.orm.types import CONSISTENCY_STRONG, CONSISTENCY_BOUNDED, CONSISTENCY_SESSION, CONSISTENCY_EVENTUALLY
from base.high_level_api_wrapper import HighLevelApiWrapper
client_w = HighLevelApiWrapper()
prefix = "milvus_client_api_insert"
epsilon = ct.epsilon
default_nb = ct.default_nb
default_nb_medium = ct.default_nb_medium
default_nq = ct.default_nq
default_dim = ct.default_dim
default_limit = ct.default_limit
default_search_exp = "id >= 0"
exp_res = "exp_res"
default_search_string_exp = "varchar >= \"0\""
default_search_mix_exp = "int64 >= 0 && varchar >= \"0\""
default_invaild_string_exp = "varchar >= 0"
default_json_search_exp = "json_field[\"number\"] >= 0"
perfix_expr = 'varchar like "0%"'
default_search_field = ct.default_float_vec_field_name
default_search_params = ct.default_search_params
default_primary_key_field_name = "id"
default_vector_field_name = "vector"
default_float_field_name = ct.default_float_field_name
default_bool_field_name = ct.default_bool_field_name
default_string_field_name = ct.default_string_field_name
default_int32_array_field_name = ct.default_int32_array_field_name
default_string_array_field_name = ct.default_string_array_field_name
class TestMilvusClientInsertInvalid(TestcaseBase):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_column_data(self):
"""
target: test insert column data
method: create connection, collection, insert and search
expected: raise error
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)]
data = [[i for i in range(default_nb)], vectors]
error = {ct.err_code: 999, ct.err_msg: "Input data type is inconsistent with defined schema, please check it."}
client_w.insert(client, collection_name, data,
check_task=CheckTasks.err_res, check_items=error)
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_empty_collection_name(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = ""
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"`collection_name` value {collection_name} is illegal"}
client_w.insert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_insert_invalid_collection_name(self, collection_name):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a "
f"collection name must be an underscore or letter: invalid parameter"}
client_w.insert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_collection_name_over_max_length(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = "a".join("a" for i in range(256))
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. "
f"the length of a collection name must be less than 255 characters: "
f"invalid parameter"}
client_w.insert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_not_exist_collection_name(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str("insert_not_exist")
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not found"
f"[database=default][collection={collection_name}]"}
client_w.insert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_insert_data_invalid_type(self, data):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
error = {ct.err_code: 999, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'str'"}
client_w.insert(client, collection_name, data,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
# @pytest.mark.xfail(reason="pymilvus issue 1895")
def test_milvus_client_insert_data_empty(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
error = {ct.err_code: 999, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'str'"}
client_w.insert(client, collection_name, data="",
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_data_vector_field_missing(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i,
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"Field vector don't match in entities[0]"}
client_w.insert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_data_id_field_missing(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"Field id don't match in entities[0]"}
client_w.insert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_data_extra_field(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, enable_dynamic_field=False)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"Attempt to insert an unexpected field `{default_float_field_name}`"
f" to collection without enabling dynamic field"}
client_w.insert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_data_dim_not_match(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim+1))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 65536, ct.err_msg: f"of float data should divide the dim({default_dim})"}
client_w.insert(client, collection_name, data= rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_not_matched_data(self):
"""
target: test milvus client: insert not matched data then defined
method: insert string to int primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"The Input data type is inconsistent with defined schema"}
client_w.insert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("partition_name", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_insert_invalid_partition_name(self, partition_name):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of "
f"a partition name must be an underscore or letter."}
client_w.insert(client, collection_name, data= rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_not_exist_partition_name(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
partition_name = cf.gen_unique_str("partition_not_exist")
error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
client_w.insert(client, collection_name, data= rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_insert_collection_partition_not_match(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
another_collection_name = cf.gen_unique_str(prefix + "another")
partition_name = cf.gen_unique_str("partition")
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
client_w.create_collection(client, another_collection_name, default_dim)
client_w.create_partition(client, another_collection_name, partition_name)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
client_w.insert(client, collection_name, data= rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientInsertValid(TestcaseBase):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_insert_default(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
collections = client_w.list_collections(client)[0]
assert collection_name in collections
client_w.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = client_w.insert(client, collection_name, rows)[0]
assert results['insert_count'] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
# 4. query
client_w.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"primary_field": default_primary_key_field_name})
client_w.release_collection(client, collection_name)
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_insert_different_fields(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
collections = client_w.list_collections(client)[0]
assert collection_name in collections
client_w.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = client_w.insert(client, collection_name, rows)[0]
assert results['insert_count'] == default_nb
# 3. insert diff fields
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, "new_diff_str_field": str(i)} for i in range(default_nb)]
results = client_w.insert(client, collection_name, rows)[0]
assert results['insert_count'] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_insert_empty_data(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rows = []
results = client_w.insert(client, collection_name, rows)[0]
assert results['insert_count'] == 0
# 3. search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, default_dim))
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": [],
"limit": 0})
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_partition(self):
"""
target: test fast create collection normal case
method: create collection
expected: create collection with default schema, index, and load successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
partition_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. create partition
client_w.create_partition(client, collection_name, partition_name)
partitions = client_w.list_partitions(client, collection_name)[0]
assert partition_name in partitions
index = client_w.list_indexes(client, collection_name)[0]
assert index == ['vector']
# load_state = client_w.get_load_state(collection_name)[0]
# 3. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = client_w.insert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['insert_count'] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
# partition_number = client_w.get_partition_stats(client, collection_name, "_default")[0]
# assert partition_number == default_nb
# partition_number = client_w.get_partition_stats(client, collection_name, partition_name)[0]
# assert partition_number[0]['value'] == 0
if client_w.has_partition(client, collection_name, partition_name)[0]:
client_w.release_partitions(client, collection_name, partition_name)
client_w.drop_partition(client, collection_name, partition_name)
if client_w.has_collection(client, collection_name)[0]:
client_w.drop_collection(client, collection_name)
class TestMilvusClientUpsertInvalid(TestcaseBase):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are invalid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_column_data(self):
"""
target: test insert column data
method: create connection, collection, insert and search
expected: raise error
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
vectors = [[random.random() for _ in range(default_dim)] for _ in range(default_nb)]
data = [[i for i in range(default_nb)], vectors]
error = {ct.err_code: 999, ct.err_msg: "Input data type is inconsistent with defined schema, please check it."}
client_w.upsert(client, collection_name, data,
check_task=CheckTasks.err_res, check_items=error)
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_empty_collection_name(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = ""
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"`collection_name` value {collection_name} is illegal"}
client_w.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("collection_name", ["12-s", "12 s", "(mn)", "中文", "%$#"])
def test_milvus_client_upsert_invalid_collection_name(self, collection_name):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1100, ct.err_msg: f"Invalid collection name: {collection_name}. the first character of a "
f"collection name must be an underscore or letter: invalid parameter"}
client_w.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_collection_name_over_max_length(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = "a".join("a" for i in range(256))
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 1100, ct.err_msg: f"invalid dimension: {collection_name}. "
f"the length of a collection name must be less than 255 characters: "
f"invalid parameter"}
client_w.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_not_exist_collection_name(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str("insert_not_exist")
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 100, ct.err_msg: f"can't find collection collection not found"
f"[database=default][collection={collection_name}]"}
client_w.upsert(client, collection_name, rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("data", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_upsert_data_invalid_type(self, data):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
error = {ct.err_code: 999, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'str'"}
client_w.upsert(client, collection_name, data,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_empty(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
error = {ct.err_code: 999, ct.err_msg: f"wrong type of argument 'data',expected 'Dict' or list of 'Dict', got 'str'"}
client_w.upsert(client, collection_name, data="",
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_vector_field_missing(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i,
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"float vector field 'vector' is illegal, array type mismatch"}
client_w.upsert(client, collection_name, data= rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_id_field_missing(self):
"""
target: test high level api: client.create_collection
method: create collection with invalid primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"not support vector field as PrimaryField"}
client_w.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_extra_field(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, enable_dynamic_field=False)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"Attempt to insert an unexpected field `{default_float_field_name}` "
f"to collection without enabling dynamic field"}
client_w.upsert(client, collection_name, data= rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_data_dim_not_match(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim+1))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 65536, ct.err_msg: f"of float data should divide the dim({default_dim})"}
client_w.upsert(client, collection_name, data= rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_not_matched_data(self):
"""
target: test milvus client: insert not matched data then defined
method: insert string to int primary field
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: str(i), default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 999, ct.err_msg: f"The Input data type is inconsistent with defined schema"}
client_w.upsert(client, collection_name, data=rows,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("partition_name", ["12-s", "12 s", "(mn)", "中文", "%$#", " "])
def test_milvus_client_upsert_invalid_partition_name(self, partition_name):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 65535, ct.err_msg: f"Invalid partition name: {partition_name}. The first character of "
f"a partition name must be an underscore or letter."}
client_w.upsert(client, collection_name, data= rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_upsert_not_exist_partition_name(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
partition_name = cf.gen_unique_str("partition_not_exist")
error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
client_w.upsert(client, collection_name, data= rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_collection_partition_not_match(self):
"""
target: test milvus client: insert extra field than schema
method: insert extra field than schema when enable_dynamic_field is False
expected: Raise exception
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
another_collection_name = cf.gen_unique_str(prefix + "another")
partition_name = cf.gen_unique_str("partition")
# 1. create collection
client_w.create_collection(client, collection_name, default_dim)
client_w.create_collection(client, another_collection_name, default_dim)
client_w.create_partition(client, another_collection_name, partition_name)
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
client_w.upsert(client, collection_name, data= rows, partition_name=partition_name,
check_task=CheckTasks.err_res, check_items=error)
class TestMilvusClientUpsertValid(TestcaseBase):
""" Test case of search interface """
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
yield request.param
@pytest.fixture(scope="function", params=["COSINE", "L2"])
def metric_type(self, request):
yield request.param
"""
******************************************************************
# The following are valid base cases
******************************************************************
"""
@pytest.mark.tags(CaseLabel.L0)
def test_milvus_client_upsert_default(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
collections = client_w.list_collections(client)[0]
assert collection_name in collections
client_w.describe_collection(client, collection_name,
check_task=CheckTasks.check_describe_collection_property,
check_items={"collection_name": collection_name,
"dim": default_dim,
"consistency_level": 0})
# 2. insert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = client_w.upsert(client, collection_name, rows)[0]
assert results['upsert_count'] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
# 4. query
client_w.query(client, collection_name, filter=default_search_exp,
check_task=CheckTasks.check_query_results,
check_items={exp_res: rows,
"with_vec": True,
"primary_field": default_primary_key_field_name})
client_w.release_collection(client, collection_name)
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_empty_data(self):
"""
target: test search (high level api) normal case
method: create connection, collection, insert and search
expected: search/query successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. insert
rows = []
results = client_w.upsert(client, collection_name, rows)[0]
assert results['upsert_count'] == 0
# 3. search
rng = np.random.default_rng(seed=19530)
vectors_to_search = rng.random((1, default_dim))
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": [],
"limit": 0})
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L2)
def test_milvus_client_upsert_partition(self):
"""
target: test fast create collection normal case
method: create collection
expected: create collection with default schema, index, and load successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
partition_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. create partition
client_w.create_partition(client, collection_name, partition_name)
partitions = client_w.list_partitions(client, collection_name)[0]
assert partition_name in partitions
index = client_w.list_indexes(client, collection_name)[0]
assert index == ['vector']
# load_state = client_w.get_load_state(collection_name)[0]
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
# 3. upsert to default partition
results = client_w.upsert(client, collection_name, rows, partition_name=partitions[0])[0]
assert results['upsert_count'] == default_nb
# 4. upsert to non-default partition
results = client_w.upsert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['upsert_count'] == default_nb
# 5. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
# partition_number = client_w.get_partition_stats(client, collection_name, "_default")[0]
# assert partition_number == default_nb
# partition_number = client_w.get_partition_stats(client, collection_name, partition_name)[0]
# assert partition_number[0]['value'] == 0
if client_w.has_partition(client, collection_name, partition_name)[0]:
client_w.release_partitions(client, collection_name, partition_name)
client_w.drop_partition(client, collection_name, partition_name)
if client_w.has_collection(client, collection_name)[0]:
client_w.drop_collection(client, collection_name)
@pytest.mark.tags(CaseLabel.L1)
def test_milvus_client_insert_upsert(self):
"""
target: test fast create collection normal case
method: create collection
expected: create collection with default schema, index, and load successfully
"""
client = self._connect(enable_milvus_client_api=True)
collection_name = cf.gen_unique_str(prefix)
partition_name = cf.gen_unique_str(prefix)
# 1. create collection
client_w.create_collection(client, collection_name, default_dim, consistency_level="Strong")
# 2. create partition
client_w.create_partition(client, collection_name, partition_name)
partitions = client_w.list_partitions(client, collection_name)[0]
assert partition_name in partitions
index = client_w.list_indexes(client, collection_name)[0]
assert index == ['vector']
# load_state = client_w.get_load_state(collection_name)[0]
# 3. insert and upsert
rng = np.random.default_rng(seed=19530)
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, default_string_field_name: str(i)} for i in range(default_nb)]
results = client_w.insert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['insert_count'] == default_nb
rows = [{default_primary_key_field_name: i, default_vector_field_name: list(rng.random((1, default_dim))[0]),
default_float_field_name: i * 1.0, "new_diff_str_field": str(i)} for i in range(default_nb)]
results = client_w.upsert(client, collection_name, rows, partition_name=partition_name)[0]
assert results['upsert_count'] == default_nb
# 3. search
vectors_to_search = rng.random((1, default_dim))
insert_ids = [i for i in range(default_nb)]
client_w.search(client, collection_name, vectors_to_search,
check_task=CheckTasks.check_search_results,
check_items={"enable_milvus_client_api": True,
"nq": len(vectors_to_search),
"ids": insert_ids,
"limit": default_limit})
if client_w.has_partition(client, collection_name, partition_name)[0]:
client_w.release_partitions(client, collection_name, partition_name)
client_w.drop_partition(client, collection_name, partition_name)
if client_w.has_collection(client, collection_name)[0]:
client_w.drop_collection(client, collection_name)