2022-02-21 09:47:51 +08:00
|
|
|
from ssl import ALERT_DESCRIPTION_UNKNOWN_PSK_IDENTITY
|
2021-06-26 13:32:11 +08:00
|
|
|
import threading
|
|
|
|
|
2021-05-31 19:23:31 +08:00
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
2022-02-21 09:47:51 +08:00
|
|
|
import random
|
2021-05-26 16:10:57 +08:00
|
|
|
import pytest
|
2022-02-21 09:47:51 +08:00
|
|
|
from pymilvus import Index, DataType
|
2022-04-19 15:41:39 +08:00
|
|
|
from pymilvus.exceptions import MilvusException
|
2021-05-26 16:10:57 +08:00
|
|
|
|
2021-06-05 10:25:34 +08:00
|
|
|
from base.client_base import TestcaseBase
|
2021-06-26 13:32:11 +08:00
|
|
|
from utils.util_log import test_log as log
|
2021-05-26 16:10:57 +08:00
|
|
|
from common import common_func as cf
|
|
|
|
from common import common_type as ct
|
2021-06-10 09:49:49 +08:00
|
|
|
from common.common_type import CaseLabel, CheckTasks
|
2021-05-26 16:10:57 +08:00
|
|
|
|
2021-05-31 19:23:31 +08:00
|
|
|
# Prefixes handed to cf.gen_unique_str when naming test collections/partitions.
prefix = "insert"
pre_upsert = "upsert"

# String keys; presumably used to label expected properties in checks
# elsewhere in this file -- TODO confirm against callers.
exp_name = "name"
exp_schema = "schema"
exp_num = "num_entities"
exp_primary = "primary"

default_float_name = ct.default_float_field_name

# Schemas are built once at import time and shared by the tests below.
default_schema = cf.gen_default_collection_schema()
default_binary_schema = cf.gen_default_binary_collection_schema()

# Index parameters used by the create_index tests on the float vector field.
default_index_params = {
    "index_type": "IVF_SQ8",
    "metric_type": "L2",
    "params": {"nlist": 64},
}
default_binary_index_params = ct.default_binary_index

default_search_exp = "int64 >= 0"
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-05-26 16:10:57 +08:00
|
|
|
|
2021-06-05 10:25:34 +08:00
|
|
|
class TestInsertParams(TestcaseBase):
    """
    Parameter-level test cases of the Insert interface.

    Every case creates its own uniquely named collection through
    ``init_collection_wrap`` and then calls ``collection_w.insert`` with either
    valid data (asserting on the mutation result / entity count) or invalid
    data (asserting on the reported error through ``CheckTasks.err_res``).
    """

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_dataframe_data(self):
        """
        target: test insert DataFrame data
        method: 1.create collection
                2.insert dataframe data
        expected: assert num entities
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        df = cf.gen_default_dataframe_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data=df)
        assert mutation_res.insert_count == ct.default_nb
        assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_list_data(self):
        """
        target: test insert list-like data
        method: 1.create 2.insert list data
        expected: assert num entities
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data=data)
        assert mutation_res.insert_count == ct.default_nb
        # The first column of the generated list data is compared as the primary keys.
        assert mutation_res.primary_keys == data[0]
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_non_data_type(self):
        """
        target: test insert with non-dataframe, non-list data
        method: insert with data (non-dataframe and non-list type)
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        error = {ct.err_code: 999,
                 ct.err_msg: "The type of data should be List, pd.DataFrame or Dict"}
        collection_w.insert(data=None,
                            check_task=CheckTasks.err_res, check_items=error)

    # Renamed from ``test_insert_empty_data``: a second method with that same
    # name is defined right below, which replaced this one in the class
    # namespace so the empty-DataFrame case was never collected.
    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("data", [pd.DataFrame()])
    def test_insert_empty_dataframe(self, data):
        """
        target: test insert empty dataFrame()
        method: insert empty
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        error = {ct.err_code: 999, ct.err_msg: "The fields don't match with schema fields"}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("data", [[[]]])
    def test_insert_empty_data(self, data):
        """
        target: test insert empty array
        method: insert empty
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        error = {ct.err_code: 999, ct.err_msg: "The data don't match with schema fields"}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_dataframe_only_columns(self):
        """
        target: test insert with dataframe just columns
        method: dataframe just have columns
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        columns = [ct.default_int64_field_name,
                   ct.default_float_vec_field_name]
        # A frame with column labels only and no rows.
        df = pd.DataFrame(columns=columns)
        error = {ct.err_code: 999,
                 ct.err_msg: "The fields don't match with schema fields"}
        collection_w.insert(
            data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_empty_field_name_dataframe(self):
        """
        target: test insert empty field name df
        method: dataframe with empty column
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name, dim=32)
        df = cf.gen_default_dataframe_data(10)
        # Replace the int64 column label with a single blank character.
        df.rename(columns={ct.default_int64_field_name: ' '}, inplace=True)
        error = {ct.err_code: 999,
                 ct.err_msg: "The name of field don't match, expected: int64"}
        collection_w.insert(
            data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_invalid_field_name_dataframe(self):
        """
        target: test insert with invalid dataframe data
        method: insert with invalid field name dataframe
        expected: raise exception
        """
        invalid_field_name = "non_existing"
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        df = cf.gen_default_dataframe_data(10)
        df.rename(
            columns={ct.default_int64_field_name: invalid_field_name}, inplace=True)
        error = {ct.err_code: 999,
                 ct.err_msg: f"The name of field don't match, expected: int64, got {invalid_field_name}"}
        collection_w.insert(
            data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_numpy_data(self):
        """
        target: test insert numpy.ndarray data
        method: 1.create by schema 2.insert data
        expected: assert num_entities
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 10
        data = cf.gen_numpy_data(nb=nb)
        collection_w.insert(data=data)
        assert collection_w.num_entities == nb

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_binary_dataframe(self):
        """
        target: test insert binary dataframe
        method: 1. create by schema 2. insert dataframe
        expected: assert num_entities
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(
            name=c_name, schema=default_binary_schema)
        df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data=df)
        assert mutation_res.insert_count == ct.default_nb
        assert mutation_res.primary_keys == df[ct.default_int64_field_name].values.tolist()
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_binary_data(self):
        """
        target: test insert list-like binary data
        method: 1. create by schema 2. insert data
        expected: assert num_entities
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(
            name=c_name, schema=default_binary_schema)
        data, _ = cf.gen_default_binary_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data=data)
        assert mutation_res.insert_count == ct.default_nb
        assert mutation_res.primary_keys == data[0]
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_single(self):
        """
        target: test insert single
        method: insert one entity
        expected: verify num
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data(nb=1)
        mutation_res, _ = collection_w.insert(data=data)
        assert mutation_res.insert_count == 1
        assert mutation_res.primary_keys == data[0]
        assert collection_w.num_entities == 1

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_dim_not_match(self):
        """
        target: test insert with not match dim
        method: insert data dim not equal to schema dim
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        # Deliberately different from ct.default_dim, which the error message refers to.
        dim = 129
        df = cf.gen_default_dataframe_data(ct.default_nb, dim=dim)
        error = {ct.err_code: 65535,
                 ct.err_msg: f'Collection field dim is {ct.default_dim}, but entities field dim is {dim}'}
        collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_binary_dim_not_match(self):
        """
        target: test insert binary with dim not match
        method: insert binary data dim not equal to schema
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(
            name=c_name, schema=default_binary_schema)
        dim = 120
        df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb, dim=dim)
        error = {ct.err_code: 1100,
                 ct.err_msg: f'the dim ({dim}) of field data(binary_vector) is not equal to schema dim '
                             f'({ct.default_dim}): invalid parameter[expected={dim}][actual={ct.default_dim}]'}
        collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_field_name_not_match(self):
        """
        target: test insert field name not match
        method: data field name not match schema
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        df = cf.gen_default_dataframe_data(10)
        df.rename(columns={ct.default_float_field_name: "int"}, inplace=True)
        error = {ct.err_code: 999, ct.err_msg: "The name of field don't match, expected: float, got int"}
        collection_w.insert(
            data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip(reason="Currently not check in pymilvus")
    def test_insert_field_value_not_match(self):
        """
        target: test insert data value not match
        method: insert data value type not match schema
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 10
        df = cf.gen_default_dataframe_data(nb)
        # Overwrite the second column with float64 values.
        new_float_value = pd.Series(data=[float(i) for i in range(nb)], dtype="float64")
        df[df.columns[1]] = new_float_value
        error = {ct.err_code: 999,
                 ct.err_msg: "The data type of field float doesn't match, expected: FLOAT, got DOUBLE"}
        collection_w.insert(data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_value_less(self):
        """
        target: test insert value less than other
        method: string field value less than vec-field value
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 10
        data = []
        for field in collection_w.schema.fields:
            field_data = cf.gen_data_by_collection_field(field, nb=nb)
            if field.dtype == DataType.VARCHAR:
                # Drop the last value so this column is one row short.
                field_data = field_data[:-1]
            data.append(field_data)
        error = {ct.err_code: 999, ct.err_msg: "Field data size misaligned for field [varchar] "}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_vector_value_less(self):
        """
        target: test insert vector value less than other
        method: vec field value less than int field
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 10
        data = []
        for field in collection_w.schema.fields:
            field_data = cf.gen_data_by_collection_field(field, nb=nb)
            if field.dtype == DataType.FLOAT_VECTOR:
                # Drop the last vector so this column is one row short.
                field_data = field_data[:-1]
            data.append(field_data)
        error = {ct.err_code: 999, ct.err_msg: 'Field data size misaligned for field [float_vector] '}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_fields_more(self):
        """
        target: test insert with fields more
        method: field more than schema fields
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 10
        data = [cf.gen_data_by_collection_field(field, nb=nb)
                for field in collection_w.schema.fields]
        # One extra column that has no counterpart in the schema.
        data.append([1] * nb)
        error = {ct.err_code: 999, ct.err_msg: "The data don't match with schema fields"}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_fields_less(self):
        """
        target: test insert with fields less
        method: fields less than schema fields
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        df = cf.gen_default_dataframe_data(ct.default_nb)
        df.drop(ct.default_float_vec_field_name, axis=1, inplace=True)
        error = {ct.err_code: 999, ct.err_msg: "The fields don't match with schema fields"}
        collection_w.insert(
            data=df, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_list_order_inconsistent_schema(self):
        """
        target: test insert data fields order inconsistent with schema
        method: insert list data, data fields order inconsistent with schema
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 10
        data = [cf.gen_data_by_collection_field(field, nb=nb)
                for field in collection_w.schema.fields]
        # Swap the first two columns so they no longer follow the schema order.
        data[0], data[1] = data[1], data[0]
        error = {ct.err_code: 999,
                 ct.err_msg: "The Input data type is inconsistent with defined schema"}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_inconsistent_data(self):
        """
        target: test insert with inconsistent data
        method: insert with data that same field has different type data
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_list_data(nb=100)
        # Put a float into the first column at row 1.
        data[0][1] = 1.0
        # The two adjacent literals are joined before ``%`` is applied, so the
        # field name is substituted into the first fragment.
        # NOTE(review): the injected value is a float while the expected text
        # mentions <class 'int'> -- confirm against the pymilvus message.
        error = {ct.err_code: 999,
                 ct.err_msg: "The Input data type is inconsistent with defined schema, {%s} field should be a int64, "
                             "but got a {<class 'int'>} instead." % ct.default_int64_field_name}
        collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)
|
2021-06-23 13:52:07 +08:00
|
|
|
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-05 10:25:34 +08:00
|
|
|
class TestInsertOperation(TestcaseBase):
|
2021-05-31 19:23:31 +08:00
|
|
|
"""
|
|
|
|
******************************************************************
|
|
|
|
The following cases are used to test insert interface operations
|
|
|
|
******************************************************************
|
|
|
|
"""
|
|
|
|
|
2021-10-13 19:01:23 +08:00
|
|
|
@pytest.fixture(scope="function", params=[8, 4096])
def dim(self, request):
    """Parametrized fixture supplying each value in ``params`` (8, 4096) in turn."""
    # No teardown is needed, so the value is handed back directly.
    return request.param
|
|
|
|
|
2023-06-06 12:06:41 +08:00
|
|
|
@pytest.fixture(scope="function", params=[False, True])
def auto_id(self, request):
    """Parametrized fixture supplying ``False`` and then ``True``."""
    # No teardown is needed, so the value is handed back directly.
    return request.param
|
|
|
|
|
2023-07-17 21:49:19 +08:00
|
|
|
@pytest.fixture(scope="function", params=[ct.default_int64_field_name, ct.default_string_field_name])
def pk_field(self, request):
    """Parametrized fixture supplying the default int64 and string field names in turn."""
    # No teardown is needed, so the value is handed back directly.
    return request.param
|
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_without_connection(self):
    """
    target: verify insert fails once the connection has been removed
    method: create a collection, remove the default connection alias,
            then insert list data
    expected: raise exception
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))

    # Remove the default alias and make sure it is no longer listed.
    self.connection_wrap.remove_connection(ct.default_alias)
    remaining, _ = self.connection_wrap.list_connections()
    assert ct.default_alias not in remaining

    rows = cf.gen_default_list_data(10)
    expected_err = {ct.err_code: 999, ct.err_msg: 'should create connection first'}
    collection_w.insert(data=rows, check_task=CheckTasks.err_res, check_items=expected_err)
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_default_partition(self):
    """
    target: test insert entities into a partition through the collection
    method: create a partition with init_partition_wrap, then insert list
            data passing that partition's name as partition_name
    expected: the insert count equals to nb
    """
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    part = self.init_partition_wrap(coll)
    rows = cf.gen_default_list_data(nb=ct.default_nb)
    insert_res, _ = coll.insert(data=rows, partition_name=part.name)
    assert insert_res.insert_count == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_partition_not_existed(self):
    """
    target: test insert entities into a partition that does not exist
    method: create collection and insert entities in it, with the not existed partition_name param
    expected: error raised
    """
    # The level tag above was missing; every other case in this file carries
    # one, and the error-path cases are labelled L2.
    collection_w = self.init_collection_wrap(
        name=cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(nb=10)
    error = {ct.err_code: 999,
             ct.err_msg: "partition not found[partition=p]"}
    # The return value is not needed: the error check is done by check_task.
    collection_w.insert(data=df, partition_name="p", check_task=CheckTasks.err_res,
                        check_items=error)
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_partition_repeatedly(self):
    """
    target: test inserting the same dataframe into two partitions
    method: create a collection with two partitions and insert the same
            dataframe once into each, using the partition_name param
    expected: each insert reports insert_count equal to nb
    """
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    first_part = self.init_partition_wrap(coll)
    second_part = self.init_partition_wrap(coll)
    frame = cf.gen_default_dataframe_data(nb=ct.default_nb)

    first_res, _ = coll.insert(data=frame, partition_name=first_part.name)
    second_res, _ = coll.insert(data=frame, partition_name=second_part.name)

    assert first_res.insert_count == ct.default_nb
    assert second_res.insert_count == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L0)
def test_insert_partition_with_ids(self):
    """
    target: test insert entities into an explicitly named partition
    method: create a collection and a partition with a generated name,
            then insert a dataframe with the partition_name param
    expected: the insert count equals to nb
    """
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    part_name = cf.gen_unique_str(prefix)
    part = self.init_partition_wrap(coll, part_name)
    frame = cf.gen_default_dataframe_data(ct.default_nb)
    insert_res, _ = coll.insert(data=frame, partition_name=part.name)
    assert insert_res.insert_count == ct.default_nb
|
|
|
|
|
2023-03-09 19:13:52 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_exceed_varchar_limit(self):
    """
    target: test insert exceed varchar limit
    method: create a collection whose 'small_limit' varchar field has
            max_length=2 and insert strings longer than that
    expected: error raised
    """
    pk = cf.gen_int64_field(is_primary=True)
    vec = cf.gen_float_vec_field()
    small = cf.gen_string_field(name='small_limit', max_length=2)
    big = cf.gen_string_field(name='big_limit', max_length=65530)
    schema = cf.gen_collection_schema([pk, vec, small, big], auto_id=True)
    collection_w = self.init_collection_wrap(cf.gen_unique_str(prefix), schema)

    vectors = cf.gen_vectors(2, ct.default_dim)
    # Schema is auto_id, so only three columns are supplied; the middle one
    # holds strings longer than two characters.
    too_long = ["limit_1___________", "limit_2___________"]
    rows = [vectors, too_long, ['1', '2']]
    expected_err = {ct.err_code: 999,
                    ct.err_msg: "length of string exceeds max length"}
    collection_w.insert(rows, check_task=CheckTasks.err_res, check_items=expected_err)
|
2023-03-09 19:13:52 +08:00
|
|
|
|
2022-06-30 15:54:18 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_with_no_vector_field_dtype(self):
    """
    target: test insert entities, with no vector field
    method: generate a column per schema field but leave the float vector
            column out of the inserted data
    expected: error raised
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    nb = 1
    fields = collection_w.schema.fields
    # Generate data for every field (as before), then keep the non-vector ones.
    generated = [(f.dtype, cf.gen_data_by_collection_field(f, nb=nb)) for f in fields]
    data = [column for dtype, column in generated if dtype != DataType.FLOAT_VECTOR]
    expected_err = {ct.err_code: 999, ct.err_msg: f"The data don't match with schema fields, "
                                                  f"expect {len(fields)} list, got {len(data)}"}
    collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=expected_err)
|
2022-06-30 15:54:18 +08:00
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_with_vector_field_dismatch_dtype(self):
    """
    target: test insert entities, with mismatched vector field data
    method: replace the float vector column with a list of plain floats
    expected: error raised
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    nb = 1
    columns = []
    for schema_field in collection_w.schema.fields:
        column = cf.gen_data_by_collection_field(schema_field, nb=nb)
        if schema_field.dtype == DataType.FLOAT_VECTOR:
            # Scalars instead of vectors for the vector field.
            column = [random.randint(-1000, 1000) * 0.0001 for _ in range(nb)]
        columns.append(column)
    expected_err = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
    collection_w.insert(data=columns, check_task=CheckTasks.err_res, check_items=expected_err)
|
2021-07-22 14:42:15 +08:00
|
|
|
|
2021-06-23 13:52:07 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_drop_collection(self):
    """
    target: test insert and drop
    method: insert a dataframe into a collection, then drop the collection
    expected: the collection is listed before the drop and absent after it
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))

    names_before, _ = self.utility_wrap.list_collections()
    assert collection_w.name in names_before

    frame = cf.gen_default_dataframe_data(ct.default_nb)
    collection_w.insert(data=frame)
    collection_w.drop()

    names_after, _ = self.utility_wrap.list_collections()
    assert collection_w.name not in names_after
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-26 13:32:11 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2021-05-31 19:23:31 +08:00
|
|
|
def test_insert_create_index(self):
    """
    target: test insert followed by index creation
    method: 1. insert the default dataframe 2. create an index on the float vector field
    expected: num_entities equals the inserted rows and the index matches the requested params
    """
    vec_field = ct.default_float_vec_field_name
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    coll.insert(data=cf.gen_default_dataframe_data(ct.default_nb))
    assert coll.num_entities == ct.default_nb
    coll.create_index(vec_field, default_index_params)
    assert coll.has_index()[0]
    built_index, _ = coll.index()
    expected_index = Index(coll.collection, vec_field, default_index_params)
    assert built_index == expected_index
    assert coll.indexes[0] == built_index
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-26 13:32:11 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2021-05-31 19:23:31 +08:00
|
|
|
def test_insert_after_create_index(self):
    """
    target: test insert into a collection that already has an index
    method: 1. create an index on the float vector field 2. insert the default dataframe
    expected: the index matches the requested params and num_entities equals the inserted rows
    """
    vec_field = ct.default_float_vec_field_name
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    coll.create_index(vec_field, default_index_params)
    assert coll.has_index()[0]
    built_index, _ = coll.index()
    expected_index = Index(coll.collection, vec_field, default_index_params)
    assert built_index == expected_index
    assert coll.indexes[0] == built_index
    coll.insert(data=cf.gen_default_dataframe_data(ct.default_nb))
    assert coll.num_entities == ct.default_nb
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2021-05-31 19:23:31 +08:00
|
|
|
def test_insert_binary_after_index(self):
    """
    target: test insert binary data into an indexed collection
    method: 1. create an index on the binary vector field 2. insert the default binary dataframe
    expected: 1. the index matches the requested params 2. num_entities equals the inserted rows
    """
    binary_schema = cf.gen_default_binary_collection_schema()
    vec_field = ct.default_binary_vec_field_name
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=binary_schema)
    coll.create_index(vec_field, default_binary_index_params)
    assert coll.has_index()[0]
    built_index, _ = coll.index()
    expected_index = Index(coll.collection, vec_field, default_binary_index_params)
    assert built_index == expected_index
    assert coll.indexes[0] == built_index
    # the generator returns a pair; only the dataframe is inserted
    binary_df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
    coll.insert(data=binary_df)
    assert coll.num_entities == ct.default_nb
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-26 13:32:11 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
|
|
|
def test_insert_auto_id_create_index(self):
    """
    target: test create index in an auto_id=True collection
    method: 1. create an auto_id=True collection and insert data without the int64 column
            2. create an index on the float vector field
    expected: primary keys and num_entities are correct, and the index matches the requested params
    """
    vec_field = ct.default_float_vec_field_name
    auto_schema = cf.gen_default_collection_schema(auto_id=True)
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix), schema=auto_schema)
    frame = cf.gen_default_dataframe_data()
    # the int64 column is removed before inserting into the auto_id collection
    frame.drop(columns=[ct.default_int64_field_name], inplace=True)
    insert_res, _ = coll.insert(data=frame)
    assert cf._check_primary_keys(insert_res.primary_keys, ct.default_nb)
    assert coll.num_entities == ct.default_nb

    # create index
    coll.create_index(vec_field, default_index_params)
    assert coll.has_index()[0]
    built_index, _ = coll.index()
    expected_index = Index(coll.collection, vec_field, default_index_params)
    assert built_index == expected_index
    assert coll.indexes[0] == built_index
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
2023-07-17 21:49:19 +08:00
|
|
|
def test_insert_auto_id_true(self, pk_field):
    """
    target: test insert when auto_id=True
    method: 1. create a collection with auto_id=True on pk_field
            2. insert a dataframe with the pk_field column removed
    expected: verify primary_keys and num_entities
    """
    name = cf.gen_unique_str(prefix)
    auto_schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
    coll = self.init_collection_wrap(name=name, schema=auto_schema)
    frame = cf.gen_default_dataframe_data()
    frame.drop(columns=[pk_field], inplace=True)
    insert_res, _ = coll.insert(data=frame)
    assert cf._check_primary_keys(insert_res.primary_keys, ct.default_nb)
    assert coll.num_entities == ct.default_nb
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-23 13:52:07 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2023-07-17 21:49:19 +08:00
|
|
|
def test_insert_twice_auto_id_true(self, pk_field):
    """
    target: test inserting the same data twice when auto_id=True
    method: 1. create a collection with auto_id=True on pk_field
            2. insert the same pk-less dataframe twice
    expected: the combined primary keys of both inserts pass the primary key check
              and num_entities is twice the batch size
    """
    name = cf.gen_unique_str(prefix)
    auto_schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
    nb = 10
    coll = self.init_collection_wrap(name=name, schema=auto_schema)
    frame = cf.gen_default_dataframe_data(nb)
    frame.drop(columns=[pk_field], inplace=True)

    first_res, _ = coll.insert(data=frame)
    all_keys = first_res.primary_keys
    assert cf._check_primary_keys(all_keys, nb)

    second_res, _ = coll.insert(data=frame)
    # accumulate the keys of both inserts in one list before checking them together
    all_keys.extend(second_res.primary_keys)
    assert cf._check_primary_keys(all_keys, nb * 2)
    assert coll.num_entities == nb * 2
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
2023-07-17 21:49:19 +08:00
|
|
|
def test_insert_auto_id_true_list_data(self, pk_field):
    """
    target: test insert list data when auto_id=True
    method: 1. create a collection with auto_id=True on pk_field
            2. insert default list data with the primary key column removed
    expected: verify insert_count, primary_keys and num_entities
    """
    name = cf.gen_unique_str(prefix)
    auto_schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
    coll = self.init_collection_wrap(name=name, schema=auto_schema)
    columns = cf.gen_default_list_data()
    if pk_field == ct.default_int64_field_name:
        # the int64 column is the first entry of the list data
        payload = columns[1:]
    else:
        # NOTE(review): index 2 is presumably the varchar column of
        # gen_default_list_data; confirm against common_func
        del columns[2]
        payload = columns
    insert_res, _ = coll.insert(data=payload)
    assert insert_res.insert_count == ct.default_nb
    assert cf._check_primary_keys(insert_res.primary_keys, ct.default_nb)
    assert coll.num_entities == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2023-07-17 21:49:19 +08:00
|
|
|
def test_insert_auto_id_true_with_dataframe_values(self, pk_field):
    """
    target: test insert a dataframe that carries primary key values when auto_id=True
    method: 1. create a collection with auto_id=True on pk_field
            2. insert a dataframe that still contains the pk_field column
    expected: error raised and the collection stays empty
    """
    name = cf.gen_unique_str(prefix)
    auto_schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
    coll = self.init_collection_wrap(name=name, schema=auto_schema)
    frame = cf.gen_default_dataframe_data(nb=100)
    expected_error = {
        ct.err_code: 999,
        ct.err_msg: f"Expect no data for auto_id primary field: {pk_field}",
    }
    coll.insert(data=frame, check_task=CheckTasks.err_res, check_items=expected_error)
    assert coll.is_empty
|
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
2023-07-17 21:49:19 +08:00
|
|
|
def test_insert_auto_id_true_with_list_values(self, pk_field):
    """
    target: test insert list data when auto_id=True
    method: 1. create a collection with auto_id=True on pk_field
            2. generate column data per schema field and insert every column except pk_field
    expected: verify num entities
    """
    name = cf.gen_unique_str(prefix)
    auto_schema = cf.gen_default_collection_schema(primary_field=pk_field, auto_id=True)
    coll = self.init_collection_wrap(name=name, schema=auto_schema)
    nb = 100
    columns = []
    for schema_field in coll.schema.fields:
        column = cf.gen_data_by_collection_field(schema_field, nb=nb)
        if schema_field.name == pk_field:
            # data is generated for the primary key too, but it is left out of the insert
            continue
        columns.append(column)
    coll.insert(data=columns)
    assert coll.num_entities == nb
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-23 13:52:07 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
|
|
|
def test_insert_auto_id_false_same_values(self):
    """
    target: test insert identical primary key values with auto_id false
    method: 1. create a default collection
            2. insert list data whose first column is the value 1 repeated for every row
    expected: insert_count equals nb and the returned primary keys equal the inserted ones
    """
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    nb = 100
    columns = cf.gen_default_list_data(nb=nb)
    columns[0] = [1] * nb
    insert_res, _ = coll.insert(columns)
    assert insert_res.insert_count == nb
    assert insert_res.primary_keys == columns[0]
|
2021-06-23 13:52:07 +08:00
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
|
|
|
def test_insert_auto_id_false_negative_values(self):
    """
    target: test insert non-positive primary key values with auto_id false
    method: create a default collection and insert list data whose first column
            counts down from 0 to -(nb - 1)
    expected: returned primary keys equal the inserted ones and num_entities equals nb
    """
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    nb = 100
    columns = cf.gen_default_list_data(nb)
    columns[0] = list(range(0, -nb, -1))
    insert_res, _ = coll.insert(columns)
    assert insert_res.primary_keys == columns[0]
    assert coll.num_entities == nb
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2022-06-30 15:54:18 +08:00
|
|
|
# @pytest.mark.xfail(reason="issue 15416")
|
2021-05-31 19:23:31 +08:00
|
|
|
def test_insert_multi_threading(self):
    """
    target: test concurrent insert
    method: start several threads that each insert the same default dataframe
    expected: every thread's insert result is correct and num_entities equals
              default_nb multiplied by the number of threads
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    df = cf.gen_default_dataframe_data(ct.default_nb)
    thread_num = 4
    primary_keys = df[ct.default_int64_field_name].values.tolist()
    # An exception raised inside a worker thread is not propagated to the
    # thread that calls join(), so a failing assertion there would go
    # unnoticed. Each worker records its failure here instead and the main
    # thread checks the list after all workers have finished.
    failures = []

    def insert(thread_i):
        log.debug(f'In thread-{thread_i}')
        try:
            mutation_res, _ = collection_w.insert(df)
            assert mutation_res.insert_count == ct.default_nb
            assert mutation_res.primary_keys == primary_keys
        except Exception as exc:  # re-surfaced in the main thread below
            failures.append((thread_i, exc))

    threads = [threading.Thread(target=insert, args=(i,)) for i in range(thread_num)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert not failures, f"insert failed in worker threads: {failures}"
    assert collection_w.num_entities == ct.default_nb * thread_num
|
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2021-10-13 19:01:23 +08:00
|
|
|
def test_insert_multi_times(self, dim):
    """
    target: test insert multi times
    method: insert fixed-size batches repeatedly until the total row count is reached
    expected: every batch reports the right insert_count and primary keys,
              and num_entities equals the total
    """
    batch_size = 120
    total_rows = 12000
    coll = self.init_collection_general(prefix, dim=dim)[0]
    for _ in range(total_rows // batch_size):
        batch = cf.gen_default_dataframe_data(batch_size, dim)
        insert_res, _ = coll.insert(data=batch)
        assert insert_res.insert_count == batch_size
        expected_keys = batch[ct.default_int64_field_name].values.tolist()
        assert insert_res.primary_keys == expected_keys

    assert coll.num_entities == total_rows
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-12-02 18:09:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2021-09-08 12:10:00 +08:00
|
|
|
def test_insert_all_datatype_collection(self):
    """
    target: test insert into a collection that contains all datatype fields
    method: build a dataframe covering all data types and construct the
            collection from it, using the int64 field as primary key
    expected: verify num entities
    """
    self._connect()
    row_count = 100
    frame = cf.gen_dataframe_all_data_type(nb=row_count)
    name = cf.gen_unique_str(prefix)
    self.collection_wrap.construct_from_dataframe(
        name, frame, primary_field=ct.default_int64_field_name)
    assert self.collection_wrap.num_entities == row_count
|
2021-09-08 12:10:00 +08:00
|
|
|
|
2023-01-18 17:43:43 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
|
|
|
def test_insert_equal_to_resource_limit(self):
    """
    target: test insert data equal to RPC limitation 64MB (67108864)
    method: calculated critical value and insert equivalent data
    expected: insert succeeds and num_entities equals the inserted row count
    """
    # nb = 127583 without json field
    row_count = 108993
    coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
    frame = cf.gen_default_dataframe_data(row_count)
    coll.insert(data=frame)
    assert coll.num_entities == row_count
|
|
|
|
|
2023-06-06 12:06:41 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2023-08-30 18:47:01 +08:00
|
|
|
@pytest.mark.skip("not support default_value now")
|
2023-06-06 12:06:41 +08:00
|
|
|
@pytest.mark.parametrize("default_value", [[], None])
|
|
|
|
def test_insert_one_field_using_default_value(self, default_value, auto_id):
    """
    target: test insert with one field using default value
    method: 1. create a collection whose string field declares a default value
            2. insert with []/None in place of that field's column
    expected: insert successfully
    """
    schema_fields = [
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_field(),
        cf.gen_string_field(default_value="abc"),
        cf.gen_float_vec_field(),
    ]
    schema = cf.gen_collection_schema(schema_fields, auto_id=auto_id)
    coll = self.init_collection_wrap(schema=schema)

    pk_column = list(range(ct.default_nb))
    float_column = list(map(np.float32, range(ct.default_nb)))
    vector_column = cf.gen_vectors(ct.default_nb, ct.default_dim)
    # default_value stands in for the string column
    columns = [float_column, default_value, vector_column]
    if not auto_id:
        # the primary key column is only supplied when ids are not auto-generated
        columns.insert(0, pk_column)
    coll.insert(columns)
    assert coll.num_entities == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
|
2023-08-30 18:47:01 +08:00
|
|
|
@pytest.mark.skip("not support default_value now")
|
2023-06-06 12:06:41 +08:00
|
|
|
@pytest.mark.parametrize("default_value", [[], None])
|
|
|
|
def test_insert_multi_fields_using_default_value(self, default_value, auto_id):
    """
    target: test insert with multi fields using default value
    method: 1. default value fields before the vector field, insert []/None for
               only one of them, fail
            2. default value fields all after the vector field, insert without
               them (and with only the float column), succeed
    expected: report error for case 1 and insert successfully for case 2
    """
    # 1. default value fields before vector, insert [], None, fail
    leading_fields = [
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_field(default_value=np.float32(1.0)),
        cf.gen_string_field(default_value="abc"),
        cf.gen_float_vec_field(),
    ]
    schema = cf.gen_collection_schema(leading_fields, auto_id=auto_id)
    coll = self.init_collection_wrap(schema=schema)
    # if multi default_value fields before vector field, every field must use []/None
    columns = [
        list(range(ct.default_nb)),
        default_value,
        cf.gen_vectors(ct.default_nb, ct.default_dim),
    ]
    if auto_id:
        del columns[0]
    expected_error = {ct.err_code: 999,
                      ct.err_msg: "The data type of field varchar doesn't match"}
    coll.insert(columns, check_task=CheckTasks.err_res, check_items=expected_error)

    # 2. default value fields all after vector field, insert empty, succeed
    trailing_fields = [
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_vec_field(),
        cf.gen_float_field(default_value=np.float32(1.0)),
        cf.gen_string_field(default_value="abc"),
    ]
    schema = cf.gen_collection_schema(trailing_fields, auto_id=auto_id)
    coll = self.init_collection_wrap(schema=schema)
    # first payload omits both default-value columns
    without_defaults = [
        list(range(ct.default_nb)),
        cf.gen_vectors(ct.default_nb, ct.default_dim),
    ]
    # second payload supplies the float column and omits only the string column
    with_float = [
        list(range(ct.default_nb)),
        cf.gen_vectors(ct.default_nb, ct.default_dim),
        list(map(np.float32, range(ct.default_nb))),
    ]
    if auto_id:
        del without_defaults[0], with_float[0]
    coll.insert(without_defaults)
    assert coll.num_entities == ct.default_nb
    coll.insert(with_float)
|
|
|
|
|
2023-06-19 15:44:41 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
|
2023-08-30 18:47:01 +08:00
|
|
|
@pytest.mark.skip("not support default_value now")
|
2023-06-19 15:44:41 +08:00
|
|
|
def test_insert_dataframe_using_default_value(self):
    """
    target: test insert a dataframe that relies on a field default value
    method: create a collection whose string field declares a default value and
            insert a dataframe that has no column for that field
    expected: insert successfully
    """
    schema_fields = [
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_field(),
        cf.gen_string_field(default_value="abc"),
        cf.gen_float_vec_field(),
    ]
    coll = self.init_collection_wrap(schema=cf.gen_collection_schema(schema_fields))
    vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
    # None/[] is not allowed when using dataframe
    # To use default value, delete the whole item
    int_series = pd.Series(data=list(range(0, ct.default_nb)))
    float_series = pd.Series(data=[float(i) for i in range(ct.default_nb)], dtype="float32")
    frame = pd.DataFrame({
        "int64": int_series,
        "float_vector": vectors,
        "float": float_series,
    })
    coll.insert(frame)
    assert coll.num_entities == ct.default_nb
|
|
|
|
|
2021-05-31 19:23:31 +08:00
|
|
|
|
2021-06-05 10:25:34 +08:00
|
|
|
class TestInsertAsync(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test insert async
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_sync(self):
        """
        target: test async insert
        method: insert the default dataframe with _async=True and read the future's result
        expected: verify insert_count, primary keys and num entities
        """
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        frame = cf.gen_default_dataframe_data()
        pending, _ = coll.insert(data=frame, _async=True)
        # NOTE(review): the return value is discarded; presumably this call
        # drives the future to completion - confirm against pymilvus
        pending.done()
        insert_res = pending.result()
        assert insert_res.insert_count == ct.default_nb
        expected_keys = frame[ct.default_int64_field_name].values.tolist()
        assert insert_res.primary_keys == expected_keys
        assert coll.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_async_false(self):
        """
        target: test insert with async disabled
        method: insert the default dataframe with _async=False
        expected: verify insert_count, primary keys and num entities
        """
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        frame = cf.gen_default_dataframe_data()
        insert_res, _ = coll.insert(data=frame, _async=False)
        assert insert_res.insert_count == ct.default_nb
        expected_keys = frame[ct.default_int64_field_name].values.tolist()
        assert insert_res.primary_keys == expected_keys
        assert coll.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_async_callback(self):
        """
        target: test async insert with a callback func
        method: insert with _async=True and assert_mutation_result as _callback
        expected: verify primary keys and num entities
        """
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        frame = cf.gen_default_dataframe_data()
        pending, _ = coll.insert(data=frame, _async=True,
                                 _callback=assert_mutation_result)
        # NOTE(review): return value discarded; kept because the call may
        # have side effects on the future - confirm against pymilvus
        pending.done()
        insert_res = pending.result()
        expected_keys = frame[ct.default_int64_field_name].values.tolist()
        assert insert_res.primary_keys == expected_keys
        assert coll.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_async_long(self):
        """
        target: test async insert with a larger batch
        method: insert 50000 entities with _async=True
        expected: verify insert_count, primary keys and num entities
        """
        row_count = 50000
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        frame = cf.gen_default_dataframe_data(row_count)
        pending, _ = coll.insert(data=frame, _async=True)
        # NOTE(review): return value discarded; see test_insert_sync
        pending.done()
        insert_res = pending.result()
        assert insert_res.insert_count == row_count
        expected_keys = frame[ct.default_int64_field_name].values.tolist()
        assert insert_res.primary_keys == expected_keys
        assert coll.num_entities == row_count

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_async_callback_timeout(self):
        """
        target: test async insert with a short timeout
        method: insert 100000 entities with _async=True, no callback and timeout=0.2
        expected: raise exception when the future's result is read
        """
        row_count = 100000
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        frame = cf.gen_default_dataframe_data(row_count)
        pending, _ = coll.insert(data=frame, _async=True, _callback=None, timeout=0.2)
        with pytest.raises(MilvusException):
            pending.result()

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_async_invalid_data(self):
        """
        target: test insert async with invalid data
        method: insert async an empty dataframe that only has the int64 and
                float vector columns
        expected: raise exception
        """
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        column_names = [ct.default_int64_field_name, ct.default_float_vec_field_name]
        empty_frame = pd.DataFrame(columns=column_names)
        expected_error = {ct.err_code: 0,
                          ct.err_msg: "The fields don't match with schema fields"}
        coll.insert(data=empty_frame, _async=True,
                    check_task=CheckTasks.err_res, check_items=expected_error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_async_invalid_partition(self):
        """
        target: test insert async with invalid partition
        method: insert async into partition "p", which this test never creates
        expected: raise exception when the future's result is read
        """
        coll = self.init_collection_wrap(name=cf.gen_unique_str(prefix))
        frame = cf.gen_default_dataframe_data()
        err_msg = "partition not found"
        pending, _ = coll.insert(data=frame, partition_name="p", _async=True)
        # NOTE(review): return value discarded; see test_insert_sync
        pending.done()
        with pytest.raises(MilvusException, match=err_msg):
            pending.result()
|
|
|
|
|
|
|
|
|
|
|
|
def assert_mutation_result(mutation_res):
    """Assert that the given mutation result reports ct.default_nb inserted rows."""
    inserted = mutation_res.insert_count
    assert inserted == ct.default_nb
|
2022-02-21 09:47:51 +08:00
|
|
|
|
|
|
|
|
|
|
|
class TestInsertBinary(TestcaseBase):
    """ Test cases of insert into collections built from the default binary schema """

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_binary_partition(self):
        """
        target: test insert binary entities into a partition
        method: create a binary collection and a partition, then insert binary
                entities with the partition_name param
        expected: insert_count equals nb
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
        # the generator returns a pair; only the dataframe is inserted
        df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
        partition_name = cf.gen_unique_str(prefix)
        partition_w1 = self.init_partition_wrap(collection_w, partition_name)
        mutation_res, _ = collection_w.insert(data=df, partition_name=partition_w1.name)
        assert mutation_res.insert_count == ct.default_nb

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_binary_multi_times(self):
        """
        target: test insert binary entities multi times
        method: create a binary collection and insert the same binary dataframe several times
        expected: the collection row count equals nb multiplied by the number of inserts
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
        df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
        nums = 2
        for _ in range(nums):
            # per-insert results are not inspected; only the final count is checked
            collection_w.insert(data=df)
        assert collection_w.num_entities == ct.default_nb * nums

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_binary_create_index(self):
        """
        target: test build index after inserting binary vectors
        method: insert binary vectors, then build a BIN_IVF_FLAT index on the binary vector field
        expected: no error raised
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name, schema=default_binary_schema)
        df, _ = cf.gen_default_binary_dataframe_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data=df)
        assert mutation_res.insert_count == ct.default_nb
        default_index = {"index_type": "BIN_IVF_FLAT",
                         "params": {"nlist": 128}, "metric_type": "JACCARD"}
        collection_w.create_index("binary_vector", default_index)
|
2022-06-30 15:54:18 +08:00
|
|
|
|
|
|
|
|
2022-02-21 09:47:51 +08:00
|
|
|
class TestInsertInvalid(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test insert invalid params
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
    def test_insert_with_invalid_field_value(self, primary_field):
        """
        target: verify error msg when inserting with invalid field value
        method: 1. generate valid column data for a collection with all data types
                2. for the first, middle and last row, replace one scalar value at a time
                   with a value of the wrong type and insert
                3. restore the value and finally insert the untouched data
        expected: every dirty insert raises an exception, the final insert succeeds
        """
        collection_w = self.init_collection_general(prefix, auto_id=False, insert_data=False,
                                                    primary_field=primary_field, is_index=False,
                                                    is_all_data_type=True, with_json=True)[0]
        nb = 100
        data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
        type_mismatch_msg = "The Input data type is inconsistent with defined schema"
        for dirty_i in [0, nb // 2, nb - 1]:  # check the dirty data at first, middle and last
            log.debug(f"dirty_i: {dirty_i}")
            for column in data:
                # Remember the value at the position being corrupted so that exactly
                # this value is restored afterwards (the int case used to save row 0).
                original = column[dirty_i]
                # Exact class comparison on purpose: bool is a subclass of int, and
                # values of any other class (vectors, json, numpy scalars...) are skipped.
                value_cls = original.__class__
                if value_cls is str:
                    dirty_value = random.randint(0, 1000)
                    err_msg = "expect string input, got: <class 'int'>"
                elif value_cls in (int, bool, float):
                    dirty_value = "iamstring"
                    err_msg = type_mismatch_msg
                else:
                    continue
                column[dirty_i] = dirty_value
                error = {ct.err_code: 999, ct.err_msg: err_msg}
                collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
                column[dirty_i] = original
        # all dirty values were restored, so the data must be insertable as a whole
        res = collection_w.insert(data)[0]
        assert res.insert_count == nb

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_with_invalid_partition_name(self):
        """
        target: test insert with invalid scenario
        method: insert with invalid partition name
        expected: raise exception
        """
        collection_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=collection_name)
        df = cf.gen_default_list_data(ct.default_nb)
        error = {ct.err_code: 15, 'err_msg': "partition not found"}
        collection_w.insert(data=df, partition_name="p", check_task=CheckTasks.err_res,
                            check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_with_pk_varchar_auto_id_true(self):
        """
        target: test insert with pk varchar and auto id true
        method: set pk varchar max length < 18, insert data without the pk column
        expected: varchar pk supports auto_id=true
        """
        string_field = cf.gen_string_field(is_primary=True, max_length=6)
        embedding_field = cf.gen_float_vec_field()
        schema = cf.gen_collection_schema(
            [string_field, embedding_field], auto_id=True)
        collection_w = self.init_collection_wrap(schema=schema)
        # only the vector column is supplied; the primary keys are auto-generated
        data = [[[random.random() for _ in range(ct.default_dim)]
                 for _ in range(2)]]
        res = collection_w.insert(data=data)[0]
        assert res.insert_count == 2

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("invalid_int8", [-129, 128])
    def test_insert_int8_overflow(self, invalid_int8):
        """
        target: test insert int8 out of range
        method: insert int8 out of range
        expected: raise exception
        """
        collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0]
        data = cf.gen_dataframe_all_data_type(nb=1)
        data[ct.default_int8_field_name] = [invalid_int8]
        error = {ct.err_code: 1100, 'err_msg': "The data type of field int8 doesn't match, "
                                               "expected: INT8, got INT64"}
        collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("invalid_int16", [-32769, 32768])
    def test_insert_int16_overflow(self, invalid_int16):
        """
        target: test insert int16 out of range
        method: insert int16 out of range
        expected: raise exception
        """
        collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0]
        data = cf.gen_dataframe_all_data_type(nb=1)
        data[ct.default_int16_field_name] = [invalid_int16]
        error = {ct.err_code: 1100, 'err_msg': "The data type of field int16 doesn't match, "
                                               "expected: INT16, got INT64"}
        collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("invalid_int32", [-2147483649, 2147483648])
    def test_insert_int32_overflow(self, invalid_int32):
        """
        target: test insert int32 out of range
        method: insert int32 out of range
        expected: raise exception
        """
        collection_w = self.init_collection_general(prefix, is_all_data_type=True)[0]
        data = cf.gen_dataframe_all_data_type(nb=1)
        data[ct.default_int32_field_name] = [invalid_int32]
        error = {ct.err_code: 999, 'err_msg': "The Input data type is inconsistent with defined schema"}
        collection_w.insert(data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_over_resource_limit(self):
        """
        target: test insert over RPC limitation 64MB (67108864)
        method: insert excessive data
        expected: raise exception
        """
        nb = 150000
        collection_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=collection_name)
        data = cf.gen_default_dataframe_data(nb)
        error = {ct.err_code: 999, ct.err_msg: "message larger than max"}
        collection_w.insert(
            data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip("not support default_value now")
    @pytest.mark.parametrize("default_value", [[], None])
    def test_insert_array_using_default_value(self, default_value):
        """
        target: test insert with array
        method: insert a row whose varchar value is an invalid placeholder ([] or None)
        expected: raise exception
        """
        fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
                  cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
        schema = cf.gen_collection_schema(fields)
        collection_w = self.init_collection_wrap(schema=schema)
        vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
        data = [{"int64": 1, "float_vector": vectors[1],
                 "varchar": default_value, "float": np.float32(1.0)}]
        collection_w.insert(data, check_task=CheckTasks.err_res,
                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip("not support default_value now")
    @pytest.mark.parametrize("default_value", [[], None])
    def test_insert_tuple_using_default_value(self, default_value):
        """
        target: test insert with tuple
        method: insert a tuple whose last column is an invalid placeholder ([] or None)
        expected: raise exception
        """
        fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_vec_field(),
                  cf.gen_string_field(), cf.gen_float_field(default_value=np.float32(3.14))]
        schema = cf.gen_collection_schema(fields)
        collection_w = self.init_collection_wrap(schema=schema)
        vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
        int_values = list(range(ct.default_nb))
        string_values = ["abc"] * ct.default_nb
        data = (int_values, vectors, string_values, default_value)
        collection_w.insert(data, check_task=CheckTasks.err_res,
                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_with_nan_value(self):
        """
        target: test insert with nan value
        method: insert with nan value: None, float('nan'), np.nan, float('inf')
        expected: raise exception
        """
        vector_field = ct.default_float_vec_field_name
        collection_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=collection_name)
        data = cf.gen_default_dataframe_data()
        # each step overwrites the first component of the first vector in place
        data[vector_field][0][0] = None
        error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
        data[vector_field][0][0] = float('nan')
        error = {ct.err_code: 999, ct.err_msg: "value 'NaN' is not a number or infinity"}
        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
        # np.nan instead of the np.NAN alias, which no longer exists in NumPy 2.0
        data[vector_field][0][0] = np.nan
        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
        data[vector_field][0][0] = float('inf')
        error = {ct.err_code: 65535, ct.err_msg: "value '+Inf' is not a number or infinity"}
        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("index", ct.all_index_types[9:11])
    @pytest.mark.parametrize("invalid_vector_type", ["FLOAT_VECTOR", "FLOAT16_VECTOR", "BFLOAT16_VECTOR"])
    def test_invalid_sparse_vector_data(self, index, invalid_vector_type):
        """
        target: insert illegal data type
        method: replace the last column of the generated sparse data with dense vectors
                of another vector type and insert
        expected: raise exception
        """
        c_name = cf.gen_unique_str(prefix)
        schema = cf.gen_default_sparse_schema()
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        nb = 100
        # drop the last generated column and append the invalid vectors in its place
        data = cf.gen_default_list_sparse_data(nb=nb)[:-1]
        invalid_vec = cf.gen_vectors(nb, dim=128, vector_data_type=invalid_vector_type)
        data.append(invalid_vec)
        error = {ct.err_code: 1, ct.err_msg: 'input must be a sparse matrix in supported format'}
        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)
|
|
|
|
|
2022-06-30 15:54:18 +08:00
|
|
|
|
2022-02-21 09:47:51 +08:00
|
|
|
class TestInsertInvalidBinary(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test insert invalid params of binary
    ******************************************************************
    """

    @pytest.mark.tags(CaseLabel.L1)
    def test_insert_ids_binary_invalid(self):
        """
        target: test insert float vector into a collection with binary vector schema
        method: create a binary collection and insert default (non-binary) list data
        expected: raise exception
        """
        init_options = dict(auto_id=False, insert_data=False, is_binary=True,
                            is_index=False, with_json=False)
        collection_w = self.init_collection_general(prefix, **init_options)[0]
        rows = cf.gen_default_list_data(nb=100, with_json=False)
        expected_error = {ct.err_code: 999, ct.err_msg: "Invalid binary vector data exists"}
        _res, _ok = collection_w.insert(data=rows,
                                        check_task=CheckTasks.err_res,
                                        check_items=expected_error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_insert_with_invalid_binary_partition_name(self):
        """
        target: test insert with invalid scenario
        method: insert binary data into a partition that does not exist
        expected: raise exception
        """
        init_options = dict(auto_id=False, insert_data=False, is_binary=True,
                            is_index=False, with_json=False)
        collection_w = self.init_collection_general(prefix, **init_options)[0]
        partition_name = "non_existent_partition"
        frame, _ = cf.gen_default_binary_dataframe_data(nb=100)
        expected_error = {ct.err_code: 999,
                          'err_msg': f"partition not found[partition={partition_name}]"}
        _res, _ok = collection_w.insert(data=frame,
                                        partition_name=partition_name,
                                        check_task=CheckTasks.err_res,
                                        check_items=expected_error)
|
|
|
|
|
2022-02-21 09:47:51 +08:00
|
|
|
|
2022-05-09 15:53:52 +08:00
|
|
|
class TestInsertString(TestcaseBase):
    """
    ******************************************************************
      The following cases are used to test insert string
    ******************************************************************
    """

    @staticmethod
    def _gen_data_with_varchar(collection_w, nb, varchar_value):
        """
        Build column-based insert data for every field of the collection schema.

        VARCHAR fields get `nb` copies of `varchar_value`; every other field is
        filled by cf.gen_data_by_collection_field.
        """
        return [[varchar_value] * nb if field.dtype == DataType.VARCHAR
                else cf.gen_data_by_collection_field(field, nb=nb)
                for field in collection_w.schema.fields]

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_string_field_is_primary(self):
        """
        target: test insert string is primary
        method: 1.create a collection and string field is primary
                2.insert string field data
        expected: Insert Successfully
        """
        c_name = cf.gen_unique_str(prefix)
        schema = cf.gen_string_pk_default_collection_schema()
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        data = cf.gen_default_list_data(ct.default_nb)
        mutation_res, _ = collection_w.insert(data=data)
        assert mutation_res.insert_count == ct.default_nb
        # the returned primary keys are compared with the third generated column
        assert mutation_res.primary_keys == data[2]

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("string_fields", [[cf.gen_string_field(name="string_field1")],
                                               [cf.gen_string_field(name="string_field2")],
                                               [cf.gen_string_field(name="string_field3")]])
    def test_insert_multi_string_fields(self, string_fields):
        """
        target: test insert multi string fields
        method: 1.create a collection
                2.Insert multi string fields
        expected: Insert Successfully
        """
        schema = cf.gen_schema_multi_string_fields(string_fields)
        collection_w = self.init_collection_wrap(
            name=cf.gen_unique_str(prefix), schema=schema)
        df = cf.gen_dataframe_multi_string_fields(string_fields=string_fields)
        collection_w.insert(df)
        assert collection_w.num_entities == ct.default_nb

    @pytest.mark.tags(CaseLabel.L0)
    def test_insert_string_field_length_exceed(self):
        """
        target: test insert string field exceed the maximum length
        method: 1.create a collection
                2.Insert string field length is exceeded maximum value of 65535
        expected: Raise exceptions
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        max_length = 65535
        # one character more than the limit
        too_long = cf.gen_str_by_length(length=max_length + 1)
        data = self._gen_data_with_varchar(collection_w, nb=1, varchar_value=too_long)

        error = {ct.err_code: 999, ct.err_msg: 'length of string exceeds max length'}
        collection_w.insert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("str_field_value", ["", " "])
    def test_insert_string_field_space_empty(self, str_field_value):
        """
        target: test insert string field with empty or space-only values
        method: 1.create a collection
                2.Insert string field with empty string or space
        expected: Insert successfully
        """
        c_name = cf.gen_unique_str(prefix)
        collection_w = self.init_collection_wrap(name=c_name)
        nb = 100
        data = self._gen_data_with_varchar(collection_w, nb=nb, varchar_value=str_field_value)

        collection_w.insert(data)
        assert collection_w.num_entities == nb

    @pytest.mark.tags(CaseLabel.L1)
    @pytest.mark.parametrize("str_field_value", ["", " "])
    def test_insert_string_field_is_pk_and_empty(self, str_field_value):
        """
        target: test insert empty or space-only values when the string field is primary
        method: 1.create a collection whose primary key is the string field
                2.Insert string field with empty string or space
        expected: Insert successfully
        """
        c_name = cf.gen_unique_str(prefix)
        schema = cf.gen_string_pk_default_collection_schema()
        collection_w = self.init_collection_wrap(name=c_name, schema=schema)
        nb = 100
        data = self._gen_data_with_varchar(collection_w, nb=nb, varchar_value=str_field_value)
        collection_w.insert(data)
        assert collection_w.num_entities == nb
|
2023-03-03 15:23:48 +08:00
|
|
|
|
|
|
|
|
|
|
|
class TestUpsertValid(TestcaseBase):
|
|
|
|
""" Valid test case of Upsert interface """
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_upsert_data_pk_not_exist(self):
    """
    target: test upsert with collection has no data
    method: 1. create a collection with no initialized data
            2. upsert data
    expected: upsert runs normally, acting as insert
    """
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(pre_upsert))
    frame = cf.gen_default_dataframe_data()
    collection_w.upsert(data=frame)
    # nothing existed before, so all default rows must now be present
    assert collection_w.num_entities == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L0)
@pytest.mark.parametrize("start", [0, 1500, 3500])
def test_upsert_data_pk_exist(self, start):
    """
    target: test upsert data and collection pk exists
    method: 1. create a collection and insert data
            2. upsert data whose pk exists
    expected: upsert succeed
    """
    upsert_nb = 1000
    collection_w = self.init_collection_general(pre_upsert, True)[0]
    upsert_data, float_values = cf.gen_default_data_for_upsert(upsert_nb, start=start)
    collection_w.upsert(data=upsert_data)

    # read back the float field over the upserted pk range
    pk_range_expr = f"int64 >= {start} && int64 <= {upsert_nb + start}"
    rows = collection_w.query(pk_range_expr, output_fields=[default_float_name])[0]
    queried_floats = [rows[i][default_float_name] for i in range(upsert_nb)]
    assert queried_floats == float_values.to_list()
|
2023-03-03 15:23:48 +08:00
|
|
|
|
2024-08-20 14:20:56 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L0)
def test_upsert_with_auto_id(self):
    """
    target: test upsert with auto id
    method: 1. create a collection with autoID=true
            2. upsert 10 entities with non-existing pks
            verify: success, and the pks are auto-generated
            3. query 10 entities to get the existing pks
            4. upsert 10 entities with existing pks
            verify: success, and the pks are re-generated, and the new pks are visible
    """
    dim = 32
    pk_name = ct.default_int64_field_name
    collection_w, _, _, insert_ids, _ = self.init_collection_general(pre_upsert, auto_id=True,
                                                                     dim=dim, insert_data=True, with_json=False)

    def count_by_pks(pks):
        """Return the count(*) of entities whose primary key is in pks."""
        res = collection_w.query(expr=f"{pk_name} in {pks}",
                                 output_fields=[ct.default_count_output])[0]
        return res[0].get(ct.default_count_output)

    nb = 10
    start = ct.default_nb * 10
    data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
    res_upsert1 = collection_w.upsert(data=data)[0]
    collection_w.flush()
    # assert the pks are auto-generated, and num_entities increased for upsert with non_existing pks
    assert res_upsert1.primary_keys[0] > insert_ids[-1]
    assert collection_w.num_entities == ct.default_nb + nb

    # query 10 entities to get the existing pks
    res_q = collection_w.query(expr='', limit=nb)[0]
    log.debug(f"res_q: {res_q}")
    existing_pks = [res_q[i][pk_name] for i in range(nb)]
    assert nb == count_by_pks(existing_pks)

    # upsert 10 entities with the existing pks
    start = ct.default_nb * 20
    data = cf.gen_default_list_data(dim=dim, nb=nb, start=start, with_json=False)
    data[0] = existing_pks
    res_upsert2 = collection_w.upsert(data=data)[0]
    collection_w.flush()
    # assert the new pks are auto-generated again
    assert res_upsert2.primary_keys[0] > res_upsert1.primary_keys[-1]
    # the old pks are gone, the re-generated ones are queryable
    assert 0 == count_by_pks(existing_pks)
    res_q = collection_w.query(expr=f"{pk_name} in {res_upsert2.primary_keys}",
                               output_fields=["*"])[0]
    assert nb == len(res_q)
    # upserting existing pks replaces entities, so the total stays unchanged
    current_count = collection_w.query(expr='', output_fields=[ct.default_count_output])[0]
    assert current_count[0].get(ct.default_count_output) == ct.default_nb + nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_upsert_with_primary_key_string(self, auto_id):
    """
    target: test upsert with string primary key
    method: 1. create a collection with pk string
            2. insert data
            3. upsert data with ' ' before or after string
    expected: raise no exception
    """
    c_name = cf.gen_unique_str(pre_upsert)
    fields = [cf.gen_string_field(), cf.gen_float_vec_field(dim=ct.default_dim)]
    schema = cf.gen_collection_schema(fields=fields,
                                      primary_field=ct.default_string_field_name,
                                      auto_id=auto_id)
    collection_w = self.init_collection_wrap(name=c_name, schema=schema)
    vectors = [[random.random() for _ in range(ct.default_dim)] for _ in range(2)]

    # with auto_id the pk column is omitted on insert
    if auto_id:
        collection_w.insert([vectors])
    else:
        collection_w.insert([["a", "b"], vectors])

    res_upsert = collection_w.upsert([[" a", "b "], vectors])[0]
    if auto_id:
        # the supplied pks are replaced by generated ones
        assert res_upsert.primary_keys[0] != " a" and res_upsert.primary_keys[1] != "b "
    else:
        # the padded pks are kept verbatim
        assert res_upsert.primary_keys[0] == " a" and res_upsert.primary_keys[1] == "b "
    assert collection_w.num_entities == 4
|
|
|
|
|
2023-03-17 19:37:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_upsert_binary_data(self):
    """
    target: test upsert binary data
    method: 1. create a collection and insert data
            2. upsert data
            3. check the results
    expected: raise no exception
    """
    nb = 500
    c_name = cf.gen_unique_str(pre_upsert)
    collection_w = self.init_collection_general(c_name, True, is_binary=True)[0]
    binary_vectors = cf.gen_binary_vectors(nb, ct.default_dim)[1]
    int_column = list(range(nb))
    float_column = [np.float32(value) for value in range(nb)]
    string_column = [str(value) for value in range(nb)]
    collection_w.upsert([int_column, float_column, string_column, binary_vectors])

    vec_field = ct.default_binary_vec_field_name
    rows = collection_w.query("int64 >= 0", [vec_field])[0]
    # the first returned row must carry the first upserted binary vector
    assert binary_vectors[0] == rows[0][vec_field][0]
|
2023-03-17 19:37:55 +08:00
|
|
|
|
2023-03-03 15:23:48 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_upsert_same_with_inserted_data(self):
    """
    target: test upsert with data same with collection inserted data
    method: 1. create a collection and insert data
            2. upsert data same with inserted
            3. check the update data number
    expected: upsert successfully
    """
    upsert_nb = 1000
    c_name = cf.gen_unique_str(pre_upsert)
    collection_w = self.init_collection_wrap(name=c_name)
    data = cf.gen_default_dataframe_data()
    collection_w.insert(data=data)
    # re-send the first upsert_nb rows unchanged
    upsert_data = data[:upsert_nb]
    res = collection_w.upsert(data=upsert_data)[0]
    # Two separate asserts: "assert a, b" would only use b as the failure message
    # and never check it.
    assert res.insert_count == upsert_nb
    assert res.delete_count == upsert_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_upsert_data_is_none(self):
    """
    target: test upsert with data=None
    method: 1. create a collection
            2. insert data
            3. upsert data=None
    expected: raise exception about the unsupported data type
    """
    collection_w = self.init_collection_general(pre_upsert, insert_data=True, is_index=False)[0]
    assert collection_w.num_entities == ct.default_nb
    expected_error = {
        ct.err_code: 999,
        ct.err_msg: "The type of data should be List, pd.DataFrame or Dict",
    }
    collection_w.upsert(data=None, check_task=CheckTasks.err_res, check_items=expected_error)
|
2023-03-03 15:23:48 +08:00
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_upsert_in_specific_partition(self):
    """
    target: test upsert in specific partition
    method: 1. create a collection and 2 partitions
            2. insert data
            3. upsert in the given partition
    expected: raise no exception
    """
    new_partition = "partition_new"

    # set up the collection with the default and one extra partition
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(pre_upsert))
    collection_w.create_partition(new_partition)
    cf.insert_data(collection_w)
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    collection_w.load()

    # the pks about to be upserted must currently live in partition _default
    upsert_nb = 10
    expr = f"int64 >= 0 && int64 < {upsert_nb}"
    default_before = collection_w.query(expr, [default_float_name], ["_default"])[0]
    assert len(default_before) == upsert_nb
    collection_w.flush()
    other_before = collection_w.query(expr, [default_float_name], ["partition_new"])[0]
    assert collection_w.partition('partition_new')[0].num_entities == ct.default_nb // 2

    # upsert those pks into partition _default
    data, float_values = cf.gen_default_data_for_upsert(upsert_nb)
    collection_w.upsert(data=data, partition_name="_default")

    # _default holds the new values; the other partition is unchanged (no missing, nothing new)
    collection_w.flush()
    default_after = collection_w.query(expr, [default_float_name], ["_default"])[0]
    other_after = collection_w.query(expr, [default_float_name], ["partition_new"])[0]
    assert other_before == other_after
    upserted_floats = [default_after[i][default_float_name] for i in range(upsert_nb)]
    assert upserted_floats == float_values.to_list()
    assert collection_w.partition('partition_new')[0].num_entities == ct.default_nb // 2
|
2023-03-13 15:33:58 +08:00
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
# NOTE: a skip marker for issue #22592 used to be here and is currently disabled
def test_upsert_in_mismatched_partitions(self):
    """
    target: test upsert in unmatched partition
    method: 1. create a collection and 2 partitions
            2. insert data and load
            3. upsert in unmatched partitions
    expected: upsert successfully
    """
    target_partition = "partition_1"

    # collection with two extra partitions
    collection_w = self.init_collection_wrap(name=cf.gen_unique_str(pre_upsert))
    collection_w.create_partition("partition_1")
    collection_w.create_partition("partition_2")

    # populate, index and load
    cf.insert_data(collection_w)
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    collection_w.load()

    # none of the pks to be upserted may exist in 'partition_1' yet
    upsert_nb = 100
    expr = f"int64 >= 0 && int64 <= {upsert_nb}"
    before = collection_w.query(expr, [default_float_name], [target_partition])[0]
    assert len(before) == 0

    # upsert into 'partition_1'
    data, float_values = cf.gen_default_data_for_upsert(upsert_nb)
    collection_w.upsert(data, target_partition)

    # the upserted rows are now served from 'partition_1'
    after = collection_w.query(expr, [default_float_name], [target_partition])[0]
    upserted_floats = [after[i][default_float_name] for i in range(upsert_nb)]
    assert upserted_floats == float_values.to_list()
|
2023-03-13 15:33:58 +08:00
|
|
|
|
2023-03-21 19:06:00 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_upsert_same_pk_concurrently(self):
    """
    target: test upsert the same pk concurrently
    method: 1. create a collection through init_collection_general
            2. generate two upsert batches for the same primary keys
            3. upsert both batches at the same time from two worker threads
    expected: neither upsert raises, and the queried float values equal
              exactly one of the two batches
    """
    # Function-scope import: only this test needs the executor.
    from concurrent.futures import ThreadPoolExecutor

    # initialize a collection
    upsert_nb = 1000
    collection_w = self.init_collection_general(pre_upsert, True)[0]
    data1, float_values1 = cf.gen_default_data_for_upsert(upsert_nb, size=1000)
    data2, float_values2 = cf.gen_default_data_for_upsert(upsert_nb)

    # upsert at the same time
    # A bare threading.Thread drops any exception raised in the worker, so a
    # failing upsert could go unnoticed. Futures keep the exception instead.
    with ThreadPoolExecutor(max_workers=2) as pool:
        futures = [pool.submit(collection_w.upsert, data=batch) for batch in (data1, data2)]
    # Leaving the `with` block waits for both workers; result() re-raises
    # a worker's exception in the test thread.
    for future in futures:
        future.result()

    # check the result
    exp = f"int64 >= 0 && int64 <= {upsert_nb}"
    res = collection_w.query(exp, [default_float_name], consistency_level="Strong")[0]
    res = [res[i][default_float_name] for i in range(upsert_nb)]
    assert res == float_values1.to_list() or res == float_values2.to_list(), \
        "queried float values match neither of the concurrently upserted batches"
|
|
|
|
|
2023-03-17 19:37:55 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_upsert_multiple_times(self):
    """
    target: test upsert multiple times
    method: 1. create a collection through init_collection_general
            2. upsert ten batches of 1000 rows, each batch starting 500 later
               than the previous one
    expected: not raise exception; count(*) equals 1000 * 10 - 500 * 9
    """
    batch_size = 1000
    stride = 500
    rounds = 10

    # initialize a collection
    collection_w = self.init_collection_general(pre_upsert, True)[0]

    # upsert: consecutive batches overlap because stride < batch_size
    for start_id in range(0, rounds * stride, stride):
        batch = cf.gen_default_data_for_upsert(batch_size, start=start_id)[0]
        collection_w.upsert(batch)

    # check the result
    counted = collection_w.query(expr="", output_fields=["count(*)"])[0]
    expected_total = batch_size * rounds - stride * (rounds - 1)
    assert counted[0]["count(*)"] == expected_total
|
2023-03-13 15:33:58 +08:00
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_upsert_pk_string_multiple_times(self):
    """
    target: test upsert multiple times on a string-pk schema
    method: 1. create a collection from gen_string_pk_default_collection_schema
               and insert the default list data
            2. upsert ten batches of 1000 rows, each batch starting 500 later
            3. create an index, load and run count(*)
    expected: not raise exception; count(*) equals 1000 * 10 - 500 * 9
    """
    batch_size = 1000
    stride = 500
    rounds = 10

    # initialize a collection
    pk_schema = cf.gen_string_pk_default_collection_schema()
    collection_name = cf.gen_unique_str(pre_upsert)
    collection_w = self.init_collection_wrap(collection_name, pk_schema)
    collection_w.insert(cf.gen_default_list_data())

    # upsert
    for round_no in range(rounds):
        batch = cf.gen_default_list_data(batch_size, start=round_no * stride)
        collection_w.upsert(batch)

    # load
    collection_w.create_index(ct.default_float_vec_field_name, default_index_params)
    collection_w.load()

    # check the result
    counted = collection_w.query(expr="", output_fields=["count(*)"])[0]
    expected_total = batch_size * rounds - stride * (rounds - 1)
    assert counted[0]["count(*)"] == expected_total
|
2023-03-03 15:23:48 +08:00
|
|
|
|
2023-10-17 14:16:08 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_upsert_enable_dynamic_field(self):
    """
    target: test upsert when enable dynamic field is True
    method: 1. create a collection with enable_dynamic_field=True
            2. upsert rows that carry an additional "new" key
    expected: not raise exception; the queried "new" value has two elements
    """
    row_count = ct.default_nb
    first_id = ct.default_nb // 2
    collection_w = self.init_collection_general(pre_upsert, True, enable_dynamic_field=True)[0]

    # attach [i, i + 1] to each generated row, with i counting up from first_id
    rows = cf.gen_default_rows_data(start=first_id)
    for offset in range(row_count):
        counter = first_id + offset
        rows[offset]["new"] = [counter, counter + 1]
    collection_w.upsert(data=rows)

    id_filter = f"int64 >= {first_id} && int64 <= {row_count + first_id}"
    hits = collection_w.query(id_filter, output_fields=["new"])[0]
    assert len(hits[0]["new"]) == 2
|
|
|
|
|
2023-06-19 15:44:41 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_upsert_one_field_using_default_value(self, default_value):
    """
    target: test upsert with one field using default value
    method: 1. create a collection whose string field has default_value="abc"
            2. upsert column data with []/None in place of the string column
    expected: upsert successfully
    """
    # schema: int64 pk, float, string (with default), float vector
    pk_field = cf.gen_int64_field(is_primary=True)
    float_field = cf.gen_float_field()
    string_field = cf.gen_string_field(default_value="abc")
    vector_field = cf.gen_float_vec_field()
    schema = cf.gen_collection_schema([pk_field, float_field, string_field, vector_field])
    collection_w = self.init_collection_wrap(schema=schema)
    cf.insert_data(collection_w, with_json=False)

    # column-based payload in schema order; the string column is []/None
    pk_column = list(range(ct.default_nb))
    float_column = list(map(np.float32, range(ct.default_nb)))
    vector_column = cf.gen_vectors(ct.default_nb, ct.default_dim)
    collection_w.upsert([pk_column, float_column, default_value, vector_column])
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.skip("not support default_value now")
@pytest.mark.parametrize("default_value", [[], None])
def test_upsert_multi_fields_using_default_value(self, default_value):
    """
    target: test upsert with multi fields using default value
    method: 1. default value fields before vector, upsert [], None, fail
            2. default value fields all after vector field, upsert without them, succeed
    expected: report error in case 1 and upsert successfully in case 2
    """
    # 1. default value fields before vector, insert [], None, fail
    fields_before_vector = [
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_field(default_value=np.float32(1.0)),
        cf.gen_string_field(default_value="abc"),
        cf.gen_float_vec_field(),
    ]
    schema = cf.gen_collection_schema(fields_before_vector)
    collection_w = self.init_collection_wrap(schema=schema)
    cf.insert_data(collection_w, with_json=False)
    # if multi default_value fields before vector field, every field must use []/None
    short_payload = [
        list(range(ct.default_nb)),
        default_value,
        cf.gen_vectors(ct.default_nb, ct.default_dim),
    ]
    expected_error = {ct.err_code: 999,
                      ct.err_msg: "The data type of field varchar doesn't match"}
    collection_w.upsert(short_payload,
                        check_task=CheckTasks.err_res,
                        check_items=expected_error)

    # 2. default value fields all after vector field, insert empty, succeed
    fields_after_vector = [
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_vec_field(),
        cf.gen_float_field(default_value=np.float32(1.0)),
        cf.gen_string_field(default_value="abc"),
    ]
    schema = cf.gen_collection_schema(fields_after_vector)
    collection_w = self.init_collection_wrap(schema=schema)
    # both payloads are built up front, as in the original statement order
    pk_and_vectors = [
        list(range(ct.default_nb)),
        cf.gen_vectors(ct.default_nb, ct.default_dim),
    ]
    pk_vectors_and_floats = [
        list(range(ct.default_nb)),
        cf.gen_vectors(ct.default_nb, ct.default_dim),
        list(map(np.float32, range(ct.default_nb))),
    ]
    collection_w.upsert(pk_and_vectors)
    assert collection_w.num_entities == ct.default_nb
    collection_w.upsert(pk_vectors_and_floats)
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.skip("not support default_value now")
def test_upsert_dataframe_using_default_value(self):
    """
    target: test upsert with dataframe
    method: upsert a dataframe that leaves out the string column, which has a default value
    expected: upsert successfully
    """
    schema = cf.gen_collection_schema([
        cf.gen_int64_field(is_primary=True),
        cf.gen_float_field(),
        cf.gen_string_field(default_value="abc"),
        cf.gen_float_vec_field(),
    ])
    collection_w = self.init_collection_wrap(schema=schema)
    vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)

    # None/[] is not allowed when using dataframe
    # To use default value, delete the whole item
    pk_series = pd.Series(data=list(range(0, ct.default_nb)))
    float_series = pd.Series(data=list(map(float, range(ct.default_nb))), dtype="float32")
    frame = pd.DataFrame({
        "int64": pk_series,
        "float_vector": vectors,
        "float": float_series,
    })
    collection_w.upsert(frame)
    assert collection_w.num_entities == ct.default_nb
|
|
|
|
|
2024-06-19 15:24:09 +08:00
|
|
|
@pytest.mark.tags(CaseLabel.L2)
@pytest.mark.parametrize("index", ct.all_index_types[9:11])
def test_upsert_sparse_data(self, index):
    """
    target: multiple upserts and counts(*)
    method: 1. upsert ct.default_nb sparse rows into a sparse-schema collection
            2. create the parametrized index on the sparse vector field and load
            3. upsert the same rows five more times, checking count(*) each time
    expected: number of data entries normal (count(*) stays at ct.default_nb)
    """
    c_name = cf.gen_unique_str(prefix)
    schema = cf.gen_default_sparse_schema()
    collection_w = self.init_collection_wrap(name=c_name, schema=schema)
    data = cf.gen_default_list_sparse_data(nb=ct.default_nb)
    collection_w.upsert(data=data)
    assert collection_w.num_entities == ct.default_nb

    params = cf.get_index_params_params(index)
    index_params = {"index_type": index, "metric_type": "IP", "params": params}
    collection_w.create_index(ct.default_sparse_vec_field_name, index_params, index_name=index)
    collection_w.load()

    # NOTE(review): the count check is assumed to sit inside the loop; the
    # flattened source does not show the indentation, so confirm this.
    for _ in range(5):
        collection_w.upsert(data=data)
        collection_w.query(expr=f'{ct.default_int64_field_name} >= 0',
                           output_fields=[ct.default_count_output],
                           check_task=CheckTasks.check_query_results,
                           check_items={"exp_res": [{"count(*)": ct.default_nb}]})
|
|
|
|
|
2023-03-03 15:23:48 +08:00
|
|
|
|
|
|
|
class TestUpsertInvalid(TestcaseBase):
    """ Invalid test case of Upsert interface """

    @pytest.mark.tags(CaseLabel.L0)
    @pytest.mark.parametrize("primary_field", [ct.default_int64_field_name, ct.default_string_field_name])
    def test_upsert_data_type_dismatch(self, primary_field):
        """
        target: test upsert with invalid data type
        method: for the first, middle and last row, replace one column value at a
                time with a value of the wrong type (string for int/bool/float,
                int for string) and upsert
        expected: every dirty upsert raises exception; after each value is restored
                  the clean data upserts with insert_count == nb
        """
        collection_w = self.init_collection_general(pre_upsert, auto_id=False, insert_data=False,
                                                    primary_field=primary_field, is_index=False,
                                                    is_all_data_type=True, with_json=True)[0]
        nb = 100
        data = cf.gen_data_by_collection_schema(collection_w.schema, nb=nb)
        schema_mismatch_msg = "The Input data type is inconsistent with defined schema"
        string_expected_msg = "expect string input, got: <class 'int'>"
        for dirty_i in [0, nb // 2, nb - 1]:  # check the dirty data at first, middle and last
            log.debug(f"dirty_i: {dirty_i}")
            for column in data:
                # Save the value at dirty_i itself. The old int branch saved
                # column[0], so "restoring" overwrote the middle/last rows with
                # the first row's value (duplicate primary keys).
                clean_value = column[dirty_i]
                # Exact type match on purpose: bool is not lumped in with int,
                # and any other type (subclasses included) is skipped.
                value_type = type(clean_value)
                if value_type in (int, bool, float):
                    dirty_value, err_msg = "iamstring", schema_mismatch_msg
                elif value_type is str:
                    dirty_value, err_msg = random.randint(0, 1000), string_expected_msg
                else:
                    continue
                column[dirty_i] = dirty_value
                error = {ct.err_code: 999, ct.err_msg: err_msg}
                collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)
                column[dirty_i] = clean_value
        # NOTE(review): assumed to run once after both loops; the flattened
        # source does not show the indentation, so confirm this.
        res = collection_w.upsert(data)[0]
        assert res.insert_count == nb

    @pytest.mark.tags(CaseLabel.L2)
    def test_upsert_vector_unmatch(self):
        """
        target: test upsert with unmatched vector field
        method: 1. create a collection through init_collection_wrap (with_json=False)
                2. upsert the default binary-vector dataframe
        expected: raise exception (field name float_vector expected, got binary_vector)
        """
        c_name = cf.gen_unique_str(pre_upsert)
        collection_w = self.init_collection_wrap(name=c_name, with_json=False)
        data = cf.gen_default_binary_dataframe_data()[0]
        error = {ct.err_code: 999,
                 ct.err_msg: "The name of field don't match, expected: float_vector, got binary_vector"}
        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("dim", [128 - 8, 128 + 8])
    def test_upsert_binary_dim_unmatch(self, dim):
        """
        target: test upsert with unmatched vector dim
        method: 1. create a binary collection with default dim 128
                2. upsert binary data with mismatched dim
        expected: raise exception
        """
        collection_w = self.init_collection_general(pre_upsert, True, is_binary=True)[0]
        data = cf.gen_default_binary_dataframe_data(dim=dim)[0]
        error = {ct.err_code: 1100,
                 ct.err_msg: f"Collection field dim is 128, but entities field dim is {dim}"}
        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("dim", [256])
    def test_upsert_dim_unmatch(self, dim):
        """
        target: test upsert with unmatched vector dim
        method: 1. create a collection with default dim 128
                2. upsert with mismatched dim
        expected: raise exception
        """
        nb = 10
        collection_w = self.init_collection_general(pre_upsert, True, with_json=False)[0]
        data = cf.gen_default_list_data(nb=nb, dim=dim, with_json=False)
        error = {ct.err_code: 1100,
                 ct.err_msg: f"the dim ({dim}) of field data(float_vector) is not equal to schema dim ({ct.default_dim})"}
        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.parametrize("partition_name", ct.invalid_resource_names[4:])
    def test_upsert_partition_name_non_existing(self, partition_name):
        """
        target: test upsert partition name invalid
        method: 1. create a collection with partitions
                2. upsert with invalid partition name
        expected: raise exception
        """
        c_name = cf.gen_unique_str(pre_upsert)
        collection_w = self.init_collection_wrap(name=c_name)
        p_name = cf.gen_unique_str('partition_')
        collection_w.create_partition(p_name)
        cf.insert_data(collection_w)
        data = cf.gen_default_dataframe_data(nb=100)
        error = {ct.err_code: 999, ct.err_msg: "Invalid partition name"}
        collection_w.upsert(data=data, partition_name=partition_name,
                            check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_upsert_partition_name_nonexistent(self):
        """
        target: test upsert partition name nonexistent
        method: 1. create a collection
                2. upsert with nonexistent partition name
        expected: raise exception
        """
        c_name = cf.gen_unique_str(pre_upsert)
        collection_w = self.init_collection_wrap(name=c_name)
        data = cf.gen_default_dataframe_data(nb=2)
        partition_name = "partition1"
        error = {ct.err_code: 200, ct.err_msg: f"partition not found[partition={partition_name}]"}
        collection_w.upsert(data=data, partition_name=partition_name,
                            check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip("insert and upsert have removed the [] error check")
    def test_upsert_multi_partitions(self):
        """
        target: test upsert two partitions
        method: 1. create a collection and two partitions
                2. upsert with a list of two partition names
        expected: raise exception
        """
        c_name = cf.gen_unique_str(pre_upsert)
        collection_w = self.init_collection_wrap(name=c_name)
        collection_w.create_partition("partition_1")
        collection_w.create_partition("partition_2")
        cf.insert_data(collection_w)
        data = cf.gen_default_dataframe_data(nb=1000)
        error = {ct.err_code: 999, ct.err_msg: "['partition_1', 'partition_2'] has type <class 'list'>, "
                                               "but expected one of: (<class 'bytes'>, <class 'str'>)"}
        collection_w.upsert(data=data, partition_name=["partition_1", "partition_2"],
                            check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    def test_upsert_with_auto_id_pk_type_dismacth(self):
        """
        target: test upsert with pk type dismatch
        method: 1. create a collection through init_collection_general
                   (auto_id=False, insert_data=True)
                2. upsert list data whose first column is replaced by strings
        expected: raise exception
        """
        dim = 16
        collection_w = self.init_collection_general(pre_upsert, auto_id=False,
                                                    dim=dim, insert_data=True, with_json=False)[0]
        nb = 10
        data = cf.gen_default_list_data(dim=dim, nb=nb, with_json=False)
        # first column becomes strings (presumably the int64 pk column -- confirm)
        data[0] = [str(i) for i in range(nb)]
        error = {ct.err_code: 999, ct.err_msg: "The Input data type is inconsistent with defined schema"}
        collection_w.upsert(data=data, check_task=CheckTasks.err_res, check_items=error)

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip("not support default_value now")
    @pytest.mark.parametrize("default_value", [[], None])
    def test_upsert_array_using_default_value(self, default_value):
        """
        target: test upsert with array
        method: upsert a one-row list of dicts whose "varchar" value is []/None
        expected: raise exception
        """
        fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(),
                  cf.gen_string_field(default_value="abc"), cf.gen_float_vec_field()]
        schema = cf.gen_collection_schema(fields)
        collection_w = self.init_collection_wrap(schema=schema)
        vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
        data = [{"int64": 1, "float_vector": vectors[1],
                 "varchar": default_value, "float": np.float32(1.0)}]
        collection_w.upsert(data, check_task=CheckTasks.err_res,
                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})

    @pytest.mark.tags(CaseLabel.L2)
    @pytest.mark.skip("not support default_value now")
    @pytest.mark.parametrize("default_value", [[], None])
    def test_upsert_tuple_using_default_value(self, default_value):
        """
        target: test upsert with tuple
        method: upsert a tuple of columns whose float column is []/None
        expected: raise exception
        """
        fields = [cf.gen_int64_field(is_primary=True), cf.gen_float_field(default_value=np.float32(3.14)),
                  cf.gen_string_field(), cf.gen_float_vec_field()]
        schema = cf.gen_collection_schema(fields)
        collection_w = self.init_collection_wrap(schema=schema)
        vectors = cf.gen_vectors(ct.default_nb, ct.default_dim)
        int_values = list(range(0, ct.default_nb))
        string_values = ["abc" for _ in range(ct.default_nb)]
        data = (int_values, default_value, string_values, vectors)
        collection_w.upsert(data, check_task=CheckTasks.err_res,
                            check_items={ct.err_code: 999, ct.err_msg: "Field varchar don't match in entities[0]"})
|
2023-10-24 09:26:31 +08:00
|
|
|
|
|
|
|
|
|
|
|
class TestInsertArray(TestcaseBase):
|
|
|
|
""" Test case of Insert array """
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_insert_array_dataframe(self, auto_id):
    """
    target: test insert DataFrame data
    method: insert the generated array dataframe; with auto_id the int64
            column is dropped first
    expected: num_entities equals ct.default_nb after flush
    """
    array_schema = cf.gen_array_collection_schema(auto_id=auto_id)
    collection_w = self.init_collection_wrap(schema=array_schema)
    frame = cf.gen_array_dataframe_data()
    if auto_id:
        frame = frame.drop(columns=ct.default_int64_field_name)
    collection_w.insert(data=frame)
    collection_w.flush()
    assert collection_w.num_entities == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
@pytest.mark.parametrize("auto_id", [True, False])
def test_insert_array_list(self, auto_id):
    """
    target: test insert list data
    method: insert column-based list data; with auto_id the pk column is left out
    expected: num_entities equals the number of inserted rows
    """
    array_schema = cf.gen_array_collection_schema(auto_id=auto_id)
    collection_w = self.init_collection_wrap(schema=array_schema)

    nb = ct.default_nb
    arr_len = ct.default_max_capacity

    def windows(cast):
        # row i holds cast(i), cast(i + 1), ..., cast(i + arr_len - 1)
        return [[cast(value) for value in range(row, row + arr_len)] for row in range(nb)]

    pk_values = list(range(nb))
    float_vec = cf.gen_vectors(nb, ct.default_dim)
    columns = [float_vec, windows(np.int32), windows(np.float32), windows(str)]
    if not auto_id:
        columns = [pk_values] + columns

    collection_w.insert(data=columns)
    assert collection_w.num_entities == nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L1)
def test_insert_array_rows(self):
    """
    target: test insert row data
    method: insert rows generated from the array schema, then upsert the first two rows
    expected: num_entities equals ct.default_nb after the insert
    """
    array_schema = cf.gen_array_collection_schema()
    collection_w = self.init_collection_wrap(schema=array_schema)

    rows = cf.gen_row_data_by_schema(schema=array_schema)
    collection_w.insert(data=rows)
    assert collection_w.num_entities == ct.default_nb

    # re-send the first two rows through upsert
    first_two = rows[:2]
    collection_w.upsert(first_two)
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_empty_list(self):
    """
    target: test insert DataFrame data with empty arrays
    method: insert data where every int32 array value has length 0
    expected: num_entities equals ct.default_nb
    """
    row_count = ct.default_nb
    array_schema = cf.gen_array_collection_schema()
    collection_w = self.init_collection_wrap(schema=array_schema)

    frame = cf.gen_array_dataframe_data()
    # one fresh empty list per row
    empty_arrays = [[] for _ in range(row_count)]
    frame[ct.default_int32_array_field_name] = empty_arrays

    collection_w.insert(data=frame)
    assert collection_w.num_entities == ct.default_nb
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_length_differ(self):
    """
    target: test insert row data
    method: insert rows whose int32/float array lengths are drawn at random
            per row, then upsert two schema-generated rows
    expected: num_entities equals the number of inserted rows
    """
    nb = ct.default_nb
    array_schema = cf.gen_array_collection_schema()
    collection_w = self.init_collection_wrap(schema=array_schema)

    def build_row(pk):
        # lengths are drawn first, then the vector, matching the original call order
        int_len = random.randint(0, ct.default_max_capacity)
        float_len = random.randint(0, ct.default_max_capacity)
        return {
            ct.default_int64_field_name: pk,
            ct.default_float_vec_field_name: [random.random() for _ in range(ct.default_dim)],
            ct.default_int32_array_field_name: [np.int32(j) for j in range(int_len)],
            ct.default_float_array_field_name: [np.float32(j) for j in range(float_len)],
            ct.default_string_array_field_name: [str(j) for j in range(ct.default_max_capacity)],
        }

    rows = [build_row(pk) for pk in range(nb)]

    collection_w.insert(rows)
    assert collection_w.num_entities == nb

    upsert_rows = cf.gen_row_data_by_schema(nb=2, schema=array_schema)
    collection_w.upsert(upsert_rows)
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_length_invalid(self):
    """
    target: Insert actual array length > max_capacity
    method: overwrite the float array of row 1 with ct.default_max_capacity + 1
            elements and insert
    expected: raise error 1100 whose message names the actual length and the capacity
    """
    # init collection
    schema = cf.gen_array_collection_schema(dim=32)
    collection_w = self.init_collection_wrap(schema=schema)
    # Insert actual array length > max_capacity
    arr_len = ct.default_max_capacity + 1
    data = cf.gen_row_data_by_schema(schema=schema, nb=11)
    data[1][ct.default_float_array_field_name] = [np.float32(i) for i in range(arr_len)]
    # Take the reported length from arr_len rather than a literal 101, so the
    # message stays correct if ct.default_max_capacity changes.
    err_msg = (f"the length ({arr_len}) of 1th array exceeds max capacity ({ct.default_max_capacity}): "
               "expected=valid length array, actual=array length exceeds max capacity: invalid parameter")
    collection_w.insert(data=data, check_task=CheckTasks.err_res,
                        check_items={ct.err_code: 1100, ct.err_msg: err_msg})
|
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_type_invalid(self):
    """
    target: Insert array type invalid
    method: 1. Insert string values to an int array
            2. upsert float values to a string array
    expected: raise error in both cases
    """
    # init collection
    element_count = 5
    row_count = 10
    vector_dim = 8
    array_schema = cf.gen_array_collection_schema(dim=vector_dim)
    collection_w = self.init_collection_wrap(schema=array_schema)
    mismatch_msg = "The Input data type is inconsistent with defined schema"

    # 1. Insert string values to an int array
    insert_rows = cf.gen_row_data_by_schema(schema=array_schema, nb=row_count)
    insert_rows[1][ct.default_int32_array_field_name] = list(map(str, range(element_count)))
    collection_w.insert(data=insert_rows, check_task=CheckTasks.err_res,
                        check_items={ct.err_code: 999, ct.err_msg: mismatch_msg})

    # 2. upsert float values to a string array
    upsert_rows = cf.gen_row_data_by_schema(schema=array_schema)
    upsert_rows[1][ct.default_string_array_field_name] = list(map(np.float32, range(element_count)))
    collection_w.upsert(data=upsert_rows, check_task=CheckTasks.err_res,
                        check_items={ct.err_code: 999, ct.err_msg: mismatch_msg})
|
2023-10-24 09:26:31 +08:00
|
|
|
|
|
|
|
@pytest.mark.tags(CaseLabel.L2)
def test_insert_array_mixed_value(self):
    """
    target: Insert array consisting of mixed values
    method: put a str, an int, a list and a bool into one string-array value and insert
    expected: raise error
    """
    # init collection
    array_schema = cf.gen_array_collection_schema(dim=32)
    collection_w = self.init_collection_wrap(schema=array_schema)

    # Insert array consisting of mixed values
    rows = cf.gen_row_data_by_schema(schema=array_schema, nb=10)
    mixed_values = ["a", 1, [2.0, 3.0], False]
    rows[1][ct.default_string_array_field_name] = mixed_values

    expected_error = {
        ct.err_code: 999,
        ct.err_msg: "The Input data type is inconsistent with defined schema",
    }
    collection_w.insert(data=rows, check_task=CheckTasks.err_res,
                        check_items=expected_error)
|