test: add restful v2 test (#30984)

add restful v2 test

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>


@@ -0,0 +1,9 @@
## How to run the test cases
Install Milvus with authentication enabled, then install the dependencies and run the suite:
```bash
pip install -r requirements.txt
pytest testcases -m L0 -n 6 -v --endpoint http://127.0.0.1:19530 --minio_host 127.0.0.1
```
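
The `--endpoint`, `--token`, and `--minio_host` options (and the `L0`/`L1` markers) are defined in the `conftest.py` and `pytest.ini` added below, so a run against a non-local deployment only needs to override the defaults; the hosts in this sketch are placeholders:

```bash
# run only the L1 cases against a remote Milvus, overriding the defaults
pytest testcases -m L1 -n 6 -v \
    --endpoint http://<milvus-host>:19530 \
    --token root:Milvus \
    --minio_host <minio-host>
```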


@@ -0,0 +1,750 @@
import json
import requests
import time
import uuid
from utils.util_log import test_log as logger
from minio import Minio
from minio.error import S3Error
def logger_request_response(response, url, tt, headers, data, str_data, str_response, method):
if len(data) > 2000:
data = data[:1000] + "..." + data[-1000:]
try:
if response.status_code == 200:
if ('code' in response.json() and response.json()["code"] == 200) or (
'Code' in response.json() and response.json()["Code"] == 0):
logger.debug(
f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {str_data}, \nresponse: {str_response}")
else:
logger.debug(
f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {data}, \nresponse: {response.text}")
else:
logger.debug(
f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {data}, \nresponse: {response.text}")
except Exception as e:
logger.debug(
f"\nmethod: {method}, \nurl: {url}, \ncost time: {tt}, \nheader: {headers}, \npayload: {data}, \nresponse: {response.text}, \nerror: {e}")
class Requests:
def __init__(self, url=None, api_key=None):
self.url = url
self.api_key = api_key
self.headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def post(self, url, headers=None, data=None, params=None):
headers = headers if headers is not None else self.update_headers()
data = json.dumps(data)
str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data
t0 = time.time()
response = requests.post(url, headers=headers, data=data, params=params)
tt = time.time() - t0
str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text
logger_request_response(response, url, tt, headers, data, str_data, str_response, "post")
return response
def get(self, url, headers=None, params=None, data=None):
headers = headers if headers is not None else self.update_headers()
data = json.dumps(data)
str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data
t0 = time.time()
if data == "null":  # json.dumps(None) -> "null", i.e. no request body was passed
response = requests.get(url, headers=headers, params=params)
else:
response = requests.get(url, headers=headers, params=params, data=data)
tt = time.time() - t0
str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text
logger_request_response(response, url, tt, headers, data, str_data, str_response, "get")
return response
def put(self, url, headers=None, data=None):
headers = headers if headers is not None else self.update_headers()
data = json.dumps(data)
str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data
t0 = time.time()
response = requests.put(url, headers=headers, data=data)
tt = time.time() - t0
str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text
logger_request_response(response, url, tt, headers, data, str_data, str_response, "put")
return response
def delete(self, url, headers=None, data=None):
headers = headers if headers is not None else self.update_headers()
data = json.dumps(data)
str_data = data[:200] + '...' + data[-200:] if len(data) > 400 else data
t0 = time.time()
response = requests.delete(url, headers=headers, data=data)
tt = time.time() - t0
str_response = response.text[:200] + '...' + response.text[-200:] if len(response.text) > 400 else response.text
logger_request_response(response, url, tt, headers, data, str_data, str_response, "delete")
return response
class VectorClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.token = token
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'Accept-Type-Allow-Int64': "true",
'RequestId': str(uuid.uuid1())
}
return headers
def vector_search(self, payload, db_name="default", timeout=10):
time.sleep(1)
url = f'{self.endpoint}/v2/vectordb/entities/search'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
rsp = response.json()
if "data" in rsp and len(rsp["data"]) == 0:
t0 = time.time()
while time.time() - t0 < timeout:
response = self.post(url, headers=self.update_headers(), data=payload)
rsp = response.json()
if len(rsp["data"]) > 0:
break
time.sleep(1)
else:
response = self.post(url, headers=self.update_headers(), data=payload)
rsp = response.json()
if "data" in rsp and len(rsp["data"]) == 0:
logger.info(f"after {timeout}s, still no data")
return response.json()
def vector_query(self, payload, db_name="default", timeout=10):
time.sleep(1)
url = f'{self.endpoint}/v2/vectordb/entities/query'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
rsp = response.json()
if "data" in rsp and len(rsp["data"]) == 0:
t0 = time.time()
while time.time() - t0 < timeout:
response = self.post(url, headers=self.update_headers(), data=payload)
rsp = response.json()
if len(rsp["data"]) > 0:
break
time.sleep(1)
else:
response = self.post(url, headers=self.update_headers(), data=payload)
rsp = response.json()
if "data" in rsp and len(rsp["data"]) == 0:
logger.info(f"after {timeout}s, still no data")
return response.json()
def vector_get(self, payload, db_name="default"):
time.sleep(1)
url = f'{self.endpoint}/v2/vectordb/entities/get'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
def vector_delete(self, payload, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/entities/delete'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
def vector_insert(self, payload, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/entities/insert'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
def vector_upsert(self, payload, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/entities/upsert'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
class CollectionClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self, headers=None):
if headers is not None:
return headers
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def collection_has(self, db_name="default", collection_name=None):
url = f'{self.endpoint}/v2/vectordb/collections/has'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def collection_rename(self, payload, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/collections/rename'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
def collection_stats(self, db_name="default", collection_name=None):
url = f'{self.endpoint}/v2/vectordb/collections/get_stats'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def collection_load(self, db_name="default", collection_name=None):
url = f'{self.endpoint}/v2/vectordb/collections/load'
if self.db_name is not None:
db_name = self.db_name
payload = {
"dbName": db_name,
"collectionName": collection_name
}
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def collection_release(self, db_name="default", collection_name=None):
url = f'{self.endpoint}/v2/vectordb/collections/release'
if self.db_name is not None:
db_name = self.db_name
payload = {
"dbName": db_name,
"collectionName": collection_name
}
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def collection_load_state(self, db_name="default", collection_name=None, partition_names=None):
url = f'{self.endpoint}/v2/vectordb/collections/get_load_state'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name,
}
if partition_names is not None:
data["partitionNames"] = partition_names
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def collection_list(self, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/collections/list'
params = {}
if self.db_name is not None:
params = {
"dbName": self.db_name
}
if db_name != "default":
params = {
"dbName": db_name
}
response = self.post(url, headers=self.update_headers(), params=params)
res = response.json()
return res
def collection_create(self, payload, db_name="default"):
time.sleep(1)  # wait for collection creation and avoid rate limiting
url = f'{self.endpoint}/v2/vectordb/collections/create'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
def collection_describe(self, collection_name, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/collections/describe'
data = {"collectionName": collection_name}
if self.db_name is not None:
data = {
"collectionName": collection_name,
"dbName": self.db_name
}
if db_name != "default":
data = {
"collectionName": collection_name,
"dbName": db_name
}
response = self.post(url, headers=self.update_headers(), data=data)
return response.json()
def collection_drop(self, payload, db_name="default"):
time.sleep(1)  # wait for collection drop and avoid rate limiting
url = f'{self.endpoint}/v2/vectordb/collections/drop'
if self.db_name is not None:
payload["dbName"] = self.db_name
if db_name != "default":
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
return response.json()
class PartitionClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def partition_list(self, db_name="default", collection_name=None):
url = f'{self.endpoint}/v2/vectordb/partitions/list'
data = {
"collectionName": collection_name
}
if self.db_name is not None:
data = {
"dbName": self.db_name,
"collectionName": collection_name
}
if db_name != "default":
data = {
"dbName": db_name,
"collectionName": collection_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def partition_create(self, db_name="default", collection_name=None, partition_name=None):
url = f'{self.endpoint}/v2/vectordb/partitions/create'
if self.db_name is not None:
db_name = self.db_name
payload = {
"dbName": db_name,
"collectionName": collection_name,
"partitionName": partition_name
}
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def partition_drop(self, db_name="default", collection_name=None, partition_name=None):
url = f'{self.endpoint}/v2/vectordb/partitions/drop'
if self.db_name is not None:
db_name = self.db_name
payload = {
"dbName": db_name,
"collectionName": collection_name,
"partitionName": partition_name
}
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def partition_load(self, db_name="default", collection_name=None, partition_names=None):
url = f'{self.endpoint}/v2/vectordb/partitions/load'
if self.db_name is not None:
db_name = self.db_name
payload = {
"dbName": db_name,
"collectionName": collection_name,
"partitionNames": partition_names
}
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def partition_release(self, db_name="default", collection_name=None, partition_names=None):
url = f'{self.endpoint}/v2/vectordb/partitions/release'
if self.db_name is not None:
db_name = self.db_name
payload = {
"dbName": db_name,
"collectionName": collection_name,
"partitionNames": partition_names
}
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def partition_has(self, db_name="default", collection_name=None, partition_name=None):
url = f'{self.endpoint}/v2/vectordb/partitions/has'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name,
"partitionName": partition_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def partition_stats(self, db_name="default", collection_name=None, partition_name=None):
url = f'{self.endpoint}/v2/vectordb/partitions/get_stats'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name,
"partitionName": partition_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
class UserClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def user_list(self):
url = f'{self.endpoint}/v2/vectordb/users/list'
response = self.post(url, headers=self.update_headers())
res = response.json()
return res
def user_create(self, payload):
url = f'{self.endpoint}/v2/vectordb/users/create'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def user_password_update(self, payload):
url = f'{self.endpoint}/v2/vectordb/users/update_password'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def user_describe(self, user_name):
url = f'{self.endpoint}/v2/vectordb/users/describe'
data = {
"userName": user_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def user_drop(self, payload):
url = f'{self.endpoint}/v2/vectordb/users/drop'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def user_grant(self, payload):
url = f'{self.endpoint}/v2/vectordb/users/grant_role'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def user_revoke(self, payload):
url = f'{self.endpoint}/v2/vectordb/users/revoke_role'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
class RoleClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def role_list(self):
url = f'{self.endpoint}/v2/vectordb/roles/list'
response = self.post(url, headers=self.update_headers())
res = response.json()
return res
def role_create(self, payload):
url = f'{self.endpoint}/v2/vectordb/roles/create'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def role_describe(self, role_name):
url = f'{self.endpoint}/v2/vectordb/roles/describe'
data = {
"roleName": role_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def role_drop(self, payload):
url = f'{self.endpoint}/v2/vectordb/roles/drop'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def role_grant(self, payload):
url = f'{self.endpoint}/v2/vectordb/roles/grant_privilege'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def role_revoke(self, payload):
url = f'{self.endpoint}/v2/vectordb/roles/revoke_privilege'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
class IndexClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def index_create(self, payload, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/indexes/create'
if self.db_name is not None:
db_name = self.db_name
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def index_describe(self, db_name="default", collection_name=None, index_name=None):
url = f'{self.endpoint}/v2/vectordb/indexes/describe'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name,
"indexName": index_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def index_list(self, collection_name=None, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/indexes/list'
if self.db_name is not None:
db_name = self.db_name
data = {
"dbName": db_name,
"collectionName": collection_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def index_drop(self, payload, db_name="default"):
url = f'{self.endpoint}/v2/vectordb/indexes/drop'
if self.db_name is not None:
db_name = self.db_name
payload["dbName"] = db_name
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
class AliasClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def list_alias(self):
url = f'{self.endpoint}/v2/vectordb/aliases/list'
response = self.post(url, headers=self.update_headers())
res = response.json()
return res
def describe_alias(self, alias_name):
url = f'{self.endpoint}/v2/vectordb/aliases/describe'
data = {
"aliasName": alias_name
}
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def alter_alias(self, payload):
url = f'{self.endpoint}/v2/vectordb/aliases/alter'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def drop_alias(self, payload):
url = f'{self.endpoint}/v2/vectordb/aliases/drop'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def create_alias(self, payload):
url = f'{self.endpoint}/v2/vectordb/aliases/create'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
class ImportJobClient(Requests):
def __init__(self, endpoint, token):
super().__init__(url=endpoint, api_key=token)
self.endpoint = endpoint
self.api_key = token
self.db_name = None
self.headers = self.update_headers()
def update_headers(self):
headers = {
'Content-Type': 'application/json',
'Authorization': f'Bearer {self.api_key}',
'RequestId': str(uuid.uuid1())
}
return headers
def list_import_jobs(self, payload, db_name="default"):
payload["dbName"] = db_name
data = payload
url = f'{self.endpoint}/v2/vectordb/jobs/import/list'
response = self.post(url, headers=self.update_headers(), data=data)
res = response.json()
return res
def create_import_jobs(self, payload):
url = f'{self.endpoint}/v2/vectordb/jobs/import/create'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
def get_import_job_progress(self, task_id):
payload = {
"taskID": task_id
}
url = f'{self.endpoint}/v2/vectordb/jobs/import/get_progress'
response = self.post(url, headers=self.update_headers(), data=payload)
res = response.json()
return res
class StorageClient():
def __init__(self, endpoint, access_key, secret_key, bucket_name):
self.endpoint = endpoint
self.access_key = access_key
self.secret_key = secret_key
self.bucket_name = bucket_name
self.client = Minio(
self.endpoint,
access_key=access_key,
secret_key=secret_key,
secure=False,
)
def upload_file(self, file_path, object_name):
try:
self.client.fput_object(self.bucket_name, object_name, file_path)
except S3Error as exc:
logger.error(f"fail to copy files to minio: {exc}")
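
For orientation, a minimal usage sketch of the clients defined above. The endpoint and credentials are placeholder assumptions (a local Milvus with authentication enabled and the default `root:Milvus` account); the quick-create payload and the `code == 200` success convention follow the test cases in this PR:

```python
# Minimal sketch, assuming this module is importable as api.milvus and a
# local Milvus with auth enabled is reachable at the placeholder endpoint.
import random

from api.milvus import CollectionClient, VectorClient

endpoint = "http://127.0.0.1:19530"  # assumption: local deployment
token = "root:Milvus"                # assumption: default credentials

collection_client = CollectionClient(endpoint, token)
vector_client = VectorClient(endpoint, token)

name = "quick_start_collection"
rsp = collection_client.collection_create({
    "collectionName": name,
    "dimension": 128,
    "metricType": "L2",
})
assert rsp["code"] == 200
assert name in collection_client.collection_list()["data"]

# this suite addresses quick-created collections through a vector field
# named "vector" (see the schema payloads used in the tests below)
rows = [{"vector": [random.random() for _ in range(128)]} for _ in range(10)]
rsp = vector_client.vector_insert({"collectionName": name, "data": rows})
assert rsp["code"] == 200
```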


@@ -0,0 +1,135 @@
import json
import sys
import pytest
import time
from pymilvus import connections, db
from utils.util_log import test_log as logger
from api.milvus import (VectorClient, CollectionClient, PartitionClient, IndexClient, AliasClient,
UserClient, RoleClient, ImportJobClient, StorageClient)
from utils.utils import get_data_by_payload
def get_config():
pass
class Base:
name = None
protocol = None
host = None
port = None
endpoint = None
api_key = None
username = None
password = None
invalid_api_key = None
vector_client = None
collection_client = None
partition_client = None
index_client = None
user_client = None
role_client = None
import_job_client = None
storage_client = None
class TestBase(Base):
def teardown_method(self):
self.collection_client.api_key = self.api_key
all_collections = self.collection_client.collection_list()['data']
if self.name in all_collections:
logger.info(f"collection {self.name} exist, drop it")
payload = {
"collectionName": self.name,
}
try:
rsp = self.collection_client.collection_drop(payload)
except Exception as e:
logger.error(e)
@pytest.fixture(scope="function", autouse=True)
def init_client(self, endpoint, token, minio_host):
self.endpoint = f"{endpoint}"
self.api_key = f"{token}"
self.invalid_api_key = "invalid_token"
self.vector_client = VectorClient(self.endpoint, self.api_key)
self.collection_client = CollectionClient(self.endpoint, self.api_key)
self.partition_client = PartitionClient(self.endpoint, self.api_key)
self.index_client = IndexClient(self.endpoint, self.api_key)
self.alias_client = AliasClient(self.endpoint, self.api_key)
self.user_client = UserClient(self.endpoint, self.api_key)
self.role_client = RoleClient(self.endpoint, self.api_key)
self.import_job_client = ImportJobClient(self.endpoint, self.api_key)
self.storage_client = StorageClient(f"{minio_host}:9000", "minioadmin", "minioadmin", "milvus-bucket")
if token is None:
self.vector_client.api_key = None
self.collection_client.api_key = None
self.partition_client.api_key = None
connections.connect(uri=endpoint, token=token)
def init_collection(self, collection_name, pk_field="id", metric_type="L2", dim=128, nb=100, batch_size=1000):
# create collection
schema_payload = {
"collectionName": collection_name,
"dimension": dim,
"metricType": metric_type,
"description": "test collection",
"primaryField": pk_field,
"vectorField": "vector",
}
rsp = self.collection_client.collection_create(schema_payload)
assert rsp['code'] == 200
self.wait_collection_load_completed(collection_name)
batch = nb // batch_size
remainder = nb % batch_size
data = []
for i in range(batch):
nb = batch_size
data = get_data_by_payload(schema_payload, nb)
payload = {
"collectionName": collection_name,
"data": data
}
body_size = sys.getsizeof(json.dumps(payload))
logger.debug(f"body size: {body_size / 1024 / 1024} MB")
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 200
# insert remainder data
if remainder:
nb = remainder
data = get_data_by_payload(schema_payload, nb)
payload = {
"collectionName": collection_name,
"data": data
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 200
return schema_payload, data
def wait_collection_load_completed(self, name):
t0 = time.time()
timeout = 60
while time.time() - t0 < timeout:
rsp = self.collection_client.collection_describe(name)
if "data" in rsp and "load" in rsp["data"] and rsp["data"]["load"] == "LoadStateLoaded":
break
else:
time.sleep(5)
def create_database(self, db_name="default"):
all_db = db.list_database()
logger.info(f"all database: {all_db}")
if db_name not in all_db:
logger.info(f"create database: {db_name}")
try:
db.create_database(db_name=db_name)
except Exception as e:
logger.error(e)
def update_database(self, db_name="default"):
self.create_database(db_name=db_name)
self.collection_client.db_name = db_name
self.vector_client.db_name = db_name
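
A minimal sketch of how a test case builds on this base class; the class and test names are illustrative (not part of this PR), and the assumption that `get_data_by_payload` keys each row by the schema's vector field name ("vector") is marked in the comments:

```python
import pytest

from base.testbase import TestBase
from utils.utils import gen_collection_name


@pytest.mark.L0
class TestQuickStart(TestBase):  # illustrative name, not part of this PR
    def test_insert_then_search(self):
        name = gen_collection_name()
        # create the collection, wait until it is loaded, insert 3000 rows
        schema_payload, data = self.init_collection(name, dim=128, nb=3000)
        # search with one of the inserted vectors (assumes rows generated by
        # get_data_by_payload carry the vector under the "vector" key)
        payload = {"collectionName": name, "vector": data[0]["vector"]}
        rsp = self.vector_client.vector_search(payload)
        assert rsp["code"] == 200
```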


@@ -0,0 +1,44 @@
import os
class LogConfig:
def __init__(self):
self.log_debug = ""
self.log_err = ""
self.log_info = ""
self.log_worker = ""
self.get_default_config()
@staticmethod
def get_env_variable(var="CI_LOG_PATH"):
""" get log path for testing """
try:
log_path = os.environ[var]
return str(log_path)
except Exception as e:
log_path = "/tmp/ci_logs"
print("[get_env_variable] failed to get environment variables : %s, use default path : %s" % (str(e), log_path))
return log_path
@staticmethod
def create_path(log_path):
if not os.path.isdir(str(log_path)):
print("[create_path] folder(%s) is not exist." % log_path)
print("[create_path] create path now...")
os.makedirs(log_path)
def get_default_config(self):
""" Make sure the path exists """
log_dir = self.get_env_variable()
self.log_debug = "%s/ci_test_log.debug" % log_dir
self.log_info = "%s/ci_test_log.log" % log_dir
self.log_err = "%s/ci_test_log.err" % log_dir
work_log = os.environ.get('PYTEST_XDIST_WORKER')
if work_log is not None:
self.log_worker = f'{log_dir}/{work_log}.log'
self.create_path(log_dir)
log_config = LogConfig()
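
A quick illustration of the path resolution above; the override value is an example, and `log_worker` is populated only when running under pytest-xdist:

```python
# assumes this runs in the same module as the LogConfig class above
import os

os.environ["CI_LOG_PATH"] = "/tmp/my_ci_logs"  # example override
cfg = LogConfig()                              # re-reads CI_LOG_PATH
print(cfg.log_info)   # -> /tmp/my_ci_logs/ci_test_log.log
print(cfg.log_debug)  # -> /tmp/my_ci_logs/ci_test_log.debug
```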


@@ -0,0 +1,23 @@
import pytest
import yaml
def pytest_addoption(parser):
parser.addoption("--endpoint", action="store", default="http://127.0.0.1:19530", help="endpoint")
parser.addoption("--token", action="store", default="root:Milvus", help="token")
parser.addoption("--minio_host", action="store", default="127.0.0.1", help="minio host")
@pytest.fixture
def endpoint(request):
return request.config.getoption("--endpoint")
@pytest.fixture
def token(request):
return request.config.getoption("--token")
@pytest.fixture
def minio_host(request):
return request.config.getoption("--minio_host")


@@ -0,0 +1,14 @@
[pytest]
addopts = --strict --endpoint http://127.0.0.1:19530 --token root:Milvus --minio_host 127.0.0.1
log_format = [%(asctime)s - %(levelname)s - %(name)s]: %(message)s (%(filename)s:%(lineno)s)
log_date_format = %Y-%m-%d %H:%M:%S
filterwarnings =
ignore::DeprecationWarning
markers =
L0 : 'L0 case, high priority'
L1 : 'L1 case, second priority'


@@ -0,0 +1,16 @@
--extra-index-url https://test.pypi.org/simple/
requests==2.31.0
urllib3==1.26.18
pytest~=7.2.0
pyyaml~=6.0
numpy~=1.24.3
allure-pytest>=2.8.18
Faker==19.2.0
pymilvus==2.4.0rc39
scikit-learn~=1.1.3
pytest-xdist==2.5.0
minio==7.1.14
# for bf16 datatype
jax==0.4.13
jaxlib==0.4.13


@@ -0,0 +1,125 @@
import random
from sklearn import preprocessing
import numpy as np
from utils.utils import gen_collection_name
from utils.util_log import test_log as logger
import pytest
from base.testbase import TestBase
@pytest.mark.L0
class TestAliasE2E(TestBase):
def test_alias_e2e(self):
"""
"""
# list alias before create
rsp = self.alias_client.list_alias()
name = gen_collection_name()
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{128}"}}
]
},
"indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]
}
logger.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
# create alias
alias_name = name + "_alias"
payload = {
"collectionName": name,
"aliasName": alias_name
}
rsp = self.alias_client.create_alias(payload)
assert rsp['code'] == 200
# list alias after create
rsp = self.alias_client.list_alias()
assert alias_name in rsp['data']
# describe alias
rsp = self.alias_client.describe_alias(alias_name)
assert rsp['data']["aliasName"] == alias_name
assert rsp['data']["collectionName"] == name
# do crud operation by alias
# insert data by alias
data = []
for j in range(3000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"book_intro": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(),
}
data.append(tmp)
payload = {
"collectionName": alias_name,
"data": data
}
rsp = self.vector_client.vector_insert(payload)
# delete data by alias
payload = {
"collectionName": alias_name,
"ids": [1, 2, 3]
}
rsp = self.vector_client.vector_delete(payload)
# upsert data by alias
upsert_data = []
for j in range(100):
tmp = {
"book_id": j,
"word_count": j + 1,
"book_describe": f"book_{j + 2}",
"book_intro": preprocessing.normalize([np.array([random.random() for _ in range(128)])])[0].tolist(),
}
upsert_data.append(tmp)
payload = {
"collectionName": alias_name,
"data": upsert_data
}
rsp = self.vector_client.vector_upsert(payload)
# search data by alias
payload = {
"collectionName": alias_name,
"vector": preprocessing.normalize([np.array([random.random() for i in range(128)])])[0].tolist()
}
rsp = self.vector_client.vector_search(payload)
# query data by alias
payload = {
"collectionName": alias_name,
"filter": "book_id > 10"
}
rsp = self.vector_client.vector_query(payload)
# alter alias to another collection
new_name = gen_collection_name()
payload = {
"collectionName": new_name,
"metricType": "L2",
"dimension": 128,
}
rsp = client.collection_create(payload)
payload = {
"collectionName": new_name,
"aliasName": alias_name
}
rsp = self.alias_client.alter_alias(payload)
# describe alias
rsp = self.alias_client.describe_alias(alias_name)
assert rsp['data']["aliasName"] == alias_name
assert rsp['data']["collectionName"] == new_name
# query data by alias, expect no data
payload = {
"collectionName": alias_name,
"filter": "id > 0"
}
rsp = self.vector_client.vector_query(payload)
assert rsp['data'] == []


@@ -0,0 +1,968 @@
import datetime
import logging
import time
from utils.util_log import test_log as logger
from utils.utils import gen_collection_name
import pytest
from api.milvus import CollectionClient
from base.testbase import TestBase
import threading
from utils.utils import get_data_by_payload
from pymilvus import (
FieldSchema, CollectionSchema, DataType,
Collection
)
@pytest.mark.L0
class TestCreateCollection(TestBase):
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_fast(self, dim, metric_type):
"""
target: test create collection
method: create a collection with a simple schema
expected: create collection success
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
"metricType": metric_type
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
@pytest.mark.parametrize("auto_id", [True, False])
@pytest.mark.parametrize("enable_dynamic_field", [True, False])
@pytest.mark.parametrize("enable_partition_key", [True, False])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_custom_without_index(self, dim, auto_id, enable_dynamic_field, enable_partition_key):
"""
target: test create collection with a custom schema and no index
method: create a collection with a custom schema and without indexParams
expected: create collection success, no index created and collection not loaded
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"autoId": auto_id,
"enableDynamicField": enable_dynamic_field,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "user_id", "dataType": "Int64", "isPartitionKey": enable_partition_key,
"elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
{"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
c = Collection(name)
logger.info(f"schema: {c.schema}")
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert rsp['data']['autoId'] == auto_id
assert c.schema.auto_id == auto_id
assert rsp['data']['enableDynamicField'] == enable_dynamic_field
assert c.schema.enable_dynamic_field == enable_dynamic_field
# assert no index created
indexes = rsp['data']['indexes']
assert len(indexes) == 0
# assert not loaded
assert rsp['data']['load'] == "LoadStateNotLoad"
for field in rsp['data']['fields']:
if field['name'] == "user_id":
assert field['partitionKey'] == enable_partition_key
for field in c.schema.fields:
if field.name == "user_id":
assert field.is_partition_key == enable_partition_key
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_one_float_vector_with_index(self, dim, metric_type):
"""
target: test create collection with one float vector field and an index
method: create a collection with a custom schema plus indexParams on the vector field
expected: create collection success, index created and collection loaded
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
},
"indexParams": [
{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}]
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
time.sleep(10)
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
# assert index created
indexes = rsp['data']['indexes']
assert len(indexes) == len(payload['indexParams'])
# assert load success
assert rsp['data']['load'] == "LoadStateLoaded"
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_multi_float_vector_with_one_index(self, dim, metric_type):
"""
target: test create collection with multiple vector fields but only one index
method: create a collection with two float vector fields and indexParams covering only one of them
expected: create returns an error code and the collection is left unloaded
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
{"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
},
"indexParams": [
{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}]
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 65535
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
time.sleep(10)
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
# assert index created
indexes = rsp['data']['indexes']
assert len(indexes) == len(payload['indexParams'])
# assert load success
assert rsp['data']['load'] == "LoadStateNotLoad"
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_create_collections_multi_float_vector_with_all_index(self, dim, metric_type):
"""
target: test create collection with multiple vector fields, each indexed
method: create a collection with two float vector fields and indexParams covering both
expected: create collection success, indexes created and collection loaded
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}},
{"fieldName": "image_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
},
"indexParams": [
{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"},
{"fieldName": "image_intro", "indexName": "image_intro_vector", "metricType": f"{metric_type}"}]
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
time.sleep(10)
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
# assert index created
indexes = rsp['data']['indexes']
assert len(indexes) == len(payload['indexParams'])
# assert load success
assert rsp['data']['load'] == "LoadStateLoaded"
@pytest.mark.parametrize("auto_id", [True])
@pytest.mark.parametrize("enable_dynamic_field", [True])
@pytest.mark.parametrize("enable_partition_key", [True])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
def test_create_collections_float16_vector_datatype(self, dim, auto_id, enable_dynamic_field, enable_partition_key,
metric_type):
"""
target: test create collection with float16 and bfloat16 vector fields
method: create a collection whose schema contains Float16Vector and BFloat16Vector fields, with an index on each
expected: create collection success
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"autoId": auto_id,
"enableDynamicField": enable_dynamic_field,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "float16_vector", "dataType": "Float16Vector",
"elementTypeParams": {"dim": f"{dim}"}},
{"fieldName": "bfloat16_vector", "dataType": "BFloat16Vector",
"elementTypeParams": {"dim": f"{dim}"}},
]
},
"indexParams": [
{"fieldName": "float16_vector", "indexName": "float16_vector_index", "metricType": f"{metric_type}"},
{"fieldName": "bfloat16_vector", "indexName": "bfloat16_vector_index", "metricType": f"{metric_type}"}]
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
c = Collection(name)
logger.info(f"schema: {c.schema}")
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert len(rsp['data']['fields']) == len(c.schema.fields)
@pytest.mark.parametrize("auto_id", [True])
@pytest.mark.parametrize("enable_dynamic_field", [True])
@pytest.mark.parametrize("enable_partition_key", [True])
@pytest.mark.parametrize("dim", [128])
@pytest.mark.parametrize("metric_type", ["JACCARD", "HAMMING"])
def test_create_collections_binary_vector_datatype(self, dim, auto_id, enable_dynamic_field, enable_partition_key,
metric_type):
"""
target: test create collection with a binary vector field
method: create a collection whose schema contains a BinaryVector field and a binary metric index
expected: create collection success
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"autoId": auto_id,
"enableDynamicField": enable_dynamic_field,
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}},
]
},
"indexParams": [
{"fieldName": "binary_vector", "indexName": "binary_vector_index", "metricType": f"{metric_type}"}
]
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
c = Collection(name)
logger.info(f"schema: {c.schema}")
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert len(rsp['data']['fields']) == len(c.schema.fields)
def test_create_collections_concurrent_with_same_param(self):
"""
target: test create collection with same param
method: concurrent create collections with same param with multi thread
expected: create collections all success
"""
concurrent_rsp = []
def create_collection(c_name, vector_dim, c_metric_type):
collection_payload = {
"collectionName": c_name,
"dimension": vector_dim,
"metricType": c_metric_type,
}
rsp = client.collection_create(collection_payload)
concurrent_rsp.append(rsp)
logger.info(rsp)
name = gen_collection_name()
dim = 128
metric_type = "L2"
client = self.collection_client
threads = []
for i in range(10):
t = threading.Thread(target=create_collection, args=(name, dim, metric_type,))
threads.append(t)
for t in threads:
t.start()
for t in threads:
t.join()
time.sleep(10)
success_cnt = 0
for rsp in concurrent_rsp:
if rsp["code"] == 200:
success_cnt += 1
logger.info(concurrent_rsp)
assert success_cnt == 10
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert f"FloatVector({dim})" in str(rsp['data']['fields'])
def test_create_collections_concurrent_with_different_param(self):
"""
target: test create collection with different param
method: concurrent create collections with different param with multi thread
expected: only one collection can success
"""
concurrent_rsp = []
def create_collection(c_name, vector_dim, c_metric_type):
collection_payload = {
"collectionName": c_name,
"dimension": vector_dim,
"metricType": c_metric_type,
}
rsp = client.collection_create(collection_payload)
concurrent_rsp.append(rsp)
logger.info(rsp)
name = gen_collection_name()
dim = 128
client = self.collection_client
threads = []
for i in range(0, 5):
t = threading.Thread(target=create_collection, args=(name, dim + i, "L2",))
threads.append(t)
for i in range(5, 10):
t = threading.Thread(target=create_collection, args=(name, dim + i, "IP",))
threads.append(t)
for t in threads:
t.start()
for t in threads:
t.join()
time.sleep(10)
success_cnt = 0
for rsp in concurrent_rsp:
if rsp["code"] == 200:
success_cnt += 1
logger.info(concurrent_rsp)
assert success_cnt == 1
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
@pytest.mark.L1
class TestCreateCollectionNegative(TestBase):
def test_create_collections_custom_with_invalid_datatype(self):
"""
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VARCHAR", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logging.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
assert rsp['code'] == 1100
def test_create_collections_with_invalid_api_key(self):
"""
target: test create collection with an invalid api key (wrong username and password)
method: create collections with invalid api key
expected: create collection failed
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
client.api_key = "illegal_api_key"
payload = {
"collectionName": name,
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 1800
@pytest.mark.parametrize("name",
[" ", "test_collection_" * 100, "test collection", "test/collection", "test\collection"])
def test_create_collections_with_invalid_collection_name(self, name):
"""
target: test create collection with invalid collection name
method: create collections with invalid collection name
expected: create collection failed with right error message
"""
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 1
@pytest.mark.L0
class TestHasCollections(TestBase):
def test_has_collections_default(self):
"""
target: test has collection on created collections
method: create collections, then check them with the has API
expected: has returns True for every created collection
"""
client = self.collection_client
name_list = []
for i in range(2):
name = gen_collection_name()
dim = 128
payload = {
"collectionName": name,
"metricType": "L2",
"dimension": dim,
}
time.sleep(1)
rsp = client.collection_create(payload)
assert rsp['code'] == 200
name_list.append(name)
rsp = client.collection_list()
all_collections = rsp['data']
for name in name_list:
assert name in all_collections
rsp = client.collection_has(collection_name=name)
assert rsp['data']['has'] is True
def test_has_collections_with_not_exist_name(self):
"""
target: test has collection with a non-existent name
method: check never-created collection names with the has API
expected: has returns False
"""
client = self.collection_client
name_list = []
for i in range(2):
name = gen_collection_name()
name_list.append(name)
rsp = client.collection_list()
all_collections = rsp['data']
for name in name_list:
assert name not in all_collections
rsp = client.collection_has(collection_name=name)
assert rsp['data']['has'] is False
@pytest.mark.L0
class TestGetCollectionStats(TestBase):
def test_get_collections_stats(self):
"""
target: test get collection stats
method: create a collection, insert data and flush, then get its stats
expected: rowCount in the stats equals the number of inserted entities
"""
client = self.collection_client
name = gen_collection_name()
dim = 128
payload = {
"collectionName": name,
"metricType": "L2",
"dimension": dim,
}
time.sleep(1)
rsp = client.collection_create(payload)
assert rsp['code'] == 200
# describe collection
client.collection_describe(collection_name=name)
rsp = client.collection_stats(collection_name=name)
assert rsp['code'] == 200
assert rsp['data']['rowCount'] == 0
# insert data
nb = 3000
data = get_data_by_payload(payload, nb)
payload = {
"collectionName": name,
"data": data
}
self.vector_client.vector_insert(payload=payload)
c = Collection(name)
count = c.query(expr="", output_fields=["count(*)"])
logger.info(f"count: {count}")
c.flush()
rsp = client.collection_stats(collection_name=name)
assert rsp['data']['rowCount'] == nb
class TestLoadReleaseCollection(TestBase):
def test_load_and_release_collection(self):
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
# create index before load
index_params = [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]
payload = {
"collectionName": name,
"indexParams": index_params
}
rsp = self.index_client.index_create(payload)
# get load state before load
rsp = client.collection_load_state(collection_name=name)
assert rsp['data']['loadState'] == "LoadStateNotLoad"
# describe collection
client.collection_describe(collection_name=name)
rsp = client.collection_load(collection_name=name)
assert rsp['code'] == 200
rsp = client.collection_load_state(collection_name=name)
assert rsp['data']['loadState'] in ["LoadStateLoaded", "LoadStateLoading"]
time.sleep(5)
rsp = client.collection_load_state(collection_name=name)
assert rsp['data']['loadState'] == "LoadStateLoaded"
# release collection
rsp = client.collection_release(collection_name=name)
time.sleep(5)
rsp = client.collection_load_state(collection_name=name)
assert rsp['data']['loadState'] == "LoadStateNotLoad"
class TestGetCollectionLoadState(TestBase):
def test_get_collection_load_state(self):
"""
target: test get collection load state
method: create a collection, insert data, then check its load state
expected: load state transitions to LoadStateLoaded
"""
client = self.collection_client
name = gen_collection_name()
dim = 128
payload = {
"collectionName": name,
"metricType": "L2",
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
# describe collection
client.collection_describe(collection_name=name)
rsp = client.collection_load_state(collection_name=name)
assert rsp['code'] == 200
assert rsp['data']['loadState'] == "LoadStateNotLoad"
# insert data
nb = 3000
data = get_data_by_payload(payload, nb)
payload = {
"collectionName": name,
"data": data
}
self.vector_client.vector_insert(payload=payload)
rsp = client.collection_load_state(collection_name=name)
assert rsp['data']['loadState'] == "LoadStateLoading"
time.sleep(10)
rsp = client.collection_load_state(collection_name=name)
assert rsp['data']['loadState'] == "LoadStateLoaded"
@pytest.mark.L0
class TestListCollections(TestBase):
def test_list_collections_default(self):
"""
target: test list collection with a simple schema
method: create collections and list them
expected: created collections are in list
"""
client = self.collection_client
name_list = []
for i in range(2):
name = gen_collection_name()
dim = 128
payload = {
"collectionName": name,
"metricType": "L2",
"dimension": dim,
}
time.sleep(1)
rsp = client.collection_create(payload)
assert rsp['code'] == 200
name_list.append(name)
rsp = client.collection_list()
all_collections = rsp['data']
for name in name_list:
assert name in all_collections
@pytest.mark.L1
class TestListCollectionsNegative(TestBase):
def test_list_collections_with_invalid_api_key(self):
"""
target: test list collection with an invalid api key
method: list collection with invalid api key
expected: raise error with right error code and message
"""
client = self.collection_client
name_list = []
for i in range(2):
name = gen_collection_name()
dim = 128
payload = {
"collectionName": name,
"metricType": "L2",
"dimension": dim,
}
time.sleep(1)
rsp = client.collection_create(payload)
assert rsp['code'] == 200
name_list.append(name)
client = self.collection_client
client.api_key = "illegal_api_key"
rsp = client.collection_list()
assert rsp['code'] == 1800
@pytest.mark.L0
class TestDescribeCollection(TestBase):
def test_describe_collections_default(self):
"""
target: test describe collection with a simple schema
method: describe collection
expected: the described info matches the parameters used to create the collection
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
"metricType": "L2"
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert rsp['data']['autoId'] is True
assert rsp['data']['enableDynamicField'] is True
assert len(rsp['data']['indexes']) == 1
def test_describe_collections_custom(self):
"""
target: test describe collection with a simple schema
method: describe collection
expected: the described info matches the parameters used to create the collection
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
fields = [
FieldSchema(name='reviewer_id', dtype=DataType.INT64, description="", is_primary=True),
FieldSchema(name='store_address', dtype=DataType.VARCHAR, description="", max_length=512,
is_partition_key=True),
FieldSchema(name='review', dtype=DataType.VARCHAR, description="", max_length=16384),
FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, description="", dim=384, is_index=True),
]
schema = CollectionSchema(
fields=fields,
description="",
enable_dynamic_field=True,
# The following is an alternative to setting `is_partition_key` in a field schema.
partition_key_field="store_address"
)
collection = Collection(
name=name,
schema=schema,
)
logger.info(f"schema: {schema}")
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
for field in rsp['data']['fields']:
if field['name'] == "store_address":
assert field['partitionKey'] is True
if field['name'] == "reviewer_id":
assert field['primaryKey'] is True
assert rsp['data']['autoId'] is False
assert rsp['data']['enableDynamicField'] is True
@pytest.mark.L1
class TestDescribeCollectionNegative(TestBase):
def test_describe_collections_with_invalid_api_key(self):
"""
target: test describe collection with invalid api key
method: describe collection with invalid api key
expected: raise error with right error code and message
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
illegal_client = CollectionClient(self.url, "illegal_api_key")
rsp = illegal_client.collection_describe(name)
assert rsp['code'] == 1800
def test_describe_collections_with_invalid_collection_name(self):
"""
target: test describe collection with invalid collection name
method: describe collection with invalid collection name
expected: raise error with right error code and message
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
invalid_name = "invalid_name"
rsp = client.collection_describe(invalid_name)
assert rsp['code'] == 1
@pytest.mark.L0
class TestDropCollection(TestBase):
def test_drop_collections_default(self):
"""
Drop a collection with a simple schema
target: test drop collection with a simple schema
method: drop collection
expected: dropped collection was not in collection list
"""
clo_list = []
for i in range(5):
time.sleep(1)
            name = 'test_collection_' + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
payload = {
"collectionName": name,
"dimension": 128,
"metricType": "L2"
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 200
clo_list.append(name)
rsp = self.collection_client.collection_list()
all_collections = rsp['data']
for name in clo_list:
assert name in all_collections
for name in clo_list:
time.sleep(0.2)
payload = {
"collectionName": name,
}
rsp = self.collection_client.collection_drop(payload)
assert rsp['code'] == 200
rsp = self.collection_client.collection_list()
all_collections = rsp['data']
for name in clo_list:
assert name not in all_collections
@pytest.mark.L1
class TestDropCollectionNegative(TestBase):
def test_drop_collections_with_invalid_api_key(self):
"""
target: test drop collection with invalid api key
method: drop collection with invalid api key
expected: raise error with right error code and message; collection still in collection list
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# drop collection
payload = {
"collectionName": name,
}
illegal_client = CollectionClient(self.url, "invalid_api_key")
rsp = illegal_client.collection_drop(payload)
assert rsp['code'] == 1800
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
def test_drop_collections_with_invalid_collection_name(self):
"""
target: test drop collection with invalid collection name
method: drop collection with invalid collection name
expected: raise error with right error code and message
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# drop collection
invalid_name = "invalid_name"
payload = {
"collectionName": invalid_name,
}
rsp = client.collection_drop(payload)
assert rsp['code'] == 100
@pytest.mark.L0
class TestRenameCollection(TestBase):
def test_rename_collection(self):
"""
target: test rename collection
method: rename collection
expected: renamed collection is in collection list
"""
name = gen_collection_name()
dim = 128
client = self.collection_client
payload = {
"collectionName": name,
"metricType": "L2",
"dimension": dim,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
new_name = gen_collection_name()
payload = {
"collectionName": name,
"newCollectionName": new_name,
}
rsp = client.collection_rename(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert new_name in all_collections
assert name not in all_collections
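For reference, the rename call the client wraps is a single POST; a minimal sketch with plain `requests`, assuming a local endpoint, a `root:Milvus` token, and the `/v2/vectordb/collections/rename` path:

```python
import requests

ENDPOINT = "http://127.0.0.1:19530"  # assumed local endpoint
TOKEN = "root:Milvus"                # assumed default credentials

def rename_collection(name, new_name):
    # POST the same payload shape the test above sends
    url = f"{ENDPOINT}/v2/vectordb/collections/rename"
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {TOKEN}"}
    payload = {"collectionName": name, "newCollectionName": new_name}
    return requests.post(url, headers=headers, json=payload).json()
```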

View File

@ -0,0 +1,303 @@
import random
from sklearn import preprocessing
import numpy as np
import sys
import json
import time
from utils import constant
from utils.utils import gen_collection_name
from utils.util_log import test_log as logger
import pytest
from base.testbase import TestBase
from utils.utils import gen_vector
from pymilvus import (
FieldSchema, CollectionSchema, DataType,
Collection
)
@pytest.mark.L0
class TestCreateIndex(TestBase):
@pytest.mark.parametrize("metric_type", ["L2"])
@pytest.mark.parametrize("index_type", ["AUTOINDEX", "HNSW"])
@pytest.mark.parametrize("dim", [128])
def test_index_e2e(self, dim, metric_type, index_type):
"""
target: test create collection
method: create a collection with a simple schema
expected: create collection success
"""
name = gen_collection_name()
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logger.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
# insert data
for i in range(1):
data = []
for j in range(3000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"book_intro": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[
0].tolist(),
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data
}
rsp = self.vector_client.vector_insert(payload)
c = Collection(name)
c.flush()
# list index, expect empty
rsp = self.index_client.index_list(name)
# create index
payload = {
"collectionName": name,
"indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector",
"metricType": f"{metric_type}"}]
}
if index_type == "HNSW":
payload["indexParams"][0]["indexConfig"] = {"index_type": "HNSW", "M": "16", "efConstruction": "200"}
if index_type == "AUTOINDEX":
payload["indexParams"][0]["indexConfig"] = {"index_type": "AUTOINDEX"}
rsp = self.index_client.index_create(payload)
assert rsp['code'] == 200
time.sleep(10)
# list index, expect not empty
rsp = self.index_client.index_list(collection_name=name)
# describe index
rsp = self.index_client.index_describe(collection_name=name, index_name="book_intro_vector")
assert rsp['code'] == 200
assert len(rsp['data']) == len(payload['indexParams'])
expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName'])
actual_index = sorted(rsp['data'], key=lambda x: x['fieldName'])
for i in range(len(expected_index)):
assert expected_index[i]['fieldName'] == actual_index[i]['fieldName']
assert expected_index[i]['indexName'] == actual_index[i]['indexName']
assert expected_index[i]['metricType'] == actual_index[i]['metricType']
assert expected_index[i]["indexConfig"]['index_type'] == actual_index[i]['indexType']
# drop index
for i in range(len(actual_index)):
payload = {
"collectionName": name,
"indexName": actual_index[i]['indexName']
}
rsp = self.index_client.index_drop(payload)
assert rsp['code'] == 200
# list index, expect empty
rsp = self.index_client.index_list(collection_name=name)
assert rsp['data'] == []
@pytest.mark.parametrize("index_type", ["INVERTED"])
@pytest.mark.parametrize("dim", [128])
def test_index_for_scalar_field(self, dim, index_type):
"""
target: test create collection
method: create a collection with a simple schema
expected: create collection success
"""
name = gen_collection_name()
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logger.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
# insert data
for i in range(1):
data = []
for j in range(3000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"book_intro": preprocessing.normalize([np.array([random.random() for _ in range(dim)])])[
0].tolist(),
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data
}
rsp = self.vector_client.vector_insert(payload)
c = Collection(name)
c.flush()
# list index, expect empty
rsp = self.index_client.index_list(name)
# create index
payload = {
"collectionName": name,
"indexParams": [{"fieldName": "word_count", "indexName": "word_count_vector",
"indexConfig": {"index_type": "INVERTED"}}]
}
rsp = self.index_client.index_create(payload)
assert rsp['code'] == 200
time.sleep(10)
# list index, expect not empty
rsp = self.index_client.index_list(collection_name=name)
# describe index
rsp = self.index_client.index_describe(collection_name=name, index_name="word_count_vector")
assert rsp['code'] == 200
assert len(rsp['data']) == len(payload['indexParams'])
expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName'])
actual_index = sorted(rsp['data'], key=lambda x: x['fieldName'])
for i in range(len(expected_index)):
assert expected_index[i]['fieldName'] == actual_index[i]['fieldName']
assert expected_index[i]['indexName'] == actual_index[i]['indexName']
assert expected_index[i]['indexConfig']['index_type'] == actual_index[i]['indexType']
@pytest.mark.parametrize("index_type", ["BIN_FLAT", "BIN_IVF_FLAT"])
@pytest.mark.parametrize("metric_type", ["JACCARD", "HAMMING"])
@pytest.mark.parametrize("dim", [128])
def test_index_for_binary_vector_field(self, dim, metric_type, index_type):
"""
target: test create collection
method: create a collection with a simple schema
expected: create collection success
"""
name = gen_collection_name()
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logger.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
# insert data
for i in range(1):
data = []
for j in range(3000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data
}
rsp = self.vector_client.vector_insert(payload)
c = Collection(name)
c.flush()
# list index, expect empty
rsp = self.index_client.index_list(name)
# create index
index_name = "binary_vector_index"
payload = {
"collectionName": name,
"indexParams": [{"fieldName": "binary_vector", "indexName": index_name, "metricType": metric_type,
"indexConfig": {"index_type": index_type}}]
}
if index_type == "BIN_IVF_FLAT":
payload["indexParams"][0]["indexConfig"]["nlist"] = "16384"
rsp = self.index_client.index_create(payload)
assert rsp['code'] == 200
time.sleep(10)
# list index, expect not empty
rsp = self.index_client.index_list(collection_name=name)
# describe index
rsp = self.index_client.index_describe(collection_name=name, index_name=index_name)
assert rsp['code'] == 200
assert len(rsp['data']) == len(payload['indexParams'])
expected_index = sorted(payload['indexParams'], key=lambda x: x['fieldName'])
actual_index = sorted(rsp['data'], key=lambda x: x['fieldName'])
for i in range(len(expected_index)):
assert expected_index[i]['fieldName'] == actual_index[i]['fieldName']
assert expected_index[i]['indexName'] == actual_index[i]['indexName']
assert expected_index[i]['indexConfig']['index_type'] == actual_index[i]['indexType']
@pytest.mark.L0
class TestCreateIndexNegative(TestBase):
@pytest.mark.parametrize("index_type", ["BIN_FLAT", "BIN_IVF_FLAT"])
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_index_for_binary_vector_field_with_mismatch_metric_type(self, dim, metric_type, index_type):
"""
target: test create collection
method: create a collection with a simple schema
expected: create collection success
"""
name = gen_collection_name()
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "binary_vector", "dataType": "BinaryVector", "elementTypeParams": {"dim": f"{dim}"}}
]
}
}
logger.info(f"create collection {name} with payload: {payload}")
rsp = client.collection_create(payload)
# insert data
for i in range(1):
data = []
for j in range(3000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"binary_vector": gen_vector(datatype="BinaryVector", dim=dim)
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data
}
rsp = self.vector_client.vector_insert(payload)
c = Collection(name)
c.flush()
# list index, expect empty
rsp = self.index_client.index_list(name)
# create index
index_name = "binary_vector_index"
payload = {
"collectionName": name,
"indexParams": [{"fieldName": "binary_vector", "indexName": index_name, "metricType": metric_type,
"indexConfig": {"index_type": index_type}}]
}
if index_type == "BIN_IVF_FLAT":
payload["indexParams"][0]["indexConfig"]["nlist"] = "16384"
rsp = self.index_client.index_create(payload)
assert rsp['code'] == 65535
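The index families above differ only in their `indexParams` entry; a minimal helper sketch (not part of the client API) that reproduces the payload shapes these tests send:

```python
def build_index_params(field_name, index_name, index_type, metric_type=None):
    # Reproduce the indexParams shapes used in the tests above (a sketch, not client API)
    param = {"fieldName": field_name, "indexName": index_name,
             "indexConfig": {"index_type": index_type}}
    if metric_type is not None:
        param["metricType"] = metric_type
    if index_type == "HNSW":
        param["indexConfig"].update({"M": "16", "efConstruction": "200"})
    if index_type == "BIN_IVF_FLAT":
        param["indexConfig"]["nlist"] = "16384"
    return [param]

# e.g. {"collectionName": name,
#       "indexParams": build_index_params("book_intro", "book_intro_vector", "HNSW", "L2")}
```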

View File

@ -0,0 +1,81 @@
import random
import json
import time
from utils.utils import gen_collection_name
import pytest
from base.testbase import TestBase
@pytest.mark.L0
class TestJobE2E(TestBase):
def test_job_e2e(self):
# create collection
name = gen_collection_name()
dim = 128
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
},
"indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]
}
rsp = self.collection_client.collection_create(payload)
# upload file to storage
data = [{
"book_id": i,
"word_count": i,
"book_describe": f"book_{i}",
"book_intro": [random.random() for _ in range(dim)]}
for i in range(10000)]
# dump data to file
file_name = "bulk_insert_data.json"
file_path = f"/tmp/{file_name}"
with open(file_path, "w") as f:
json.dump(data, f)
# upload file to minio storage
self.storage_client.upload_file(file_path, file_name)
# create import job
payload = {
"collectionName": name,
"files": [file_name],
}
rsp = self.import_job_client.create_import_jobs(payload)
# list import job
payload = {
"collectionName": name,
}
rsp = self.import_job_client.list_import_jobs(payload)
# get import job progress
for task in rsp['data']:
task_id = task['taskID']
finished = False
t0 = time.time()
while not finished:
rsp = self.import_job_client.get_import_job_progress(task_id)
if rsp['data']['state'] == "ImportCompleted":
finished = True
time.sleep(5)
if time.time() - t0 > 120:
assert False, "import job timeout"
time.sleep(10)
# query data
payload = {
"collectionName": name,
"filter": f"book_id in {[i for i in range(1000)]}",
"limit": 100,
"offset": 0,
"outputFields": ["*"]
}
rsp = self.vector_client.vector_query(payload)
assert len(rsp['data']) == 100
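The `storage_client.upload_file` step above presumably wraps a MinIO put; a minimal sketch with the `minio` SDK, where the bucket name and credentials are assumptions (common Milvus deployment defaults):

```python
from minio import Minio

# Bucket name and credentials are assumptions (common Milvus deployment defaults)
client = Minio("127.0.0.1:9000", access_key="minioadmin",
               secret_key="minioadmin", secure=False)
client.fput_object("a-bucket", "bulk_insert_data.json", "/tmp/bulk_insert_data.json")
```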

View File

@ -0,0 +1,124 @@
import random
from sklearn import preprocessing
import numpy as np
from utils.utils import gen_collection_name
import pytest
from base.testbase import TestBase
from pymilvus import (
Collection
)
@pytest.mark.L0
class TestPartitionE2E(TestBase):
def test_partition_e2e(self):
"""
target: test create collection
method: create a collection with a simple schema
expected: create collection success
"""
name = gen_collection_name()
dim = 128
metric_type = "L2"
client = self.collection_client
payload = {
"collectionName": name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": f"{dim}"}}
]
},
"indexParams": [
{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": f"{metric_type}"}]
}
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 200
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
# describe collection
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
# insert data to default partition
data = []
for j in range(3000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"book_intro": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist()
}
data.append(tmp)
payload = {
"collectionName": name,
"data": data,
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 200
# create partition
partition_name = "test_partition"
rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name)
assert rsp['code'] == 200
# insert data to partition
data = []
for j in range(3000, 6000):
tmp = {
"book_id": j,
"word_count": j,
"book_describe": f"book_{j}",
"book_intro": preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist()
}
data.append(tmp)
payload = {
"collectionName": name,
"partitionName": partition_name,
"data": data,
}
rsp = self.vector_client.vector_insert(payload)
assert rsp['code'] == 200
# create partition again
rsp = self.partition_client.partition_create(collection_name=name, partition_name=partition_name)
# list partitions
rsp = self.partition_client.partition_list(collection_name=name)
assert rsp['code'] == 200
assert partition_name in rsp['data']
# has partition
rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name)
assert rsp['code'] == 200
assert rsp['data']["has"] is True
# flush and get partition statistics
c = Collection(name=name)
c.flush()
rsp = self.partition_client.partition_stats(collection_name=name, partition_name=partition_name)
assert rsp['code'] == 200
assert rsp['data']['rowCount'] == 3000
# release partition
rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name])
assert rsp['code'] == 200
# release partition again
rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name])
assert rsp['code'] == 200
# load partition
rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name])
assert rsp['code'] == 200
# load partition again
rsp = self.partition_client.partition_load(collection_name=name, partition_names=[partition_name])
assert rsp['code'] == 200
# drop partition when it is loaded
rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name)
assert rsp['code'] == 65535
# drop partition after release
rsp = self.partition_client.partition_release(collection_name=name, partition_names=[partition_name])
rsp = self.partition_client.partition_drop(collection_name=name, partition_name=partition_name)
assert rsp['code'] == 200
# has partition
rsp = self.partition_client.partition_has(collection_name=name, partition_name=partition_name)
assert rsp['code'] == 200
assert rsp['data']["has"] is False

View File

@ -0,0 +1,315 @@
import random
import time
from utils.utils import gen_collection_name
from utils.util_log import test_log as logger
import pytest
from base.testbase import TestBase
from pymilvus import (
FieldSchema, CollectionSchema, DataType,
Collection
)
@pytest.mark.L0
class TestRestfulSdkCompatibility(TestBase):
@pytest.mark.parametrize("dim", [128, 256])
@pytest.mark.parametrize("enable_dynamic", [True, False])
@pytest.mark.parametrize("shard_num", [1, 2])
def test_collection_created_by_sdk_describe_by_restful(self, dim, enable_dynamic, shard_num):
"""
"""
# 1. create collection by sdk
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=enable_dynamic)
collection = Collection(name=name, schema=default_schema, shards_num=shard_num)
logger.info(collection.schema)
# 2. use restful to get collection info
client = self.collection_client
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert rsp['data']['enableDynamicField'] == enable_dynamic
assert rsp['data']['load'] == "LoadStateNotLoad"
assert rsp['data']['shardsNum'] == shard_num
@pytest.mark.parametrize("metric_type", ["L2", "IP", "COSINE"])
@pytest.mark.parametrize("dim", [128])
def test_collection_created_by_restful_describe_by_sdk(self, dim, metric_type):
"""
"""
name = gen_collection_name()
client = self.collection_client
payload = {
"collectionName": name,
"dimension": dim,
"metricType": metric_type,
}
rsp = client.collection_create(payload)
assert rsp['code'] == 200
collection = Collection(name=name)
logger.info(collection.schema)
field_names = [field.name for field in collection.schema.fields]
assert len(field_names) == 2
assert collection.schema.enable_dynamic_field is True
assert len(collection.indexes) > 0
@pytest.mark.parametrize("metric_type", ["L2", "IP"])
def test_collection_created_index_by_sdk_describe_by_restful(self, metric_type):
"""
"""
# 1. create collection by sdk
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
collection = Collection(name=name, schema=default_schema)
# create index by sdk
index_param = {"metric_type": metric_type, "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
# 2. use restful to get collection info
client = self.collection_client
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
rsp = client.collection_describe(name)
assert rsp['code'] == 200
assert rsp['data']['collectionName'] == name
assert len(rsp['data']['indexes']) == 1 and rsp['data']['indexes'][0]['metricType'] == metric_type
@pytest.mark.parametrize("metric_type", ["L2", "IP"])
def test_collection_load_by_sdk_describe_by_restful(self, metric_type):
"""
"""
# 1. create collection by sdk
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
collection = Collection(name=name, schema=default_schema)
# create index by sdk
index_param = {"metric_type": metric_type, "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
collection.load()
# 2. use restful to get collection info
client = self.collection_client
rsp = client.collection_list()
all_collections = rsp['data']
assert name in all_collections
rsp = client.collection_describe(name)
assert rsp['data']['load'] == "LoadStateLoaded"
def test_collection_create_by_sdk_insert_vector_by_restful(self):
"""
"""
# 1. create collection by sdk
dim = 128
nb = 100
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
collection = Collection(name=name, schema=default_schema)
# create index by sdk
index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
collection.load()
# insert data by restful
data = [
{"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i}
for i in range(nb)
]
client = self.vector_client
payload = {
"collectionName": name,
"data": data,
}
rsp = client.vector_insert(payload)
assert rsp['code'] == 200
assert rsp['data']['insertCount'] == nb
def test_collection_create_by_sdk_search_vector_by_restful(self):
"""
"""
dim = 128
nb = 100
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
# init collection by sdk
collection = Collection(name=name, schema=default_schema)
index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
collection.load()
data = [
{"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i}
for i in range(nb)
]
collection.insert(data)
client = self.vector_client
payload = {
"collectionName": name,
"vector": [random.random() for _ in range(dim)],
"limit": 10
}
# search data by restful
rsp = client.vector_search(payload)
assert rsp['code'] == 200
assert len(rsp['data']) == 10
def test_collection_create_by_sdk_query_vector_by_restful(self):
"""
"""
dim = 128
nb = 100
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
# init collection by sdk
collection = Collection(name=name, schema=default_schema)
index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
collection.load()
data = [
{"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i}
for i in range(nb)
]
collection.insert(data)
client = self.vector_client
payload = {
"collectionName": name,
"filter": "int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]",
}
# query data by restful
rsp = client.vector_query(payload)
assert rsp['code'] == 200
assert len(rsp['data']) == 10
def test_collection_create_by_restful_search_vector_by_sdk(self):
"""
"""
name = gen_collection_name()
dim = 128
# insert data by restful
self.init_collection(name, metric_type="L2", dim=dim)
time.sleep(5)
# search data by sdk
collection = Collection(name=name)
nq = 5
vectors_to_search = [[random.random() for i in range(dim)] for j in range(nq)]
res = collection.search(data=vectors_to_search, anns_field="vector", param={}, limit=10)
assert len(res) == nq
assert len(res[0]) == 10
def test_collection_create_by_restful_query_vector_by_sdk(self):
"""
"""
name = gen_collection_name()
dim = 128
# insert data by restful
self.init_collection(name, metric_type="L2", dim=dim)
time.sleep(5)
# query data by sdk
collection = Collection(name=name)
        res = collection.query(expr="uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"])
for item in res:
uid = item["uid"]
assert uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
def test_collection_create_by_restful_delete_vector_by_sdk(self):
"""
"""
name = gen_collection_name()
dim = 128
# insert data by restful
self.init_collection(name, metric_type="L2", dim=dim)
time.sleep(5)
# query data by sdk
collection = Collection(name=name)
        res = collection.query(expr="uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"])
pk_id_list = []
        for item in res:
            pk_id_list.append(item["id"])
expr = f"id in {pk_id_list}"
collection.delete(expr)
time.sleep(5)
        res = collection.query(expr="uid in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"])
assert len(res) == 0
def test_collection_create_by_sdk_delete_vector_by_restful(self):
"""
"""
dim = 128
nb = 100
name = gen_collection_name()
default_fields = [
FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="float_vector", dtype=DataType.FLOAT_VECTOR, dim=128)
]
default_schema = CollectionSchema(fields=default_fields, description="test collection",
enable_dynamic_field=True)
# init collection by sdk
collection = Collection(name=name, schema=default_schema)
index_param = {"metric_type": "L2", "index_type": "IVF_FLAT", "params": {"nlist": 128}}
collection.create_index(field_name="float_vector", index_params=index_param)
collection.load()
data = [
{"int64": i, "float": i, "varchar": str(i), "float_vector": [random.random() for _ in range(dim)], "age": i}
for i in range(nb)
]
collection.insert(data)
time.sleep(5)
        res = collection.query(expr="int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"])
pk_id_list = []
for item in res:
pk_id_list.append(item["int64"])
payload = {
"collectionName": name,
"id": pk_id_list
}
# delete data by restful
rsp = self.vector_client.vector_delete(payload)
time.sleep(5)
        res = collection.query(expr="int64 in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", output_fields=["*"])
assert len(res) == 0
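A detail several of these tests rely on: the extra `age` key is not in the schema, so it lands in the dynamic field, yet both sides can still filter on it and return it by name. A minimal pymilvus sketch, assuming a local endpoint and a loaded collection created as above (the collection name is hypothetical):

```python
from pymilvus import Collection, connections

connections.connect(uri="http://127.0.0.1:19530")  # assumed local endpoint
c = Collection("test_collection_xxx")              # hypothetical name from gen_collection_name()
# "age" is absent from the schema; with enable_dynamic_field=True it is stored
# in the dynamic field and can still be filtered and returned like a normal field.
res = c.query(expr="age in [1, 2, 3]", output_fields=["age", "int64"])
```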

View File

@ -0,0 +1,79 @@
from utils.utils import gen_unique_str
from base.testbase import TestBase
class TestRoleE2E(TestBase):
def teardown_method(self):
        # the number of roles is limited, so delete all test roles after each test
rsp = self.role_client.role_list()
all_roles = rsp['data']
# delete all roles except default roles
for role in all_roles:
            if role.startswith("role"):
                # revoke every privilege granted to the role before dropping it
                rsp = self.role_client.role_describe(role)
                for d in rsp['data']:
                    payload = {
                        "roleName": role,
                        "objectType": d['objectType'],
                        "objectName": d['objectName'],
                        "privilege": d['privilege']
                    }
                    self.role_client.role_revoke(payload)
                payload = {
                    "roleName": role
                }
                self.role_client.role_drop(payload)
def test_role_e2e(self):
# list role before create
rsp = self.role_client.role_list()
# create role
role_name = gen_unique_str("role")
payload = {
"roleName": role_name,
}
rsp = self.role_client.role_create(payload)
# list role after create
rsp = self.role_client.role_list()
assert role_name in rsp['data']
# describe role
rsp = self.role_client.role_describe(role_name)
# grant privilege to role
payload = {
"roleName": role_name,
"objectType": "Global",
"objectName": "*",
"privilege": "CreateCollection"
}
rsp = self.role_client.role_grant(payload)
# describe role after grant
rsp = self.role_client.role_describe(role_name)
privileges = []
for p in rsp['data']:
privileges.append(p['privilege'])
assert "CreateCollection" in privileges
# revoke privilege from role
payload = {
"roleName": role_name,
"objectType": "Global",
"objectName": "*",
"privilege": "CreateCollection"
}
rsp = self.role_client.role_revoke(payload)
# describe role after revoke
rsp = self.role_client.role_describe(role_name)
privileges = []
for p in rsp['data']:
privileges.append(p['privilege'])
assert "CreateCollection" not in privileges
# drop role
payload = {
"roleName": role_name
}
rsp = self.role_client.role_drop(payload)
rsp = self.role_client.role_list()
assert role_name not in rsp['data']
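Grant and revoke take the same `(objectType, objectName, privilege)` triple; a small helper sketch mirroring the payload shape used above:

```python
def grant(role_client, role_name, privilege, object_type="Global", object_name="*"):
    # Same payload shape as the grant/revoke calls in the test above
    payload = {
        "roleName": role_name,
        "objectType": object_type,
        "objectName": object_name,
        "privilege": privilege,
    }
    return role_client.role_grant(payload)

# e.g. grant(self.role_client, role_name, "CreateCollection")
```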

View File

@ -0,0 +1,137 @@
from utils.utils import gen_collection_name, gen_unique_str
import pytest
from base.testbase import TestBase
from pymilvus import (connections)
@pytest.mark.L0
class TestUserE2E(TestBase):
def test_user_e2e(self):
# list user before create
rsp = self.user_client.user_list()
# create user
user_name = gen_unique_str("user")
password = "1234578"
payload = {
"userName": user_name,
"password": password
}
rsp = self.user_client.user_create(payload)
# list user after create
rsp = self.user_client.user_list()
assert user_name in rsp['data']
# describe user
rsp = self.user_client.user_describe(user_name)
# update user password
new_password = "87654321"
payload = {
"userName": user_name,
"password": password,
"newPassword": new_password
}
rsp = self.user_client.user_password_update(payload)
assert rsp['code'] == 200
# drop user
payload = {
"userName": user_name
}
rsp = self.user_client.user_drop(payload)
rsp = self.user_client.user_list()
assert user_name not in rsp['data']
def test_user_binding_role(self):
# create user
user_name = gen_unique_str("user")
password = "12345678"
payload = {
"userName": user_name,
"password": password
}
rsp = self.user_client.user_create(payload)
# list user after create
rsp = self.user_client.user_list()
assert user_name in rsp['data']
# create role
role_name = gen_unique_str("role")
payload = {
"roleName": role_name,
}
rsp = self.role_client.role_create(payload)
        # grant privilege to role
payload = {
"roleName": role_name,
"objectType": "Global",
"objectName": "*",
"privilege": "All"
}
rsp = self.role_client.role_grant(payload)
# bind role to user
payload = {
"userName": user_name,
"roleName": role_name
}
rsp = self.user_client.user_grant(payload)
# describe user roles
rsp = self.user_client.user_describe(user_name)
# test user has privilege with pymilvus
uri = self.user_client.endpoint
connections.connect(alias="test", uri=f"{uri}", token=f"{user_name}:{password}")
# create collection with user
collection_name = gen_collection_name()
payload = {
"collectionName": collection_name,
"schema": {
"fields": [
{"fieldName": "book_id", "dataType": "Int64", "isPrimary": True, "elementTypeParams": {}},
{"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}},
{"fieldName": "book_describe", "dataType": "VarChar", "elementTypeParams": {"max_length": "256"}},
{"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": "128"}}
]
}
}
self.collection_client.api_key = f"{user_name}:{password}"
rsp = self.collection_client.collection_create(payload)
assert rsp['code'] == 200
@pytest.mark.L0
class TestUserNegative(TestBase):
def test_create_user_with_short_password(self):
# list user before create
rsp = self.user_client.user_list()
# create user
user_name = gen_unique_str("user")
password = "1234"
payload = {
"userName": user_name,
"password": password
}
rsp = self.user_client.user_create(payload)
assert rsp['code'] == 1100
def test_create_user_twice(self):
# list user before create
rsp = self.user_client.user_list()
# create user
user_name = gen_unique_str("user")
password = "12345678"
payload = {
"userName": user_name,
"password": password
}
for i in range(2):
rsp = self.user_client.user_create(payload)
if i == 0:
assert rsp['code'] == 200
else:
assert rsp['code'] == 65535
assert "user already exists" in rsp['message']

File diff suppressed because it is too large

View File

@ -0,0 +1,2 @@
MAX_SUM_OFFSET_AND_LIMIT = 16384
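Milvus rejects paginated reads where `offset + limit` exceeds this bound; a small guard sketch that a test payload builder could apply before sending a query:

```python
from utils import constant

def check_pagination(offset, limit):
    # Mirror the server-side bound so invalid payloads fail fast in tests
    if offset + limit > constant.MAX_SUM_OFFSET_AND_LIMIT:
        raise ValueError(
            f"offset + limit must be <= {constant.MAX_SUM_OFFSET_AND_LIMIT}, got {offset + limit}")
```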

View File

@ -0,0 +1,56 @@
import logging
import sys
from config.log_config import log_config
class TestLog:
def __init__(self, logger, log_debug, log_file, log_err, log_worker):
self.logger = logger
self.log_debug = log_debug
self.log_file = log_file
self.log_err = log_err
self.log_worker = log_worker
self.log = logging.getLogger(self.logger)
self.log.setLevel(logging.DEBUG)
try:
formatter = logging.Formatter("[%(asctime)s - %(levelname)s - %(name)s]: "
"%(message)s (%(filename)s:%(lineno)s)")
# [%(process)s] process NO.
dh = logging.FileHandler(self.log_debug)
dh.setLevel(logging.DEBUG)
dh.setFormatter(formatter)
self.log.addHandler(dh)
fh = logging.FileHandler(self.log_file)
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)
self.log.addHandler(fh)
eh = logging.FileHandler(self.log_err)
eh.setLevel(logging.ERROR)
eh.setFormatter(formatter)
self.log.addHandler(eh)
if self.log_worker != "":
wh = logging.FileHandler(self.log_worker)
wh.setLevel(logging.DEBUG)
wh.setFormatter(formatter)
self.log.addHandler(wh)
            ch = logging.StreamHandler(sys.stdout)
            ch.setLevel(logging.DEBUG)
            ch.setFormatter(formatter)
            self.log.addHandler(ch)
except Exception as e:
print("Can not use %s or %s or %s to log. error : %s" % (log_debug, log_file, log_err, str(e)))
"""All modules share this unified log"""
log_debug = log_config.log_debug
log_info = log_config.log_info
log_err = log_config.log_err
log_worker = log_config.log_worker
test_log = TestLog('ci_test', log_debug, log_info, log_err, log_worker).log
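Usage is the same everywhere in the suite: import the shared `test_log` and log at the level matching the handlers you want to hit:

```python
from utils.util_log import test_log as logger

logger.debug("raw response ...")   # debug file (and worker file, if configured)
logger.info("collection created")  # debug and info files
logger.error("request failed")     # debug, info and error files
```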

View File

@ -0,0 +1,228 @@
import random
import time
import string
from faker import Faker
import numpy as np
import jax.numpy as jnp
from sklearn import preprocessing
import base64
import requests
from loguru import logger
import datetime
fake = Faker()
def random_string(length=8):
letters = string.ascii_letters
return ''.join(random.choice(letters) for _ in range(length))
def gen_collection_name(prefix="test_collection", length=8):
name = f'{prefix}_' + datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + random_string(length=length)
return name
def admin_password():
return "Milvus"
def gen_unique_str(prefix="test", length=8):
return prefix + "_" + random_string(length=length)
def invalid_cluster_name():
res = [
"demo" * 100,
"demo" + "!",
"demo" + "@",
]
return res
def wait_cluster_be_ready(cluster_id, client, timeout=120):
t0 = time.time()
    while time.time() - t0 < timeout:
rsp = client.cluster_describe(cluster_id)
if rsp['code'] == 200:
if rsp['data']['status'] == "RUNNING":
return time.time() - t0
time.sleep(1)
logger.debug("wait cluster to be ready, cost time: %s" % (time.time() - t0))
return -1
def gen_data_by_type(field):
data_type = field["type"]
if data_type == "bool":
return random.choice([True, False])
if data_type == "int8":
return random.randint(-128, 127)
if data_type == "int16":
return random.randint(-32768, 32767)
if data_type == "int32":
return random.randint(-2147483648, 2147483647)
if data_type == "int64":
return random.randint(-9223372036854775808, 9223372036854775807)
if data_type == "float32":
return np.float64(random.random()) # Object of type float32 is not JSON serializable, so set it as float64
if data_type == "float64":
return np.float64(random.random())
if "varchar" in data_type:
length = int(data_type.split("(")[1].split(")")[0])
return "".join([chr(random.randint(97, 122)) for _ in range(length)])
if "floatVector" in data_type:
dim = int(data_type.split("(")[1].split(")")[0])
return preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist()
return None
def get_data_by_fields(fields, nb):
# logger.info(f"fields: {fields}")
fields_not_auto_id = []
for field in fields:
if not field.get("autoId", False):
fields_not_auto_id.append(field)
# logger.info(f"fields_not_auto_id: {fields_not_auto_id}")
data = []
for i in range(nb):
tmp = {}
for field in fields_not_auto_id:
tmp[field["name"]] = gen_data_by_type(field)
data.append(tmp)
return data
def get_random_json_data(uid=None):
# gen random dict data
if uid is None:
uid = 0
data = {"uid": uid, "name": fake.name(), "address": fake.address(), "text": fake.text(), "email": fake.email(),
"phone_number": fake.phone_number(),
"json": {
"name": fake.name(),
"address": fake.address()
}
}
for i in range(random.randint(1, 10)):
data["key" + str(random.randint(1, 100_000))] = "value" + str(random.randint(1, 100_000))
return data
def get_data_by_payload(payload, nb=100):
dim = payload.get("dimension", 128)
vector_field = payload.get("vectorField", "vector")
data = []
if nb == 1:
data = [{
vector_field: preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist(),
**get_random_json_data()
}]
else:
for i in range(nb):
data.append({
vector_field: preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist(),
**get_random_json_data(uid=i)
})
return data
def get_common_fields_by_data(data, exclude_fields=None):
    if isinstance(data, dict):
        data = [data]
    if not isinstance(data, list):
        raise Exception("data must be list or dict")
common_fields = set(data[0].keys())
for d in data:
keys = set(d.keys())
common_fields = common_fields.intersection(keys)
if exclude_fields is not None:
exclude_fields = set(exclude_fields)
common_fields = common_fields.difference(exclude_fields)
return list(common_fields)
def gen_binary_vectors(num, dim):
raw_vectors = []
binary_vectors = []
for _ in range(num):
raw_vector = [random.randint(0, 1) for _ in range(dim)]
raw_vectors.append(raw_vector)
        # pack the binary-valued array into the bits of a uint8 array, then convert it to bytes
binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist()))
return raw_vectors, binary_vectors
def gen_fp16_vectors(num, dim):
"""
generate float16 vector data
raw_vectors : the vectors
fp16_vectors: the bytes used for insert
return: raw_vectors and fp16_vectors
"""
raw_vectors = []
fp16_vectors = []
for _ in range(num):
raw_vector = [random.random() for _ in range(dim)]
raw_vectors.append(raw_vector)
fp16_vector = np.array(raw_vector, dtype=np.float16).view(np.uint8).tolist()
fp16_vectors.append(bytes(fp16_vector))
return raw_vectors, fp16_vectors
def gen_bf16_vectors(num, dim):
"""
generate brain float16 vector data
raw_vectors : the vectors
bf16_vectors: the bytes used for insert
return: raw_vectors and bf16_vectors
"""
raw_vectors = []
bf16_vectors = []
for _ in range(num):
raw_vector = [random.random() for _ in range(dim)]
raw_vectors.append(raw_vector)
bf16_vector = np.array(jnp.array(raw_vector, dtype=jnp.bfloat16)).view(np.uint8).tolist()
bf16_vectors.append(bytes(bf16_vector))
return raw_vectors, bf16_vectors
def gen_vector(datatype="FloatVector", dim=128, binary_data=False):
value = None
if datatype == "FloatVector":
return preprocessing.normalize([np.array([random.random() for i in range(dim)])])[0].tolist()
if datatype == "BinaryVector":
value = gen_binary_vectors(1, dim)[1][0]
if datatype == "Float16Vector":
value = gen_fp16_vectors(1, dim)[1][0]
if datatype == "BFloat16Vector":
value = gen_bf16_vectors(1, dim)[1][0]
if value is None:
raise Exception(f"unsupported datatype: {datatype}")
else:
if binary_data:
return value
else:
data = base64.b64encode(value).decode("utf-8")
return data
def get_all_fields_by_data(data, exclude_fields=None):
    fields = set()
    for d in data:
        fields.update(d.keys())
    if exclude_fields is not None:
        exclude_fields = set(exclude_fields)
        fields = fields.difference(exclude_fields)
    return list(fields)
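The encoding split matters when reusing these helpers: RESTful payloads carry non-float vectors as base64 strings, while the SDK expects raw bytes. For example:

```python
from utils.utils import gen_vector

b64_fp16 = gen_vector(datatype="Float16Vector", dim=128)                    # base64 str, for RESTful payloads
raw_fp16 = gen_vector(datatype="Float16Vector", dim=128, binary_data=True)  # raw bytes, for the SDK
floats = gen_vector(datatype="FloatVector", dim=128)                        # plain list of floats
```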