milvus/tests/benchmark/milvus_benchmark/parser.py

import pdb
import logging

logger = logging.getLogger("milvus_benchmark.parser")


def operations_parser(operations):
    """
    Pick the first operation defined in a suite.

    :param operations: mapping of run type -> run parameters
    :return: tuple (run_type, run_params) for the first entry yielded by
        operations.items(); None if items() yields nothing
    :raises Exception: if operations is empty or None
    """
    if not operations:
        raise Exception("No operations in suite defined")
    # Only the first entry is ever consumed; later entries are ignored.
    first_entry = next(iter(operations.items()), None)
    if first_entry is None:
        return None
    run_type, run_params = first_entry
    logger.debug(run_type)
    return (run_type, run_params)


def collection_parser(collection_name):
    """
    Resolve the collection name to obtain the corresponding configuration

    The name is split on "_" and the fields are read by position, e.g.:
    sift_1m_128_l2
    sift: type of data set
    1m: size of the data inserted in the collection
    128: vector dimension
    l2: metric type

    Size units: "w" = 10,000, "m" = 1,000,000, "b" = 1,000,000,000.
    A size written with digits only (e.g. "1000") is taken as a plain count.
    Fields after the fourth one are ignored.

    :param collection_name: "_"-separated name with at least four fields
    :return: tuple (data_type, collection_size, dimension, metric_type)
    :raises IndexError: if there are fewer than four fields or the size
        field is empty
    :raises ValueError: if the size number or the dimension is not an integer
    """
    unit_multipliers = {"w": 10000, "m": 1000000, "b": 1000000000}

    fields = collection_name.split("_")
    data_type = fields[0]
    size_field = fields[1]
    if size_field.isdecimal():
        # No unit suffix: previously the last digit was mistaken for the
        # unit and a truncated string was returned.
        collection_size = int(size_field)
    else:
        collection_size_unit = size_field[-1]
        collection_size = size_field[0:-1]
        if collection_size_unit in unit_multipliers:
            collection_size = int(collection_size) * unit_multipliers[collection_size_unit]
        # An unrecognised unit keeps the historical behaviour: the part
        # before the unit is returned unchanged, as a string.
    dimension = int(fields[2])
    metric_type = str(fields[3])
    return (data_type, collection_size, dimension, metric_type)


def parse_ann_collection_name(collection_name):
    """
    Analyze the collection name of the accuracy test and obtain the corresponding configuration

    The name is split on "_" and the fields are read by position, e.g.:
    sift_128_euclidean
    sift: type of data set
    128: vector dimension
    euclidean: distance name, translated to a metric type
        (euclidean -> l2, angular -> ip, jaccard -> jaccard, hamming -> hamming)

    :param collection_name: "_"-separated name with at least three fields
    :return: tuple (data_type, dimension, metric_type)
    :raises IndexError: if there are fewer than three fields
    :raises ValueError: if the dimension is not an integer or the distance
        name is not one of the four supported ones
    """
    metric_types = {
        "euclidean": "l2",
        "angular": "ip",
        "jaccard": "jaccard",
        "hamming": "hamming",
    }

    fields = collection_name.split("_")
    data_type = fields[0]
    dimension = int(fields[1])
    metric = fields[2]
    if metric not in metric_types:
        # Previously this fell through every branch and failed with an
        # UnboundLocalError on metric_type.
        raise ValueError(
            "Unsupported metric %r in collection name %r" % (metric, collection_name)
        )
    return (data_type, dimension, metric_types[metric])


def search_params_parser(param):
    """
    Extract the top-k, nq and nprobe value lists from a search param mapping

    For each of the keys "top_ks", "nqs" and "nprobes":
    - a missing key falls back to its default ([10], [10] and [1])
    - a single int is wrapped into a one-element list
    - a list is copied
    - anything else is logged as a warning and passed through unchanged

    :param param: mapping that may contain "top_ks", "nqs" and "nprobes"
    :return: tuple (top_ks, nqs, nprobes)
    """
    # (key in param, default value, warning emitted for an unsupported type)
    specs = (
        ("top_ks", 10, "Invalid format top-ks: %s"),
        ("nqs", 10, "Invalid format nqs: %s"),
        ("nprobes", 1, "Invalid format nprobes: %s"),
    )

    parsed = []
    for key, default, warning_template in specs:
        values = param[key] if key in param else [default]
        if isinstance(values, int):
            values = [values]
        elif isinstance(values, list):
            values = list(values)
        else:
            logger.warning(warning_template % str(values))
        parsed.append(values)

    top_ks, nqs, nprobes = parsed
    return top_ks, nqs, nprobes
Add tests/benchmark and tests/python_test using new python SDK Signed-off-by: Cai Yudong <yudong.cai@zilliz.com> 2021-02-25 17:35:36 +08:00			`import pdb`
			`import logging`

			`logger = logging.getLogger("milvus_benchmark.parser")`


			`def operations_parser(operations):`
			`if not operations:`
			`raise Exception("No operations in suite defined")`
			`for run_type, run_params in operations.items():`
			`logger.debug(run_type)`
			`return (run_type, run_params)`


			`def collection_parser(collection_name):`
[skip ci] Add comment for the collection name resolution on benchmark (#8578) Signed-off-by: wangting0128 <ting.wang@zilliz.com> 2021-09-26 15:07:56 +08:00			`"""`
			`Resolve the collection name to obtain the corresponding configuration`
			`e.g.:`
			`sift_1m_128_l2`
			`sift: type of data set`
			`1m: size of the data inserted in the collection`
			`128: vector dimension`
			`l2: metric type`
			`"""`
Add tests/benchmark and tests/python_test using new python SDK Signed-off-by: Cai Yudong <yudong.cai@zilliz.com> 2021-02-25 17:35:36 +08:00			`tmp = collection_name.split("_")`
			`# if len(tmp) != 5:`
			`# return None`
			`data_type = tmp[0]`
			`collection_size_unit = tmp[1][-1]`
			`collection_size = tmp[1][0:-1]`
Bench scripts for 2.0 (#6263) * [skip ci] update benchmark scripts for 2.0 Signed-off-by: del-zhenwu <zhenxiang.li@zilliz.com> * [skip ci] Update README.md Signed-off-by: del-zhenwu <zhenxiang.li@zilliz.com> * [skip ci] Update mergify.yml for bench scripts Signed-off-by: zhenwu <zhenwu@milvus.io> Co-authored-by: zhenwu <zhenwu@milvus.io> 2021-07-02 11:40:16 +08:00			`if collection_size_unit == "w":`
			`collection_size = int(collection_size) * 10000`
			`elif collection_size_unit == "m":`
Add tests/benchmark and tests/python_test using new python SDK Signed-off-by: Cai Yudong <yudong.cai@zilliz.com> 2021-02-25 17:35:36 +08:00			`collection_size = int(collection_size) * 1000000`
			`elif collection_size_unit == "b":`
			`collection_size = int(collection_size) * 1000000000`
			`dimension = int(tmp[2])`
			`metric_type = str(tmp[3])`
			`return (data_type, collection_size, dimension, metric_type)`


			`def parse_ann_collection_name(collection_name):`
[skip ci] Add comments of parse_ann_collection_name func on benchmark (#9725) Signed-off-by: wangting0128 <ting.wang@zilliz.com> 2021-10-12 19:16:44 +08:00			`"""`
			`Analyze the collection name of the accuracy test and obtain the corresponding configuration`
			`e.g.:`
			`sift_128_euclidean`
			`"""`
Add tests/benchmark and tests/python_test using new python SDK Signed-off-by: Cai Yudong <yudong.cai@zilliz.com> 2021-02-25 17:35:36 +08:00			`data_type = collection_name.split("_")[0]`
			`dimension = int(collection_name.split("_")[1])`
			`metric = collection_name.split("_")[2]`
			`# metric = collection_name.attrs['distance']`
			`# dimension = len(collection_name["train"][0])`
			`if metric == "euclidean":`
			`metric_type = "l2"`
			`elif metric == "angular":`
			`metric_type = "ip"`
			`elif metric == "jaccard":`
			`metric_type = "jaccard"`
			`elif metric == "hamming":`
			`metric_type = "hamming"`
			`return (data_type, dimension, metric_type)`


			`def search_params_parser(param):`
			`# parse top-k, set default value if top-k not in param`
			`if "top_ks" not in param:`
			`top_ks = [10]`
			`else:`
			`top_ks = param["top_ks"]`
			`if isinstance(top_ks, int):`
			`top_ks = [top_ks]`
			`elif isinstance(top_ks, list):`
			`top_ks = list(top_ks)`
			`else:`
			`logger.warning("Invalid format top-ks: %s" % str(top_ks))`

			`# parse nqs, set default value if nq not in param`
			`if "nqs" not in param:`
			`nqs = [10]`
			`else:`
			`nqs = param["nqs"]`
			`if isinstance(nqs, int):`
			`nqs = [nqs]`
			`elif isinstance(nqs, list):`
			`nqs = list(nqs)`
			`else:`
			`logger.warning("Invalid format nqs: %s" % str(nqs))`

			`# parse nprobes`
			`if "nprobes" not in param:`
			`nprobes = [1]`
			`else:`
			`nprobes = param["nprobes"]`
			`if isinstance(nprobes, int):`
			`nprobes = [nprobes]`
			`elif isinstance(nprobes, list):`
			`nprobes = list(nprobes)`
			`else:`
			`logger.warning("Invalid format nprobes: %s" % str(nprobes))`

			`return top_ks, nqs, nprobes`