test: Avoid unstable case of bulkinsert (#28679)

test: There are too many test cases for bulkinsert + partition_key. Each
case creates 10 bulkinsert tasks, each importing a file with 100~200 rows.
The default num_partitions for a partition-key collection is 64, so each
task generates 64 tiny segments. With 10 cases, 10 tasks per case, and 64
tiny segments per task, 6400 tiny segments are generated in total. Every
one of these segments has fewer than 1024 rows, so none of them needs an
index build and all of them take part in compaction. This produces a large
number of compaction tasks, and processing them takes too long; eventually
some cases time out after waiting 5 minutes for their segments to become
ready, and those cases fail.

Specifying a small value for num_partitions avoids this problem.
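
For reference, a minimal pymilvus sketch of creating a partition-key collection with a small num_partitions; the test suite does the equivalent through collection_wrap.init_collection, and the connection details, field names, and dimension below are illustrative only:

```python
from pymilvus import (
    connections, Collection, CollectionSchema, FieldSchema, DataType,
)

# Illustrative local connection; adjust host/port for your deployment.
connections.connect(host="localhost", port="19530")

fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    # Marking a scalar field as the partition key enables partition-key routing.
    FieldSchema(name="key", dtype=DataType.INT64, is_partition_key=True),
    FieldSchema(name="vec", dtype=DataType.FLOAT_VECTOR, dim=13),
]
schema = CollectionSchema(fields=fields)

# Without num_partitions, a partition-key collection gets the default
# partition count (64 in this setup); capping it at 10 keeps each
# bulkinsert task from fanning out into 64 tiny segments.
collection = Collection("bulk_insert_demo", schema=schema, num_partitions=10)
assert len(collection.partitions) == 10
```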

```
[2023-11-21T03:41:16.187Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_json_file[int_scalar-True-True] PASSED [ 54%]
[2023-11-21T03:41:42.796Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_json_file[int_scalar-False-True] PASSED [ 57%]
[2023-11-21T03:42:04.694Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_json_file[string_scalar-True-True] PASSED [ 60%]
[2023-11-21T03:42:31.205Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_json_file[string_scalar-False-True] PASSED [ 63%]
[2023-11-21T03:43:38.876Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_multi_numpy_files[10-150-13-True] XPASS [ 66%]
[2023-11-21T03:49:00.357Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_multi_numpy_files[10-150-13-False] XFAIL [ 69%]
[2023-11-21T03:53:51.811Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_csv_file[int_scalar-True] FAILED [ 72%]
[2023-11-21T03:58:58.283Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_csv_file[int_scalar-False] FAILED [ 75%]
[2023-11-21T04:02:04.696Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_csv_file[string_scalar-True] PASSED [ 78%]
[2023-11-21T04:02:26.608Z] testcases/test_bulk_insert.py::TestBulkInsert::test_partition_key_on_csv_file[string_scalar-False] PASSED [ 81%]
```

Signed-off-by: yhmo <yihua.mo@zilliz.com>
Author: groot
Date: 2023-11-28 10:34:31 +08:00 (committed by GitHub)
Parent: ce2436127c
Commit: 9c9ab08f54

testcases/test_bulk_insert.py

```
@@ -783,8 +783,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_float_field(name=df.float_field),
         ]
         schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id)
-        self.collection_wrap.init_collection(c_name, schema=schema)
-        assert len(self.collection_wrap.partitions) == ct.default_partition_num
+        self.collection_wrap.init_collection(c_name, schema=schema, num_partitions=10)
+        assert len(self.collection_wrap.partitions) == 10
         # import data
         t0 = time.time()
@@ -858,7 +858,6 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
     @pytest.mark.parametrize("dim", [13])
     @pytest.mark.parametrize("entities", [150])
     @pytest.mark.parametrize("file_nums", [10])
-    @pytest.mark.skip(reason="issue #28209")
     def test_partition_key_on_multi_numpy_files(
         self, auto_id, dim, entities, file_nums
     ):
@@ -880,7 +879,7 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_float_vec_field(name=df.vec_field, dim=dim),
         ]
         schema = cf.gen_collection_schema(fields=fields)
-        self.collection_wrap.init_collection(c_name, schema=schema)
+        self.collection_wrap.init_collection(c_name, schema=schema, num_partitions=10)
         # build index
         index_params = ct.default_index
         self.collection_wrap.create_index(
@@ -976,8 +975,8 @@ class TestBulkInsert(TestcaseBaseBulkInsert):
             cf.gen_float_field(name=df.float_field),
         ]
         schema = cf.gen_collection_schema(fields=fields, auto_id=auto_id)
-        self.collection_wrap.init_collection(c_name, schema=schema)
-        assert len(self.collection_wrap.partitions) == ct.default_partition_num
+        self.collection_wrap.init_collection(c_name, schema=schema, num_partitions=10)
+        assert len(self.collection_wrap.partitions) == 10
         # import data
         t0 = time.time()
```