From b6fcbb0998853f43f8e58a73375818b93817aba0 Mon Sep 17 00:00:00 2001 From: Gao Date: Fri, 11 Aug 2023 14:21:29 +0800 Subject: [PATCH] Support ScaNN index (#26099) Signed-off-by: chasingegg --- internal/core/thirdparty/knowhere/CMakeLists.txt | 4 ++-- internal/querynodev2/mock_data.go | 1 + internal/querynodev2/segments/mock_data.go | 1 + internal/util/indexcgowrapper/index_test.go | 6 ++++++ pkg/util/indexparamcheck/conf_adapter_mgr.go | 1 + pkg/util/indexparamcheck/conf_adapter_mgr_test.go | 12 ++++++++++++ pkg/util/indexparamcheck/index_type.go | 1 + tests/integration/getvector/get_vector_test.go | 11 +++++++++++ tests/integration/util_index.go | 5 +++-- 9 files changed, 38 insertions(+), 4 deletions(-) diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt index 1ff1374142..3cfafdac23 100644 --- a/internal/core/thirdparty/knowhere/CMakeLists.txt +++ b/internal/core/thirdparty/knowhere/CMakeLists.txt @@ -11,7 +11,7 @@ # or implied. See the License for the specific language governing permissions and limitations under the License. #------------------------------------------------------------------------------- -set( KNOWHERE_VERSION b7d0b0a ) +set( KNOWHERE_VERSION 4f99dc0) message(STATUS "Building knowhere-${KNOWHERE_SOURCE_VER} from source") message(STATUS ${CMAKE_BUILD_TYPE}) @@ -30,7 +30,7 @@ endif () set( CMAKE_PREFIX_PATH ${CONAN_BOOST_ROOT} ) FetchContent_Declare( knowhere - GIT_REPOSITORY "https://github.com/milvus-io/knowhere.git" + GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git" GIT_TAG ${KNOWHERE_VERSION} SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-src BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-build diff --git a/internal/querynodev2/mock_data.go b/internal/querynodev2/mock_data.go index 8fadf0f1c1..d0387ab680 100644 --- a/internal/querynodev2/mock_data.go +++ b/internal/querynodev2/mock_data.go @@ -50,6 +50,7 @@ const ( IndexFaissIDMap = "FLAT" IndexFaissIVFFlat = "IVF_FLAT" IndexFaissIVFPQ = "IVF_PQ" + IndexScaNN = "SCANN" IndexFaissIVFSQ8 = "IVF_SQ8" IndexFaissBinIDMap = "BIN_FLAT" IndexFaissBinIVFFlat = "BIN_IVF_FLAT" diff --git a/internal/querynodev2/segments/mock_data.go b/internal/querynodev2/segments/mock_data.go index a709339a2f..5529a2ceba 100644 --- a/internal/querynodev2/segments/mock_data.go +++ b/internal/querynodev2/segments/mock_data.go @@ -56,6 +56,7 @@ const ( IndexFaissIVFFlat = "IVF_FLAT" IndexFaissIVFPQ = "IVF_PQ" IndexFaissIVFSQ8 = "IVF_SQ8" + IndexScaNN = "SCANN" IndexFaissBinIDMap = "BIN_FLAT" IndexFaissBinIVFFlat = "BIN_IVF_FLAT" IndexHNSW = "HNSW" diff --git a/internal/util/indexcgowrapper/index_test.go b/internal/util/indexcgowrapper/index_test.go index 0083cf6143..ae2011279d 100644 --- a/internal/util/indexcgowrapper/index_test.go +++ b/internal/util/indexcgowrapper/index_test.go @@ -18,6 +18,7 @@ const ( IndexFaissIVFFlat = "IVF_FLAT" IndexFaissIVFPQ = "IVF_PQ" IndexFaissIVFSQ8 = "IVF_SQ8" + IndexScaNN = "SCANN" IndexFaissBinIDMap = "BIN_FLAT" IndexFaissBinIVFFlat = "BIN_IVF_FLAT" @@ -52,6 +53,8 @@ func generateFloatVectorTestCases() []vecTestCase { {IndexFaissIVFPQ, metric.IP, false, schemapb.DataType_FloatVector}, {IndexFaissIVFSQ8, metric.L2, false, schemapb.DataType_FloatVector}, {IndexFaissIVFSQ8, metric.IP, false, schemapb.DataType_FloatVector}, + {IndexScaNN, metric.L2, false, schemapb.DataType_FloatVector}, + {IndexScaNN, metric.IP, false, schemapb.DataType_FloatVector}, {IndexHNSW, metric.L2, false, schemapb.DataType_FloatVector}, {IndexHNSW, metric.IP, false, schemapb.DataType_FloatVector}, } @@ -89,6 +92,9 @@ func generateParams(indexType, metricType string) (map[string]string, map[string indexParams[common.DimKey] = strconv.Itoa(dim) indexParams["nlist"] = strconv.Itoa(nlist) indexParams["nbits"] = strconv.Itoa(nbits) + } else if indexType == IndexScaNN { + indexParams[common.DimKey] = strconv.Itoa(dim) + indexParams["nlist"] = strconv.Itoa(nlist) } else if indexType == IndexHNSW { indexParams[common.DimKey] = strconv.Itoa(dim) indexParams["M"] = strconv.Itoa(16) diff --git a/pkg/util/indexparamcheck/conf_adapter_mgr.go b/pkg/util/indexparamcheck/conf_adapter_mgr.go index 5b9d5e491b..dd60ae638a 100644 --- a/pkg/util/indexparamcheck/conf_adapter_mgr.go +++ b/pkg/util/indexparamcheck/conf_adapter_mgr.go @@ -48,6 +48,7 @@ func (mgr *indexCheckerMgrImpl) registerIndexChecker() { mgr.checkers[IndexFaissIDMap] = newFlatChecker() mgr.checkers[IndexFaissIvfFlat] = newIVFBaseChecker() mgr.checkers[IndexFaissIvfPQ] = newIVFPQChecker() + mgr.checkers[IndexScaNN] = newIVFBaseChecker() mgr.checkers[IndexFaissIvfSQ8] = newIVFSQChecker() mgr.checkers[IndexFaissBinIDMap] = newBinFlatChecker() mgr.checkers[IndexFaissBinIvfFlat] = newBinIVFFlatChecker() diff --git a/pkg/util/indexparamcheck/conf_adapter_mgr_test.go b/pkg/util/indexparamcheck/conf_adapter_mgr_test.go index 3d801c9fd3..370a98e2c2 100644 --- a/pkg/util/indexparamcheck/conf_adapter_mgr_test.go +++ b/pkg/util/indexparamcheck/conf_adapter_mgr_test.go @@ -41,6 +41,12 @@ func Test_GetConfAdapterMgrInstance(t *testing.T) { _, ok = adapter.(*ivfBaseChecker) assert.Equal(t, true, ok) + adapter, err = adapterMgr.GetChecker(IndexScaNN) + assert.Equal(t, nil, err) + assert.NotEqual(t, nil, adapter) + _, ok = adapter.(*ivfBaseChecker) + assert.Equal(t, true, ok) + adapter, err = adapterMgr.GetChecker(IndexFaissIvfPQ) assert.Equal(t, nil, err) assert.NotEqual(t, nil, adapter) @@ -95,6 +101,12 @@ func TestConfAdapterMgrImpl_GetAdapter(t *testing.T) { _, ok = adapter.(*ivfBaseChecker) assert.Equal(t, true, ok) + adapter, err = adapterMgr.GetChecker(IndexScaNN) + assert.Equal(t, nil, err) + assert.NotEqual(t, nil, adapter) + _, ok = adapter.(*ivfBaseChecker) + assert.Equal(t, true, ok) + adapter, err = adapterMgr.GetChecker(IndexFaissIvfPQ) assert.Equal(t, nil, err) assert.NotEqual(t, nil, adapter) diff --git a/pkg/util/indexparamcheck/index_type.go b/pkg/util/indexparamcheck/index_type.go index fc4cd723e2..63737c61ba 100644 --- a/pkg/util/indexparamcheck/index_type.go +++ b/pkg/util/indexparamcheck/index_type.go @@ -21,6 +21,7 @@ const ( IndexFaissIDMap IndexType = "FLAT" // no index is built. IndexFaissIvfFlat IndexType = "IVF_FLAT" IndexFaissIvfPQ IndexType = "IVF_PQ" + IndexScaNN IndexType = "SCANN" IndexFaissIvfSQ8 IndexType = "IVF_SQ8" IndexFaissBinIDMap IndexType = "BIN_FLAT" IndexFaissBinIvfFlat IndexType = "BIN_IVF_FLAT" diff --git a/tests/integration/getvector/get_vector_test.go b/tests/integration/getvector/get_vector_test.go index 2b5eab10d6..9f7dfe6abe 100644 --- a/tests/integration/getvector/get_vector_test.go +++ b/tests/integration/getvector/get_vector_test.go @@ -287,6 +287,17 @@ func (s *TestGetVectorSuite) TestGetVector_IVF_PQ() { s.run() } +func (s *TestGetVectorSuite) TestGetVector_SCANN() { + s.nq = 10 + s.topK = 10 + s.indexType = integration.IndexScaNN + s.metricType = metric.L2 + s.pkType = schemapb.DataType_Int64 + s.vecType = schemapb.DataType_FloatVector + s.searchFailed = false + s.run() +} + func (s *TestGetVectorSuite) TestGetVector_IVF_SQ8() { s.nq = 10 s.topK = 10 diff --git a/tests/integration/util_index.go b/tests/integration/util_index.go index fa7e0b2265..2023201f87 100644 --- a/tests/integration/util_index.go +++ b/tests/integration/util_index.go @@ -35,6 +35,7 @@ const ( IndexFaissIDMap = indexparamcheck.IndexFaissIDMap IndexFaissIvfFlat = indexparamcheck.IndexFaissIvfFlat IndexFaissIvfPQ = indexparamcheck.IndexFaissIvfPQ + IndexScaNN = indexparamcheck.IndexScaNN IndexFaissIvfSQ8 = indexparamcheck.IndexFaissIvfSQ8 IndexFaissBinIDMap = indexparamcheck.IndexFaissBinIDMap IndexFaissBinIvfFlat = indexparamcheck.IndexFaissBinIvfFlat @@ -124,7 +125,7 @@ func ConstructIndexParam(dim int, indexType string, metricType string) []*common switch indexType { case IndexFaissIDMap, IndexFaissBinIDMap: // no index param is required - case IndexFaissIvfFlat, IndexFaissBinIvfFlat, IndexFaissIvfSQ8: + case IndexFaissIvfFlat, IndexFaissBinIvfFlat, IndexFaissIvfSQ8, IndexScaNN: params = append(params, &commonpb.KeyValuePair{ Key: "nlist", Value: "100", @@ -163,7 +164,7 @@ func GetSearchParams(indexType string, metricType string) map[string]any { switch indexType { case IndexFaissIDMap, IndexFaissBinIDMap: params[common.MetricTypeKey] = metricType - case IndexFaissIvfFlat, IndexFaissBinIvfFlat, IndexFaissIvfSQ8, IndexFaissIvfPQ: + case IndexFaissIvfFlat, IndexFaissBinIvfFlat, IndexFaissIvfSQ8, IndexFaissIvfPQ, IndexScaNN: params["nprobe"] = 8 case IndexHNSW: params["ef"] = 200