2024-04-09 11:39:17 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package partialsearch
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
"sync"
|
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/stretchr/testify/suite"
|
2024-07-29 11:31:51 +08:00
|
|
|
"google.golang.org/protobuf/proto"
|
2024-04-09 11:39:17 +08:00
|
|
|
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
|
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/metric"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
|
|
"github.com/milvus-io/milvus/tests/integration"
|
|
|
|
)
|
|
|
|
|
|
|
|
type PartialSearchSuite struct {
|
|
|
|
integration.MiniClusterSuite
|
|
|
|
dim int
|
|
|
|
numCollections int
|
|
|
|
rowsPerCollection int
|
|
|
|
waitTimeInSec time.Duration
|
|
|
|
prefix string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) setupParam() {
|
|
|
|
s.dim = 128
|
|
|
|
s.numCollections = 1
|
|
|
|
s.rowsPerCollection = 100
|
|
|
|
s.waitTimeInSec = time.Second * 10
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) loadCollection(collectionName string, dim int, wg *sync.WaitGroup) {
|
|
|
|
c := s.Cluster
|
|
|
|
dbName := ""
|
|
|
|
schema := integration.ConstructSchema(collectionName, dim, true)
|
|
|
|
marshaledSchema, err := proto.Marshal(schema)
|
|
|
|
s.NoError(err)
|
|
|
|
|
|
|
|
createCollectionStatus, err := c.Proxy.CreateCollection(context.TODO(), &milvuspb.CreateCollectionRequest{
|
|
|
|
DbName: dbName,
|
|
|
|
CollectionName: collectionName,
|
|
|
|
Schema: marshaledSchema,
|
|
|
|
ShardsNum: common.DefaultShardsNum,
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
|
|
|
|
err = merr.Error(createCollectionStatus)
|
|
|
|
s.NoError(err)
|
|
|
|
|
|
|
|
showCollectionsResp, err := c.Proxy.ShowCollections(context.TODO(), &milvuspb.ShowCollectionsRequest{})
|
|
|
|
s.NoError(err)
|
|
|
|
s.True(merr.Ok(showCollectionsResp.GetStatus()))
|
|
|
|
|
|
|
|
batchSize := 500000
|
|
|
|
for start := 0; start < s.rowsPerCollection; start += batchSize {
|
|
|
|
rowNum := batchSize
|
|
|
|
if start+batchSize > s.rowsPerCollection {
|
|
|
|
rowNum = s.rowsPerCollection - start
|
|
|
|
}
|
|
|
|
fVecColumn := integration.NewFloatVectorFieldData(integration.FloatVecField, rowNum, dim)
|
|
|
|
hashKeys := integration.GenerateHashKeys(rowNum)
|
|
|
|
insertResult, err := c.Proxy.Insert(context.TODO(), &milvuspb.InsertRequest{
|
|
|
|
DbName: dbName,
|
|
|
|
CollectionName: collectionName,
|
|
|
|
FieldsData: []*schemapb.FieldData{fVecColumn},
|
|
|
|
HashKeys: hashKeys,
|
|
|
|
NumRows: uint32(rowNum),
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
s.True(merr.Ok(insertResult.GetStatus()))
|
|
|
|
}
|
|
|
|
log.Info("=========================Data insertion finished=========================")
|
|
|
|
|
|
|
|
// flush
|
|
|
|
flushResp, err := c.Proxy.Flush(context.TODO(), &milvuspb.FlushRequest{
|
|
|
|
DbName: dbName,
|
|
|
|
CollectionNames: []string{collectionName},
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
segmentIDs, has := flushResp.GetCollSegIDs()[collectionName]
|
|
|
|
ids := segmentIDs.GetData()
|
|
|
|
s.Require().NotEmpty(segmentIDs)
|
|
|
|
s.Require().True(has)
|
|
|
|
flushTs, has := flushResp.GetCollFlushTs()[collectionName]
|
|
|
|
s.True(has)
|
|
|
|
|
|
|
|
segments, err := c.MetaWatcher.ShowSegments()
|
|
|
|
s.NoError(err)
|
|
|
|
s.NotEmpty(segments)
|
|
|
|
s.WaitForFlush(context.TODO(), ids, flushTs, dbName, collectionName)
|
|
|
|
log.Info("=========================Data flush finished=========================")
|
|
|
|
|
|
|
|
// create index
|
|
|
|
createIndexStatus, err := c.Proxy.CreateIndex(context.TODO(), &milvuspb.CreateIndexRequest{
|
|
|
|
CollectionName: collectionName,
|
|
|
|
FieldName: integration.FloatVecField,
|
|
|
|
IndexName: "_default",
|
|
|
|
ExtraParams: integration.ConstructIndexParam(dim, integration.IndexFaissIvfFlat, metric.IP),
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
err = merr.Error(createIndexStatus)
|
|
|
|
s.NoError(err)
|
|
|
|
s.WaitForIndexBuilt(context.TODO(), collectionName, integration.FloatVecField)
|
|
|
|
log.Info("=========================Index created=========================")
|
|
|
|
|
|
|
|
// load
|
|
|
|
loadStatus, err := c.Proxy.LoadCollection(context.TODO(), &milvuspb.LoadCollectionRequest{
|
|
|
|
DbName: dbName,
|
|
|
|
CollectionName: collectionName,
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
err = merr.Error(loadStatus)
|
|
|
|
s.NoError(err)
|
|
|
|
s.WaitForLoad(context.TODO(), collectionName)
|
|
|
|
log.Info("=========================Collection loaded=========================")
|
|
|
|
wg.Done()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) checkCollectionLoaded(collectionName string) bool {
|
|
|
|
loadProgress, err := s.Cluster.Proxy.GetLoadingProgress(context.TODO(), &milvuspb.GetLoadingProgressRequest{
|
|
|
|
DbName: "",
|
|
|
|
CollectionName: collectionName,
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
if loadProgress.GetProgress() != int64(100) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) checkCollectionsLoaded(startCollectionID, endCollectionID int) bool {
|
|
|
|
notLoaded := 0
|
|
|
|
loaded := 0
|
|
|
|
for idx := startCollectionID; idx < endCollectionID; idx++ {
|
|
|
|
collectionName := s.prefix + "_" + strconv.Itoa(idx)
|
|
|
|
if s.checkCollectionLoaded(collectionName) {
|
|
|
|
notLoaded++
|
|
|
|
} else {
|
|
|
|
loaded++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
log.Info(fmt.Sprintf("loading status: %d/%d", loaded, endCollectionID-startCollectionID+1))
|
|
|
|
return notLoaded == 0
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) checkAllCollectionsLoaded() bool {
|
|
|
|
return s.checkCollectionsLoaded(0, s.numCollections)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) search(collectionName string, dim int) {
|
|
|
|
c := s.Cluster
|
|
|
|
var err error
|
|
|
|
// Query
|
|
|
|
queryReq := &milvuspb.QueryRequest{
|
|
|
|
Base: nil,
|
|
|
|
CollectionName: collectionName,
|
|
|
|
PartitionNames: nil,
|
|
|
|
Expr: "",
|
|
|
|
OutputFields: []string{"count(*)"},
|
|
|
|
TravelTimestamp: 0,
|
|
|
|
GuaranteeTimestamp: 0,
|
|
|
|
}
|
|
|
|
queryResult, err := c.Proxy.Query(context.TODO(), queryReq)
|
|
|
|
s.NoError(err)
|
|
|
|
s.Equal(queryResult.Status.ErrorCode, commonpb.ErrorCode_Success)
|
|
|
|
s.Equal(len(queryResult.FieldsData), 1)
|
|
|
|
numEntities := queryResult.FieldsData[0].GetScalars().GetLongData().Data[0]
|
|
|
|
s.Equal(numEntities, int64(s.rowsPerCollection))
|
|
|
|
|
|
|
|
// Search
|
|
|
|
expr := fmt.Sprintf("%s > 0", integration.Int64Field)
|
|
|
|
nq := 10
|
|
|
|
topk := 10
|
|
|
|
roundDecimal := -1
|
|
|
|
radius := 10
|
|
|
|
|
|
|
|
params := integration.GetSearchParams(integration.IndexFaissIvfFlat, metric.IP)
|
|
|
|
params["radius"] = radius
|
|
|
|
searchReq := integration.ConstructSearchRequest("", collectionName, expr,
|
|
|
|
integration.FloatVecField, schemapb.DataType_FloatVector, nil, metric.IP, params, nq, dim, topk, roundDecimal)
|
|
|
|
|
|
|
|
searchResult, _ := c.Proxy.Search(context.TODO(), searchReq)
|
|
|
|
|
|
|
|
err = merr.Error(searchResult.GetStatus())
|
|
|
|
s.NoError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) FailOnSearch(collectionName string) {
|
|
|
|
c := s.Cluster
|
|
|
|
expr := fmt.Sprintf("%s > 0", integration.Int64Field)
|
|
|
|
nq := 10
|
|
|
|
topk := 10
|
|
|
|
roundDecimal := -1
|
|
|
|
radius := 10
|
|
|
|
|
|
|
|
params := integration.GetSearchParams(integration.IndexFaissIvfFlat, metric.IP)
|
|
|
|
params["radius"] = radius
|
|
|
|
searchReq := integration.ConstructSearchRequest("", collectionName, expr,
|
|
|
|
integration.FloatVecField, schemapb.DataType_FloatVector, nil, metric.IP, params, nq, s.dim, topk, roundDecimal)
|
|
|
|
|
|
|
|
searchResult, err := c.Proxy.Search(context.TODO(), searchReq)
|
|
|
|
s.NoError(err)
|
|
|
|
err = merr.Error(searchResult.GetStatus())
|
|
|
|
s.Require().Error(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) setupData() {
|
|
|
|
// Add the second query node
|
|
|
|
log.Info("=========================Start to inject data=========================")
|
|
|
|
s.prefix = "TestPartialSearchUtil" + funcutil.GenRandomStr()
|
|
|
|
searchName := s.prefix + "_0"
|
|
|
|
wg := sync.WaitGroup{}
|
|
|
|
for idx := 0; idx < s.numCollections; idx++ {
|
|
|
|
wg.Add(1)
|
|
|
|
go s.loadCollection(s.prefix+"_"+strconv.Itoa(idx), s.dim, &wg)
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
log.Info("=========================Data injection finished=========================")
|
|
|
|
s.checkAllCollectionsLoaded()
|
|
|
|
log.Info(fmt.Sprintf("=========================start to search %s=========================", searchName))
|
|
|
|
s.search(searchName, s.dim)
|
|
|
|
log.Info("=========================Search finished=========================")
|
|
|
|
time.Sleep(s.waitTimeInSec)
|
|
|
|
s.checkAllCollectionsLoaded()
|
|
|
|
log.Info(fmt.Sprintf("=========================start to search2 %s=========================", searchName))
|
|
|
|
s.search(searchName, s.dim)
|
|
|
|
log.Info("=========================Search2 finished=========================")
|
|
|
|
s.checkAllCollectionsReady()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) checkCollectionsReady(startCollectionID, endCollectionID int) {
|
|
|
|
for i := startCollectionID; i < endCollectionID; i++ {
|
|
|
|
collectionName := s.prefix + "_" + strconv.Itoa(i)
|
|
|
|
s.search(collectionName, s.dim)
|
|
|
|
queryReq := &milvuspb.QueryRequest{
|
|
|
|
CollectionName: collectionName,
|
|
|
|
Expr: "",
|
|
|
|
OutputFields: []string{"count(*)"},
|
|
|
|
}
|
|
|
|
_, err := s.Cluster.Proxy.Query(context.TODO(), queryReq)
|
|
|
|
s.NoError(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) checkAllCollectionsReady() {
|
|
|
|
s.checkCollectionsReady(0, s.numCollections)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) releaseSegmentsReq(collectionID, nodeID, segmentID typeutil.UniqueID, shard string) *querypb.ReleaseSegmentsRequest {
|
|
|
|
req := &querypb.ReleaseSegmentsRequest{
|
|
|
|
Base: commonpbutil.NewMsgBase(
|
|
|
|
commonpbutil.WithMsgType(commonpb.MsgType_ReleaseSegments),
|
|
|
|
commonpbutil.WithMsgID(1<<30),
|
|
|
|
commonpbutil.WithTargetID(nodeID),
|
|
|
|
),
|
|
|
|
|
|
|
|
NodeID: nodeID,
|
|
|
|
CollectionID: collectionID,
|
|
|
|
SegmentIDs: []int64{segmentID},
|
|
|
|
Scope: querypb.DataScope_Historical,
|
|
|
|
Shard: shard,
|
|
|
|
NeedTransfer: false,
|
|
|
|
}
|
|
|
|
return req
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) describeCollection(name string) (int64, []string) {
|
|
|
|
resp, err := s.Cluster.Proxy.DescribeCollection(context.TODO(), &milvuspb.DescribeCollectionRequest{
|
|
|
|
DbName: "default",
|
|
|
|
CollectionName: name,
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
log.Info(fmt.Sprintf("describe collection: %v", resp))
|
|
|
|
return resp.CollectionID, resp.VirtualChannelNames
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) getSegmentIDs(collectionName string) []int64 {
|
|
|
|
resp, err := s.Cluster.Proxy.GetPersistentSegmentInfo(context.TODO(), &milvuspb.GetPersistentSegmentInfoRequest{
|
|
|
|
DbName: "default",
|
|
|
|
CollectionName: collectionName,
|
|
|
|
})
|
|
|
|
s.NoError(err)
|
|
|
|
var res []int64
|
|
|
|
for _, seg := range resp.Infos {
|
|
|
|
res = append(res, seg.SegmentID)
|
|
|
|
}
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *PartialSearchSuite) TestPartialSearch() {
|
|
|
|
s.setupParam()
|
|
|
|
s.setupData()
|
|
|
|
|
|
|
|
startCollectionID := 0
|
|
|
|
endCollectionID := 0
|
|
|
|
// Search should work in the beginning
|
|
|
|
s.checkCollectionsReady(startCollectionID, endCollectionID)
|
|
|
|
// Test case with one segment released
|
|
|
|
// Partial search does not work yet.
|
|
|
|
c := s.Cluster
|
|
|
|
q1 := c.QueryNode
|
|
|
|
c.QueryCoord.StopCheckerForTestOnly()
|
|
|
|
collectionName := s.prefix + "_0"
|
|
|
|
nodeID := q1.GetServerIDForTestOnly()
|
|
|
|
collectionID, channels := s.describeCollection(collectionName)
|
|
|
|
segs := s.getSegmentIDs(collectionName)
|
|
|
|
s.Require().Positive(len(segs))
|
|
|
|
s.Require().Positive(len(channels))
|
|
|
|
segmentID := segs[0]
|
|
|
|
shard := channels[0]
|
|
|
|
req := s.releaseSegmentsReq(collectionID, nodeID, segmentID, shard)
|
|
|
|
q1.ReleaseSegments(context.TODO(), req)
|
|
|
|
s.FailOnSearch(collectionName)
|
|
|
|
c.QueryCoord.StartCheckerForTestOnly()
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestPartialSearchUtil(t *testing.T) {
|
|
|
|
suite.Run(t, new(PartialSearchSuite))
|
|
|
|
}
|