Fix drop collection failed due to etcd txn limit. (#19362)

Signed-off-by: longjiquan <jiquan.long@zilliz.com>

Signed-off-by: longjiquan <jiquan.long@zilliz.com>
This commit is contained in:
Jiquan Long 2022-09-22 17:36:52 +08:00 committed by GitHub
parent 5117017355
commit cfd01a188c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 81 additions and 20 deletions

View File

@ -373,8 +373,17 @@ func (kc *Catalog) DropCollection(ctx context.Context, collectionInfo *model.Col
fmt.Sprintf("%s/%s", CollectionAliasMetaPrefix, alias), fmt.Sprintf("%s/%s", CollectionAliasMetaPrefix, alias),
) )
} }
delMetakeysSnap = append(delMetakeysSnap, buildPartitionPrefix(collectionInfo.CollectionID)) // Snapshot will list all (k, v) pairs and then use Txn.MultiSave to save tombstone for these keys when it prepares
delMetakeysSnap = append(delMetakeysSnap, buildFieldPrefix(collectionInfo.CollectionID)) // to remove a prefix, so though we have very few prefixes, the final operations may exceed the max txn number.
// TODO(longjiquan): should we list all partitions & fields in KV anyway?
for _, partition := range collectionInfo.Partitions {
delMetakeysSnap = append(delMetakeysSnap, buildPartitionKey(collectionInfo.CollectionID, partition.PartitionID))
}
for _, field := range collectionInfo.Fields {
delMetakeysSnap = append(delMetakeysSnap, buildFieldKey(collectionInfo.CollectionID, field.FieldID))
}
// delMetakeysSnap = append(delMetakeysSnap, buildPartitionPrefix(collectionInfo.CollectionID))
// delMetakeysSnap = append(delMetakeysSnap, buildFieldPrefix(collectionInfo.CollectionID))
// Though batchMultiSaveAndRemoveWithPrefix is not atomic enough, we can promise atomicity outside. // Though batchMultiSaveAndRemoveWithPrefix is not atomic enough, we can promise atomicity outside.
// If we found collection under dropping state, we'll know that gc is not completely on this collection. // If we found collection under dropping state, we'll know that gc is not completely on this collection.

View File

@ -86,7 +86,10 @@ func (t *dropCollectionTask) Execute(ctx context.Context) error {
redoTask.AddAsyncStep(&deleteCollectionMetaStep{ redoTask.AddAsyncStep(&deleteCollectionMetaStep{
baseStep: baseStep{core: t.core}, baseStep: baseStep{core: t.core},
collectionID: collMeta.CollectionID, collectionID: collMeta.CollectionID,
ts: ts, // This ts is less than the ts when we notify data nodes to drop collection, but it's OK since we have already
// marked this collection as deleted. If we want to make this ts greater than the notification's ts, we should
// wrap a step who will have these three children and connect them with ts.
ts: ts,
}) })
return redoTask.Execute(ctx) return redoTask.Execute(ctx)

View File

@ -82,7 +82,10 @@ func (t *dropPartitionTask) Execute(ctx context.Context) error {
baseStep: baseStep{core: t.core}, baseStep: baseStep{core: t.core},
collectionID: t.collMeta.CollectionID, collectionID: t.collMeta.CollectionID,
partitionID: partID, partitionID: partID,
ts: t.GetTs(), // This ts is less than the ts when we notify data nodes to drop partition, but it's OK since we have already
// marked this partition as deleted. If we want to make this ts greater than the notification's ts, we should
// wrap a step who will have these children and connect them with ts.
ts: t.GetTs(),
}) })
return redoTask.Execute(ctx) return redoTask.Execute(ctx)

View File

@ -50,7 +50,10 @@ func (c *bgGarbageCollector) ReDropCollection(collMeta *model.Collection, ts Tim
redo.AddAsyncStep(&deleteCollectionMetaStep{ redo.AddAsyncStep(&deleteCollectionMetaStep{
baseStep: baseStep{core: c.s}, baseStep: baseStep{core: c.s},
collectionID: collMeta.CollectionID, collectionID: collMeta.CollectionID,
ts: ts, // This ts is less than the ts when we notify data nodes to drop collection, but it's OK since we have already
// marked this collection as deleted. If we want to make this ts greater than the notification's ts, we should
// wrap a step who will have these three children and connect them with ts.
ts: ts,
}) })
// err is ignored since no sync steps will be executed. // err is ignored since no sync steps will be executed.
@ -94,7 +97,10 @@ func (c *bgGarbageCollector) ReDropPartition(pChannels []string, partition *mode
baseStep: baseStep{core: c.s}, baseStep: baseStep{core: c.s},
collectionID: partition.CollectionID, collectionID: partition.CollectionID,
partitionID: partition.PartitionID, partitionID: partition.PartitionID,
ts: ts, // This ts is less than the ts when we notify data nodes to drop partition, but it's OK since we have already
// marked this partition as deleted. If we want to make this ts greater than the notification's ts, we should
// wrap a step who will have these children and connect them with ts.
ts: ts,
}) })
// err is ignored since no sync steps will be executed. // err is ignored since no sync steps will be executed.

View File

@ -223,7 +223,6 @@ func (mt *MetaTable) RemoveCollection(ctx context.Context, collectionID UniqueID
if err := mt.catalog.DropCollection(ctx1, &model.Collection{CollectionID: collectionID, Aliases: aliases}, ts); err != nil { if err := mt.catalog.DropCollection(ctx1, &model.Collection{CollectionID: collectionID, Aliases: aliases}, ts); err != nil {
return err return err
} }
delete(mt.collID2Meta, collectionID)
var name string var name string
coll, ok := mt.collID2Meta[collectionID] coll, ok := mt.collID2Meta[collectionID]
@ -236,6 +235,8 @@ func (mt *MetaTable) RemoveCollection(ctx context.Context, collectionID UniqueID
delete(mt.collAlias2ID, alias) delete(mt.collAlias2ID, alias)
} }
delete(mt.collID2Meta, collectionID)
log.Info("remove collection", zap.String("name", name), zap.Int64("id", collectionID), zap.Strings("aliases", aliases)) log.Info("remove collection", zap.String("name", name), zap.Int64("id", collectionID), zap.Strings("aliases", aliases))
return nil return nil
} }

View File

@ -198,6 +198,8 @@ type waitForTsSyncedStep struct {
func (s *waitForTsSyncedStep) Execute(ctx context.Context) ([]nestedStep, error) { func (s *waitForTsSyncedStep) Execute(ctx context.Context) ([]nestedStep, error) {
syncedTs := s.core.chanTimeTick.getSyncedTimeTick(s.channel) syncedTs := s.core.chanTimeTick.getSyncedTimeTick(s.channel)
if syncedTs < s.ts { if syncedTs < s.ts {
// TODO: there may be frequent log here.
// time.Sleep(Params.ProxyCfg.TimeTickInterval)
return nil, fmt.Errorf("ts not synced yet, channel: %s, synced: %d, want: %d", s.channel, syncedTs, s.ts) return nil, fmt.Errorf("ts not synced yet, channel: %s, synced: %d, want: %d", s.channel, syncedTs, s.ts)
} }
return nil, nil return nil, nil

View File

@ -95,7 +95,7 @@ type bgStepExecutor struct {
bufferedSteps map[*stepStack]struct{} bufferedSteps map[*stepStack]struct{}
selector selectStepPolicy selector selectStepPolicy
mu sync.Mutex mu sync.Mutex
notify chan struct{} notifyChan chan struct{}
interval time.Duration interval time.Duration
} }
@ -108,7 +108,7 @@ func newBgStepExecutor(ctx context.Context, opts ...bgOpt) *bgStepExecutor {
bufferedSteps: make(map[*stepStack]struct{}), bufferedSteps: make(map[*stepStack]struct{}),
selector: defaultSelectPolicy(), selector: defaultSelectPolicy(),
mu: sync.Mutex{}, mu: sync.Mutex{},
notify: make(chan struct{}, 1), notifyChan: make(chan struct{}, 1),
interval: defaultBgExecutingInterval, interval: defaultBgExecutingInterval,
} }
for _, opt := range opts { for _, opt := range opts {
@ -128,14 +128,8 @@ func (bg *bgStepExecutor) Stop() {
} }
func (bg *bgStepExecutor) AddSteps(s *stepStack) { func (bg *bgStepExecutor) AddSteps(s *stepStack) {
bg.mu.Lock()
bg.addStepsInternal(s) bg.addStepsInternal(s)
bg.mu.Unlock() bg.notify()
select {
case bg.notify <- struct{}{}:
default:
}
} }
func (bg *bgStepExecutor) process(steps []*stepStack) { func (bg *bgStepExecutor) process(steps []*stepStack) {
@ -150,7 +144,8 @@ func (bg *bgStepExecutor) process(steps []*stepStack) {
defer wg.Done() defer wg.Done()
child := s.Execute(bg.ctx) child := s.Execute(bg.ctx)
if child != nil { if child != nil {
bg.AddSteps(child) // don't notify, wait for reschedule.
bg.addStepsInternal(child)
} }
}() }()
} }
@ -161,7 +156,7 @@ func (bg *bgStepExecutor) schedule() {
bg.mu.Lock() bg.mu.Lock()
selected := bg.selector(bg.bufferedSteps) selected := bg.selector(bg.bufferedSteps)
for _, s := range selected { for _, s := range selected {
bg.removeStepsInternal(s) bg.unlockRemoveSteps(s)
} }
bg.mu.Unlock() bg.mu.Unlock()
@ -178,7 +173,7 @@ func (bg *bgStepExecutor) scheduleLoop() {
select { select {
case <-bg.ctx.Done(): case <-bg.ctx.Done():
return return
case <-bg.notify: case <-bg.notifyChan:
bg.schedule() bg.schedule()
case <-ticker.C: case <-ticker.C:
bg.schedule() bg.schedule()
@ -187,9 +182,22 @@ func (bg *bgStepExecutor) scheduleLoop() {
} }
func (bg *bgStepExecutor) addStepsInternal(s *stepStack) { func (bg *bgStepExecutor) addStepsInternal(s *stepStack) {
bg.mu.Lock()
bg.unlockAddSteps(s)
bg.mu.Unlock()
}
func (bg *bgStepExecutor) unlockAddSteps(s *stepStack) {
bg.bufferedSteps[s] = struct{}{} bg.bufferedSteps[s] = struct{}{}
} }
func (bg *bgStepExecutor) removeStepsInternal(s *stepStack) { func (bg *bgStepExecutor) unlockRemoveSteps(s *stepStack) {
delete(bg.bufferedSteps, s) delete(bg.bufferedSteps, s)
} }
func (bg *bgStepExecutor) notify() {
select {
case bg.notifyChan <- struct{}{}:
default:
}
}

View File

@ -0,0 +1,29 @@
package rootcoord
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
)
func Test_waitForTsSyncedStep_Execute(t *testing.T) {
//Params.InitOnce()
//Params.ProxyCfg.TimeTickInterval = time.Millisecond
ticker := newRocksMqTtSynchronizer()
core := newTestCore(withTtSynchronizer(ticker))
core.chanTimeTick.syncedTtHistogram.update("ch1", 100)
s := &waitForTsSyncedStep{
baseStep: baseStep{core: core},
ts: 101,
channel: "ch1",
}
children, err := s.Execute(context.Background())
assert.Equal(t, 0, len(children))
assert.Error(t, err)
core.chanTimeTick.syncedTtHistogram.update("ch1", 102)
children, err = s.Execute(context.Background())
assert.Equal(t, 0, len(children))
assert.NoError(t, err)
}