milvus/internal/rootcoord/root_coord.go

2922 lines
104 KiB
Go
Raw Normal View History

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rootcoord
import (
"context"
"fmt"
"math/rand"
"os"
"sync"
"time"
"github.com/cockroachdb/errors"
"github.com/samber/lo"
"github.com/tikv/client-go/v2/txnkv"
clientv3 "go.etcd.io/etcd/client/v3"
"go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/sync/errgroup"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/allocator"
etcdkv "github.com/milvus-io/milvus/internal/kv/etcd"
"github.com/milvus-io/milvus/internal/kv/tikv"
"github.com/milvus-io/milvus/internal/metastore"
kvmetestore "github.com/milvus-io/milvus/internal/metastore/kv/rootcoord"
"github.com/milvus-io/milvus/internal/metastore/model"
pb "github.com/milvus-io/milvus/internal/proto/etcdpb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/proxypb"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
tso2 "github.com/milvus-io/milvus/internal/tso"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/internal/util/proxyutil"
"github.com/milvus-io/milvus/internal/util/sessionutil"
"github.com/milvus-io/milvus/internal/util/streamingutil"
tsoutil2 "github.com/milvus-io/milvus/internal/util/tsoutil"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/kv"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/metrics"
"github.com/milvus-io/milvus/pkg/util"
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
"github.com/milvus-io/milvus/pkg/util/crypto"
"github.com/milvus-io/milvus/pkg/util/expr"
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/logutil"
"github.com/milvus-io/milvus/pkg/util/merr"
"github.com/milvus-io/milvus/pkg/util/metricsinfo"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/retry"
"github.com/milvus-io/milvus/pkg/util/timerecord"
"github.com/milvus-io/milvus/pkg/util/tsoutil"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
// UniqueID is an alias of typeutil.UniqueID.
type UniqueID = typeutil.UniqueID
// Timestamp is an alias of typeutil.Timestamp
type Timestamp = typeutil.Timestamp
const InvalidCollectionID = UniqueID(0)
var Params *paramtable.ComponentParam = paramtable.Get()
type Opt func(*Core)
type metaKVCreator func() (kv.MetaKv, error)
// Core root coordinator core
type Core struct {
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
etcdCli *clientv3.Client
tikvCli *txnkv.Client
address string
meta IMetaTable
scheduler IScheduler
broker Broker
ddlTsLockManager DdlTsLockManager
garbageCollector GarbageCollector
stepExecutor StepExecutor
metaKVCreator metaKVCreator
proxyCreator proxyutil.ProxyCreator
proxyWatcher *proxyutil.ProxyWatcher
proxyClientManager proxyutil.ProxyClientManagerInterface
metricsCacheManager *metricsinfo.MetricsCacheManager
chanTimeTick *timetickSync
idAllocator allocator.Interface
tsoAllocator tso2.Allocator
dataCoord types.DataCoordClient
queryCoord types.QueryCoordClient
quotaCenter *QuotaCenter
stateCode atomic.Int32
initOnce sync.Once
startOnce sync.Once
session *sessionutil.Session
factory dependency.Factory
enableActiveStandBy bool
activateFunc func() error
}
// --------------------- function --------------------------
// NewCore creates a new rootcoord core
func NewCore(c context.Context, factory dependency.Factory) (*Core, error) {
ctx, cancel := context.WithCancel(c)
rand.Seed(time.Now().UnixNano())
core := &Core{
ctx: ctx,
cancel: cancel,
factory: factory,
enableActiveStandBy: Params.RootCoordCfg.EnableActiveStandby.GetAsBool(),
}
core.UpdateStateCode(commonpb.StateCode_Abnormal)
core.SetProxyCreator(proxyutil.DefaultProxyCreator)
expr.Register("rootcoord", core)
return core, nil
}
// UpdateStateCode update state code
func (c *Core) UpdateStateCode(code commonpb.StateCode) {
c.stateCode.Store(int32(code))
log.Info("update rootcoord state", zap.String("state", code.String()))
}
func (c *Core) GetStateCode() commonpb.StateCode {
return commonpb.StateCode(c.stateCode.Load())
}
func (c *Core) sendTimeTick(t Timestamp, reason string) error {
pc := c.chanTimeTick.listDmlChannels()
pt := make([]uint64, len(pc))
for i := 0; i < len(pt); i++ {
pt[i] = t
}
ttMsg := internalpb.ChannelTimeTickMsg{
Base: commonpbutil.NewMsgBase(
commonpbutil.WithMsgType(commonpb.MsgType_TimeTick),
commonpbutil.WithTimeStamp(t),
commonpbutil.WithSourceID(ddlSourceID),
),
ChannelNames: pc,
Timestamps: pt,
DefaultTimestamp: t,
}
return c.chanTimeTick.updateTimeTick(&ttMsg, reason)
}
func (c *Core) sendMinDdlTsAsTt() {
if !paramtable.Get().CommonCfg.TTMsgEnabled.GetAsBool() {
return
}
code := c.GetStateCode()
if code != commonpb.StateCode_Healthy {
log.Warn("rootCoord is not healthy, skip send timetick")
return
}
minBgDdlTs := c.ddlTsLockManager.GetMinDdlTs()
minNormalDdlTs := c.scheduler.GetMinDdlTs()
minDdlTs := funcutil.Min(minBgDdlTs, minNormalDdlTs)
// zero -> ddlTsLockManager and scheduler not started.
if minDdlTs == typeutil.ZeroTimestamp {
log.Warn("zero ts was met, this should be only occurred in starting state", zap.Uint64("minBgDdlTs", minBgDdlTs), zap.Uint64("minNormalDdlTs", minNormalDdlTs))
return
}
// max -> abnormal case, impossible.
if minDdlTs == typeutil.MaxTimestamp {
log.Warn("ddl ts is abnormal, max ts was met", zap.Uint64("minBgDdlTs", minBgDdlTs), zap.Uint64("minNormalDdlTs", minNormalDdlTs))
return
}
if err := c.sendTimeTick(minDdlTs, "timetick loop"); err != nil {
log.Warn("failed to send timetick", zap.Error(err))
}
}
func (c *Core) startTimeTickLoop() {
defer c.wg.Done()
ticker := time.NewTicker(Params.ProxyCfg.TimeTickInterval.GetAsDuration(time.Millisecond))
defer ticker.Stop()
for {
select {
case <-c.ctx.Done():
log.Info("rootcoord's timetick loop quit!")
return
case <-ticker.C:
c.sendMinDdlTsAsTt()
}
}
}
func (c *Core) tsLoop() {
defer c.wg.Done()
tsoTicker := time.NewTicker(tso2.UpdateTimestampStep)
defer tsoTicker.Stop()
ctx, cancel := context.WithCancel(c.ctx)
defer cancel()
for {
select {
case <-tsoTicker.C:
if err := c.tsoAllocator.UpdateTSO(); err != nil {
log.Warn("failed to update tso", zap.Error(err))
continue
}
ts := c.tsoAllocator.GetLastSavedTime()
metrics.RootCoordTimestampSaved.Set(float64(ts.Unix()))
case <-ctx.Done():
log.Info("rootcoord's ts loop quit!")
return
}
}
}
func (c *Core) SetProxyCreator(f func(ctx context.Context, addr string, nodeID int64) (types.ProxyClient, error)) {
c.proxyCreator = f
}
func (c *Core) SetDataCoordClient(s types.DataCoordClient) error {
if s == nil {
return errors.New("null DataCoord interface")
}
c.dataCoord = s
return nil
}
func (c *Core) SetQueryCoordClient(s types.QueryCoordClient) error {
if s == nil {
return errors.New("null QueryCoord interface")
}
c.queryCoord = s
return nil
}
// Register register rootcoord at etcd
func (c *Core) Register() error {
c.session.Register()
afterRegister := func() {
metrics.NumNodes.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), typeutil.RootCoordRole).Inc()
log.Info("RootCoord Register Finished")
c.session.LivenessCheck(c.ctx, func() {
log.Error("Root Coord disconnected from etcd, process will exit", zap.Int64("Server Id", c.session.ServerID))
os.Exit(1)
})
}
if c.enableActiveStandBy {
go func() {
if err := c.session.ProcessActiveStandBy(c.activateFunc); err != nil {
log.Warn("failed to activate standby rootcoord server", zap.Error(err))
panic(err)
}
afterRegister()
}()
} else {
afterRegister()
}
return nil
}
func (c *Core) SetAddress(address string) {
c.address = address
}
// SetEtcdClient sets the etcdCli of Core
func (c *Core) SetEtcdClient(etcdClient *clientv3.Client) {
c.etcdCli = etcdClient
}
// SetTiKVClient sets the tikvCli of Core
func (c *Core) SetTiKVClient(client *txnkv.Client) {
c.tikvCli = client
}
func (c *Core) initSession() error {
c.session = sessionutil.NewSession(c.ctx)
if c.session == nil {
return fmt.Errorf("session is nil, the etcd client connection may have failed")
}
c.session.Init(typeutil.RootCoordRole, c.address, true, true)
c.session.SetEnableActiveStandBy(c.enableActiveStandBy)
return nil
}
func (c *Core) initKVCreator() {
if c.metaKVCreator == nil {
if Params.MetaStoreCfg.MetaStoreType.GetValue() == util.MetaStoreTypeTiKV {
c.metaKVCreator = func() (kv.MetaKv, error) {
return tikv.NewTiKV(c.tikvCli, Params.TiKVCfg.MetaRootPath.GetValue(),
tikv.WithRequestTimeout(paramtable.Get().ServiceParam.TiKVCfg.RequestTimeout.GetAsDuration(time.Millisecond))), nil
}
} else {
c.metaKVCreator = func() (kv.MetaKv, error) {
return etcdkv.NewEtcdKV(c.etcdCli, Params.EtcdCfg.MetaRootPath.GetValue(),
etcdkv.WithRequestTimeout(paramtable.Get().ServiceParam.EtcdCfg.RequestTimeout.GetAsDuration(time.Millisecond))), nil
}
}
}
}
func (c *Core) initMetaTable() error {
fn := func() error {
var catalog metastore.RootCoordCatalog
var err error
switch Params.MetaStoreCfg.MetaStoreType.GetValue() {
case util.MetaStoreTypeEtcd:
log.Info("Using etcd as meta storage.")
var metaKV kv.MetaKv
var ss *kvmetestore.SuffixSnapshot
var err error
if metaKV, err = c.metaKVCreator(); err != nil {
return err
}
if ss, err = kvmetestore.NewSuffixSnapshot(metaKV, kvmetestore.SnapshotsSep, Params.EtcdCfg.MetaRootPath.GetValue(), kvmetestore.SnapshotPrefix); err != nil {
return err
}
catalog = &kvmetestore.Catalog{Txn: metaKV, Snapshot: ss}
case util.MetaStoreTypeTiKV:
log.Info("Using tikv as meta storage.")
var metaKV kv.MetaKv
var ss *kvmetestore.SuffixSnapshot
var err error
if metaKV, err = c.metaKVCreator(); err != nil {
return err
}
if ss, err = kvmetestore.NewSuffixSnapshot(metaKV, kvmetestore.SnapshotsSep, Params.TiKVCfg.MetaRootPath.GetValue(), kvmetestore.SnapshotPrefix); err != nil {
return err
}
catalog = &kvmetestore.Catalog{Txn: metaKV, Snapshot: ss}
default:
return retry.Unrecoverable(fmt.Errorf("not supported meta store: %s", Params.MetaStoreCfg.MetaStoreType.GetValue()))
}
if c.meta, err = NewMetaTable(c.ctx, catalog, c.tsoAllocator); err != nil {
return err
}
return nil
}
return retry.Do(c.ctx, fn, retry.Attempts(10))
}
func (c *Core) initIDAllocator() error {
var tsoKV kv.TxnKV
var kvPath string
if Params.MetaStoreCfg.MetaStoreType.GetValue() == util.MetaStoreTypeTiKV {
kvPath = Params.TiKVCfg.KvRootPath.GetValue()
tsoKV = tsoutil2.NewTSOTiKVBase(c.tikvCli, kvPath, globalIDAllocatorSubPath)
} else {
kvPath = Params.EtcdCfg.KvRootPath.GetValue()
tsoKV = tsoutil2.NewTSOKVBase(c.etcdCli, kvPath, globalIDAllocatorSubPath)
}
idAllocator := allocator.NewGlobalIDAllocator(globalIDAllocatorKey, tsoKV)
if err := idAllocator.Initialize(); err != nil {
return err
}
c.idAllocator = idAllocator
log.Info("id allocator initialized",
zap.String("root_path", kvPath),
zap.String("sub_path", globalIDAllocatorSubPath),
zap.String("key", globalIDAllocatorKey))
return nil
}
func (c *Core) initTSOAllocator() error {
var tsoKV kv.TxnKV
var kvPath string
if Params.MetaStoreCfg.MetaStoreType.GetValue() == util.MetaStoreTypeTiKV {
kvPath = Params.TiKVCfg.KvRootPath.GetValue()
tsoKV = tsoutil2.NewTSOTiKVBase(c.tikvCli, Params.TiKVCfg.KvRootPath.GetValue(), globalIDAllocatorSubPath)
} else {
kvPath = Params.EtcdCfg.KvRootPath.GetValue()
tsoKV = tsoutil2.NewTSOKVBase(c.etcdCli, Params.EtcdCfg.KvRootPath.GetValue(), globalIDAllocatorSubPath)
}
tsoAllocator := tso2.NewGlobalTSOAllocator(globalTSOAllocatorKey, tsoKV)
if err := tsoAllocator.Initialize(); err != nil {
return err
}
c.tsoAllocator = tsoAllocator
log.Info("tso allocator initialized",
zap.String("root_path", kvPath),
zap.String("sub_path", globalIDAllocatorSubPath),
zap.String("key", globalIDAllocatorKey))
return nil
}
func (c *Core) initInternal() error {
c.UpdateStateCode(commonpb.StateCode_Initializing)
c.initKVCreator()
if err := c.initIDAllocator(); err != nil {
return err
}
if err := c.initTSOAllocator(); err != nil {
return err
}
if err := c.initMetaTable(); err != nil {
return err
}
c.scheduler = newScheduler(c.ctx, c.idAllocator, c.tsoAllocator)
c.factory.Init(Params)
chanMap := c.meta.ListCollectionPhysicalChannels()
c.chanTimeTick = newTimeTickSync(c.ctx, c.session.ServerID, c.factory, chanMap)
log.Info("create TimeTick sync done")
c.proxyClientManager = proxyutil.NewProxyClientManager(c.proxyCreator)
c.broker = newServerBroker(c)
c.ddlTsLockManager = newDdlTsLockManager(c.tsoAllocator)
c.garbageCollector = newBgGarbageCollector(c)
c.stepExecutor = newBgStepExecutor(c.ctx)
c.proxyWatcher = proxyutil.NewProxyWatcher(
c.etcdCli,
c.chanTimeTick.initSessions,
c.proxyClientManager.AddProxyClients,
)
c.proxyWatcher.AddSessionFunc(c.chanTimeTick.addSession, c.proxyClientManager.AddProxyClient)
c.proxyWatcher.DelSessionFunc(c.chanTimeTick.delSession, c.proxyClientManager.DelProxyClient)
log.Info("init proxy manager done")
c.metricsCacheManager = metricsinfo.NewMetricsCacheManager()
c.quotaCenter = NewQuotaCenter(c.proxyClientManager, c.queryCoord, c.dataCoord, c.tsoAllocator, c.meta)
log.Debug("RootCoord init QuotaCenter done")
if err := c.initCredentials(); err != nil {
return err
}
log.Info("init credentials done")
if err := c.initRbac(); err != nil {
return err
}
log.Info("init rootcoord done", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("Address", c.address))
return nil
}
// Init initialize routine
func (c *Core) Init() error {
var initError error
c.factory.Init(Params)
if err := c.initSession(); err != nil {
return err
}
if c.enableActiveStandBy {
c.activateFunc = func() error {
log.Info("RootCoord switch from standby to active, activating")
var err error
c.initOnce.Do(func() {
if err = c.initInternal(); err != nil {
log.Error("RootCoord init failed", zap.Error(err))
}
})
if err != nil {
return err
}
c.startOnce.Do(func() {
if err = c.startInternal(); err != nil {
log.Error("RootCoord start failed", zap.Error(err))
}
})
log.Info("RootCoord startup success", zap.String("address", c.session.Address))
return err
}
c.UpdateStateCode(commonpb.StateCode_StandBy)
log.Info("RootCoord enter standby mode successfully")
} else {
c.initOnce.Do(func() {
initError = c.initInternal()
})
}
return initError
}
func (c *Core) initCredentials() error {
credInfo, _ := c.meta.GetCredential(util.UserRoot)
if credInfo == nil {
log.Debug("RootCoord init user root")
encryptedRootPassword, _ := crypto.PasswordEncrypt(Params.CommonCfg.DefaultRootPassword.GetValue())
err := c.meta.AddCredential(&internalpb.CredentialInfo{Username: util.UserRoot, EncryptedPassword: encryptedRootPassword})
return err
}
return nil
}
func (c *Core) initRbac() error {
var err error
// create default roles, including admin, public
for _, role := range util.DefaultRoles {
err = c.meta.CreateRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: role})
if err != nil && !common.IsIgnorableError(err) {
return errors.Wrap(err, "failed to create role")
}
}
if Params.ProxyCfg.EnablePublicPrivilege.GetAsBool() {
err = c.initPublicRolePrivilege()
if err != nil {
return err
}
}
if Params.RoleCfg.Enabled.GetAsBool() {
return c.initBuiltinRoles()
}
return nil
}
func (c *Core) initPublicRolePrivilege() error {
// grant privileges for the public role
globalPrivileges := []string{
commonpb.ObjectPrivilege_PrivilegeDescribeCollection.String(),
}
collectionPrivileges := []string{
commonpb.ObjectPrivilege_PrivilegeIndexDetail.String(),
}
var err error
for _, globalPrivilege := range globalPrivileges {
err = c.meta.OperatePrivilege(util.DefaultTenant, &milvuspb.GrantEntity{
Role: &milvuspb.RoleEntity{Name: util.RolePublic},
Object: &milvuspb.ObjectEntity{Name: commonpb.ObjectType_Global.String()},
ObjectName: util.AnyWord,
DbName: util.AnyWord,
Grantor: &milvuspb.GrantorEntity{
User: &milvuspb.UserEntity{Name: util.UserRoot},
Privilege: &milvuspb.PrivilegeEntity{Name: globalPrivilege},
},
}, milvuspb.OperatePrivilegeType_Grant)
if err != nil && !common.IsIgnorableError(err) {
return errors.Wrap(err, "failed to grant global privilege")
}
}
for _, collectionPrivilege := range collectionPrivileges {
err = c.meta.OperatePrivilege(util.DefaultTenant, &milvuspb.GrantEntity{
Role: &milvuspb.RoleEntity{Name: util.RolePublic},
Object: &milvuspb.ObjectEntity{Name: commonpb.ObjectType_Collection.String()},
ObjectName: util.AnyWord,
DbName: util.AnyWord,
Grantor: &milvuspb.GrantorEntity{
User: &milvuspb.UserEntity{Name: util.UserRoot},
Privilege: &milvuspb.PrivilegeEntity{Name: collectionPrivilege},
},
}, milvuspb.OperatePrivilegeType_Grant)
if err != nil && !common.IsIgnorableError(err) {
return errors.Wrap(err, "failed to grant collection privilege")
}
}
return nil
}
func (c *Core) initBuiltinRoles() error {
rolePrivilegesMap := Params.RoleCfg.Roles.GetAsRoleDetails()
for role, privilegesJSON := range rolePrivilegesMap {
err := c.meta.CreateRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: role})
if err != nil && !common.IsIgnorableError(err) {
log.Error("create a builtin role fail", zap.String("roleName", role), zap.Error(err))
return errors.Wrapf(err, "failed to create a builtin role: %s", role)
}
for _, privilege := range privilegesJSON[util.RoleConfigPrivileges] {
privilegeName := privilege[util.RoleConfigPrivilege]
if !util.IsAnyWord(privilege[util.RoleConfigPrivilege]) {
privilegeName = util.PrivilegeNameForMetastore(privilege[util.RoleConfigPrivilege])
}
err := c.meta.OperatePrivilege(util.DefaultTenant, &milvuspb.GrantEntity{
Role: &milvuspb.RoleEntity{Name: role},
Object: &milvuspb.ObjectEntity{Name: privilege[util.RoleConfigObjectType]},
ObjectName: privilege[util.RoleConfigObjectName],
DbName: privilege[util.RoleConfigDBName],
Grantor: &milvuspb.GrantorEntity{
User: &milvuspb.UserEntity{Name: util.UserRoot},
Privilege: &milvuspb.PrivilegeEntity{Name: privilegeName},
},
}, milvuspb.OperatePrivilegeType_Grant)
if err != nil && !common.IsIgnorableError(err) {
log.Error("grant privilege to builtin role fail", zap.String("roleName", role), zap.Any("privilege", privilege), zap.Error(err))
return errors.Wrapf(err, "failed to grant privilege: <%s, %s, %s> of db: %s to role: %s", privilege[util.RoleConfigObjectType], privilege[util.RoleConfigObjectName], privilege[util.RoleConfigPrivilege], privilege[util.RoleConfigDBName], role)
}
}
util.BuiltinRoles = append(util.BuiltinRoles, role)
log.Info("init a builtin role successfully", zap.String("roleName", role))
}
return nil
}
func (c *Core) restore(ctx context.Context) error {
dbs, err := c.meta.ListDatabases(ctx, typeutil.MaxTimestamp)
if err != nil {
return err
}
for _, db := range dbs {
colls, err := c.meta.ListCollections(ctx, db.Name, typeutil.MaxTimestamp, false)
if err != nil {
return err
}
for _, coll := range colls {
ts, err := c.tsoAllocator.GenerateTSO(1)
if err != nil {
return err
}
if coll.Available() {
for _, part := range coll.Partitions {
switch part.State {
case pb.PartitionState_PartitionDropping:
go c.garbageCollector.ReDropPartition(coll.DBID, coll.PhysicalChannelNames, coll.VirtualChannelNames, part.Clone(), ts)
case pb.PartitionState_PartitionCreating:
go c.garbageCollector.RemoveCreatingPartition(coll.DBID, part.Clone(), ts)
default:
}
}
} else {
switch coll.State {
case pb.CollectionState_CollectionDropping:
go c.garbageCollector.ReDropCollection(coll.Clone(), ts)
case pb.CollectionState_CollectionCreating:
go c.garbageCollector.RemoveCreatingCollection(coll.Clone())
default:
}
}
}
}
return nil
}
func (c *Core) startInternal() error {
if err := c.proxyWatcher.WatchProxy(c.ctx); err != nil {
log.Fatal("rootcoord failed to watch proxy", zap.Error(err))
// you can not just stuck here,
panic(err)
}
if err := c.restore(c.ctx); err != nil {
panic(err)
}
if Params.QuotaConfig.QuotaAndLimitsEnabled.GetAsBool() {
c.quotaCenter.Start()
}
c.scheduler.Start()
c.stepExecutor.Start()
go func() {
// refresh rbac cache
if err := retry.Do(c.ctx, func() error {
if err := c.proxyClientManager.RefreshPolicyInfoCache(c.ctx, &proxypb.RefreshPolicyInfoCacheRequest{
OpType: int32(typeutil.CacheRefresh),
}); err != nil {
log.RatedWarn(60, "fail to refresh policy info cache", zap.Error(err))
return err
}
return nil
}, retry.Attempts(100), retry.Sleep(time.Second)); err != nil {
log.Warn("fail to refresh policy info cache", zap.Error(err))
}
}()
c.startServerLoop()
c.UpdateStateCode(commonpb.StateCode_Healthy)
sessionutil.SaveServerInfo(typeutil.RootCoordRole, c.session.ServerID)
logutil.Logger(c.ctx).Info("rootcoord startup successfully")
return nil
}
func (c *Core) startServerLoop() {
c.wg.Add(2)
go c.startTimeTickLoop()
go c.tsLoop()
if !streamingutil.IsStreamingServiceEnabled() {
c.wg.Add(1)
go c.chanTimeTick.startWatch(&c.wg)
}
}
// Start starts RootCoord.
func (c *Core) Start() error {
var err error
if !c.enableActiveStandBy {
c.startOnce.Do(func() {
err = c.startInternal()
})
}
return err
}
func (c *Core) stopExecutor() {
if c.stepExecutor != nil {
c.stepExecutor.Stop()
log.Info("stop rootcoord executor")
}
}
func (c *Core) stopScheduler() {
if c.scheduler != nil {
c.scheduler.Stop()
log.Info("stop rootcoord scheduler")
}
}
func (c *Core) cancelIfNotNil() {
if c.cancel != nil {
c.cancel()
log.Info("cancel rootcoord goroutines")
}
}
func (c *Core) revokeSession() {
if c.session != nil {
// wait at most one second to revoke
c.session.Stop()
log.Info("rootcoord session stop")
}
}
// Stop stops rootCoord.
func (c *Core) Stop() error {
c.UpdateStateCode(commonpb.StateCode_Abnormal)
c.stopExecutor()
c.stopScheduler()
if c.proxyWatcher != nil {
c.proxyWatcher.Stop()
}
if c.quotaCenter != nil {
c.quotaCenter.stop()
}
c.revokeSession()
c.cancelIfNotNil()
c.wg.Wait()
return nil
}
// GetComponentStates get states of components
func (c *Core) GetComponentStates(ctx context.Context, req *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) {
code := c.GetStateCode()
log.Debug("RootCoord current state", zap.String("StateCode", code.String()))
nodeID := common.NotRegisteredID
if c.session != nil && c.session.Registered() {
nodeID = c.session.ServerID
}
return &milvuspb.ComponentStates{
State: &milvuspb.ComponentInfo{
// NodeID: c.session.ServerID, // will race with Core.Register()
NodeID: nodeID,
Role: typeutil.RootCoordRole,
StateCode: code,
ExtraInfo: nil,
},
Status: merr.Success(),
SubcomponentStates: []*milvuspb.ComponentInfo{
{
NodeID: nodeID,
Role: typeutil.RootCoordRole,
StateCode: code,
ExtraInfo: nil,
},
},
}, nil
}
// GetTimeTickChannel get timetick channel name
func (c *Core) GetTimeTickChannel(ctx context.Context, req *internalpb.GetTimeTickChannelRequest) (*milvuspb.StringResponse, error) {
return &milvuspb.StringResponse{
Status: merr.Success(),
Value: Params.CommonCfg.RootCoordTimeTick.GetValue(),
}, nil
}
// GetStatisticsChannel get statistics channel name
func (c *Core) GetStatisticsChannel(ctx context.Context, req *internalpb.GetStatisticsChannelRequest) (*milvuspb.StringResponse, error) {
return &milvuspb.StringResponse{
Status: merr.Success(),
Value: Params.CommonCfg.RootCoordStatistics.GetValue(),
}, nil
}
func (c *Core) CreateDatabase(ctx context.Context, in *milvuspb.CreateDatabaseRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
method := "CreateDatabase"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("CreateDatabase")
log.Ctx(ctx).Info("received request to create database", zap.String("role", typeutil.RootCoordRole),
zap.String("dbName", in.GetDbName()), zap.Int64("msgID", in.GetBase().GetMsgID()))
t := &createDatabaseTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to create database",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("dbName", in.GetDbName()), zap.Int64("msgID", in.GetBase().GetMsgID()))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to create database",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("dbName", in.GetDbName()),
zap.Int64("msgID", in.GetBase().GetMsgID()), zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Ctx(ctx).Info("done to create database", zap.String("role", typeutil.RootCoordRole),
zap.String("dbName", in.GetDbName()),
zap.Int64("msgID", in.GetBase().GetMsgID()), zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
func (c *Core) DropDatabase(ctx context.Context, in *milvuspb.DropDatabaseRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
method := "DropDatabase"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DropDatabase")
log.Ctx(ctx).Info("received request to drop database", zap.String("role", typeutil.RootCoordRole),
zap.String("dbName", in.GetDbName()), zap.Int64("msgID", in.GetBase().GetMsgID()))
t := &dropDatabaseTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to drop database", zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("dbName", in.GetDbName()), zap.Int64("msgID", in.GetBase().GetMsgID()))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to drop database", zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("dbName", in.GetDbName()),
zap.Int64("msgID", in.GetBase().GetMsgID()), zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.CleanupRootCoordDBMetrics(in.GetDbName())
log.Ctx(ctx).Info("done to drop database", zap.String("role", typeutil.RootCoordRole),
zap.String("dbName", in.GetDbName()), zap.Int64("msgID", in.GetBase().GetMsgID()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
func (c *Core) ListDatabases(ctx context.Context, in *milvuspb.ListDatabasesRequest) (*milvuspb.ListDatabasesResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
ret := &milvuspb.ListDatabasesResponse{Status: merr.Status(err)}
return ret, nil
}
method := "ListDatabases"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("ListDatabases")
log := log.Ctx(ctx).With(zap.Int64("msgID", in.GetBase().GetMsgID()))
log.Info("received request to list databases")
t := &listDatabaseTask{
baseTask: newBaseTask(ctx, c),
Req: in,
Resp: &milvuspb.ListDatabasesResponse{},
}
if err := c.scheduler.AddTask(t); err != nil {
log.Info("failed to enqueue request to list databases", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return &milvuspb.ListDatabasesResponse{
Status: merr.Status(err),
}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Info("failed to list databases", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return &milvuspb.ListDatabasesResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Info("done to list databases", zap.Int("num of databases", len(t.Resp.GetDbNames())))
return t.Resp, nil
}
// CreateCollection create collection
func (c *Core) CreateCollection(ctx context.Context, in *milvuspb.CreateCollectionRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateCollection", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("CreateCollection")
log.Ctx(ctx).Info("received request to create collection",
zap.String("dbName", in.GetDbName()),
zap.String("name", in.GetCollectionName()),
zap.String("role", typeutil.RootCoordRole))
t := &createCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to create collection",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetCollectionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to create collection",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateCollection", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("CreateCollection").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("CreateCollection").Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to create collection",
zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// DropCollection drop collection
func (c *Core) DropCollection(ctx context.Context, in *milvuspb.DropCollectionRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropCollection", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DropCollection")
log.Ctx(ctx).Info("received request to drop collection",
zap.String("role", typeutil.RootCoordRole),
zap.String("dbName", in.GetDbName()),
zap.String("name", in.GetCollectionName()))
t := &dropCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to drop collection", zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetCollectionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("DropCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to drop collection", zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("DropCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropCollection", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("DropCollection").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("DropCollection").Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to drop collection", zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// HasCollection check collection existence
func (c *Core) HasCollection(ctx context.Context, in *milvuspb.HasCollectionRequest) (*milvuspb.BoolResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.BoolResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("HasCollection", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("HasCollection")
ts := getTravelTs(in)
log := log.Ctx(ctx).With(zap.String("collectionName", in.GetCollectionName()),
zap.Uint64("ts", ts))
t := &hasCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
Rsp: &milvuspb.BoolResponse{},
}
if err := c.scheduler.AddTask(t); err != nil {
log.Info("failed to enqueue request to has collection", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("HasCollection", metrics.FailLabel).Inc()
return &milvuspb.BoolResponse{
Status: merr.Status(err),
}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Info("failed to has collection", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("HasCollection", metrics.FailLabel).Inc()
return &milvuspb.BoolResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("HasCollection", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("HasCollection").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("HasCollection").Observe(float64(t.queueDur.Milliseconds()))
return t.Rsp, nil
}
func (c *Core) describeCollection(ctx context.Context, in *milvuspb.DescribeCollectionRequest, allowUnavailable bool) (*model.Collection, error) {
ts := getTravelTs(in)
if in.GetCollectionName() != "" {
return c.meta.GetCollectionByName(ctx, in.GetDbName(), in.GetCollectionName(), ts)
}
return c.meta.GetCollectionByID(ctx, in.GetDbName(), in.GetCollectionID(), ts, allowUnavailable)
}
func convertModelToDesc(collInfo *model.Collection, aliases []string, dbName string) *milvuspb.DescribeCollectionResponse {
resp := &milvuspb.DescribeCollectionResponse{
Status: merr.Success(),
DbName: dbName,
}
resp.Schema = &schemapb.CollectionSchema{
Name: collInfo.Name,
Description: collInfo.Description,
AutoID: collInfo.AutoID,
Fields: model.MarshalFieldModels(collInfo.Fields),
EnableDynamicField: collInfo.EnableDynamicField,
}
resp.CollectionID = collInfo.CollectionID
resp.VirtualChannelNames = collInfo.VirtualChannelNames
resp.PhysicalChannelNames = collInfo.PhysicalChannelNames
if collInfo.ShardsNum == 0 {
collInfo.ShardsNum = int32(len(collInfo.VirtualChannelNames))
}
resp.ShardsNum = collInfo.ShardsNum
resp.ConsistencyLevel = collInfo.ConsistencyLevel
resp.CreatedTimestamp = collInfo.CreateTime
createdPhysicalTime, _ := tsoutil.ParseHybridTs(collInfo.CreateTime)
resp.CreatedUtcTimestamp = uint64(createdPhysicalTime)
resp.Aliases = aliases
resp.StartPositions = collInfo.StartPositions
resp.CollectionName = resp.Schema.Name
resp.Properties = collInfo.Properties
resp.NumPartitions = int64(len(collInfo.Partitions))
resp.DbId = collInfo.DBID
return resp
}
func (c *Core) describeCollectionImpl(ctx context.Context, in *milvuspb.DescribeCollectionRequest, allowUnavailable bool) (*milvuspb.DescribeCollectionResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.DescribeCollectionResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeCollection", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DescribeCollection")
ts := getTravelTs(in)
log := log.Ctx(ctx).With(zap.String("collectionName", in.GetCollectionName()),
zap.String("dbName", in.GetDbName()),
zap.Int64("id", in.GetCollectionID()),
zap.Uint64("ts", ts),
zap.Bool("allowUnavailable", allowUnavailable))
t := &describeCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
Rsp: &milvuspb.DescribeCollectionResponse{Status: merr.Success()},
allowUnavailable: allowUnavailable,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Info("failed to enqueue request to describe collection", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeCollection", metrics.FailLabel).Inc()
return &milvuspb.DescribeCollectionResponse{
Status: merr.Status(err),
}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Warn("failed to describe collection", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeCollection", metrics.FailLabel).Inc()
return &milvuspb.DescribeCollectionResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeCollection", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("DescribeCollection").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("DescribeCollection").Observe(float64(t.queueDur.Milliseconds()))
return t.Rsp, nil
}
// DescribeCollection return collection info
func (c *Core) DescribeCollection(ctx context.Context, in *milvuspb.DescribeCollectionRequest) (*milvuspb.DescribeCollectionResponse, error) {
return c.describeCollectionImpl(ctx, in, false)
}
// DescribeCollectionInternal same to DescribeCollection, but will return unavailable collections and
// only used in internal RPC.
// When query cluster tried to do recovery, it'll be healthy until all collections' targets were recovered,
// so during this time, releasing request generated by rootcoord's recovery won't succeed. So in theory, rootcoord goes
// to be healthy, querycoord recovers all collections' targets, and then querycoord serves the releasing request sent
// by rootcoord, eventually, the dropping collections will be released.
func (c *Core) DescribeCollectionInternal(ctx context.Context, in *milvuspb.DescribeCollectionRequest) (*milvuspb.DescribeCollectionResponse, error) {
return c.describeCollectionImpl(ctx, in, true)
}
// ShowCollections list all collection names
func (c *Core) ShowCollections(ctx context.Context, in *milvuspb.ShowCollectionsRequest) (*milvuspb.ShowCollectionsResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.ShowCollectionsResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowCollections", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("ShowCollections")
ts := getTravelTs(in)
log := log.Ctx(ctx).With(zap.String("dbname", in.GetDbName()),
zap.Uint64("ts", ts))
t := &showCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
Rsp: &milvuspb.ShowCollectionsResponse{},
}
if err := c.scheduler.AddTask(t); err != nil {
log.Info("failed to enqueue request to show collections", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowCollections", metrics.FailLabel).Inc()
return &milvuspb.ShowCollectionsResponse{
Status: merr.Status(err),
}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Info("failed to show collections", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowCollections", metrics.FailLabel).Inc()
return &milvuspb.ShowCollectionsResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowCollections", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("ShowCollections").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("ShowCollections").Observe(float64(t.queueDur.Milliseconds()))
return t.Rsp, nil
}
func (c *Core) AlterCollection(ctx context.Context, in *milvuspb.AlterCollectionRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterCollection", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("AlterCollection")
log.Ctx(ctx).Info("received request to alter collection",
zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetCollectionName()),
zap.Any("props", in.Properties))
t := &alterCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Warn("failed to enqueue request to alter collection",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetCollectionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Warn("failed to alter collection",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterCollection", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("AlterCollection").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("AlterCollection").Observe(float64(t.queueDur.Milliseconds()))
log.Info("done to alter collection",
zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
func (c *Core) AlterDatabase(ctx context.Context, in *rootcoordpb.AlterDatabaseRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
method := "AlterDatabase"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
log.Ctx(ctx).Info("received request to alter database",
zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetDbName()),
zap.Any("props", in.Properties))
t := &alterDatabaseTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Warn("failed to enqueue request to alter database",
zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetDbName()),
zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Warn("failed to alter database",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("name", in.GetDbName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues(method).Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to alter database",
zap.String("role", typeutil.RootCoordRole),
zap.String("name", in.GetDbName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// CreatePartition create partition
func (c *Core) CreatePartition(ctx context.Context, in *milvuspb.CreatePartitionRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("CreatePartition", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("CreatePartition")
log.Ctx(ctx).Info("received request to create partition",
zap.String("role", typeutil.RootCoordRole),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()))
t := &createPartitionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to create partition",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("CreatePartition", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to create partition",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("CreatePartition", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("CreatePartition", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("CreatePartition").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("CreatePartition").Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to create partition",
zap.String("role", typeutil.RootCoordRole),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// DropPartition drop partition
func (c *Core) DropPartition(ctx context.Context, in *milvuspb.DropPartitionRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropPartition", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DropPartition")
log.Ctx(ctx).Info("received request to drop partition",
zap.String("role", typeutil.RootCoordRole),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()))
t := &dropPartitionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to drop partition",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("DropPartition", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to drop partition",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("DropPartition", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropPartition", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("DropPartition").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("DropPartition").Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to drop partition",
zap.String("role", typeutil.RootCoordRole),
zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// HasPartition check partition existence
func (c *Core) HasPartition(ctx context.Context, in *milvuspb.HasPartitionRequest) (*milvuspb.BoolResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.BoolResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("HasPartition", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("HasPartition")
// TODO(longjiquan): why HasPartitionRequest doesn't contain Timestamp but other requests do.
ts := typeutil.MaxTimestamp
log := log.Ctx(ctx).With(zap.String("collection", in.GetCollectionName()),
zap.String("partition", in.GetPartitionName()),
zap.Uint64("ts", ts))
t := &hasPartitionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
Rsp: &milvuspb.BoolResponse{},
}
if err := c.scheduler.AddTask(t); err != nil {
log.Info("failed to enqueue request to has partition", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("HasPartition", metrics.FailLabel).Inc()
return &milvuspb.BoolResponse{
Status: merr.Status(err),
}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Info("failed to has partition", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("HasPartition", metrics.FailLabel).Inc()
return &milvuspb.BoolResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("HasPartition", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("HasPartition").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("HasPartition").Observe(float64(t.queueDur.Milliseconds()))
return t.Rsp, nil
}
func (c *Core) showPartitionsImpl(ctx context.Context, in *milvuspb.ShowPartitionsRequest, allowUnavailable bool) (*milvuspb.ShowPartitionsResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.ShowPartitionsResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowPartitions", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("ShowPartitions")
log := log.Ctx(ctx).With(zap.String("collection", in.GetCollectionName()),
zap.Int64("collection_id", in.GetCollectionID()),
zap.Strings("partitions", in.GetPartitionNames()),
zap.Bool("allowUnavailable", allowUnavailable))
t := &showPartitionTask{
baseTask: newBaseTask(ctx, c),
Req: in,
Rsp: &milvuspb.ShowPartitionsResponse{},
allowUnavailable: allowUnavailable,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Info("failed to enqueue request to show partitions", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowPartitions", metrics.FailLabel).Inc()
return &milvuspb.ShowPartitionsResponse{
Status: merr.Status(err),
// Status: common.StatusFromError(err),
}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Info("failed to show partitions", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowPartitions", metrics.FailLabel).Inc()
return &milvuspb.ShowPartitionsResponse{
Status: merr.Status(err),
// Status: common.StatusFromError(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("ShowPartitions", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("ShowPartitions").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("ShowPartitions").Observe(float64(t.queueDur.Milliseconds()))
return t.Rsp, nil
}
// ShowPartitions list all partition names
func (c *Core) ShowPartitions(ctx context.Context, in *milvuspb.ShowPartitionsRequest) (*milvuspb.ShowPartitionsResponse, error) {
return c.showPartitionsImpl(ctx, in, false)
}
// ShowPartitionsInternal same to ShowPartitions, only used in internal RPC.
func (c *Core) ShowPartitionsInternal(ctx context.Context, in *milvuspb.ShowPartitionsRequest) (*milvuspb.ShowPartitionsResponse, error) {
return c.showPartitionsImpl(ctx, in, true)
}
// ShowSegments list all segments
func (c *Core) ShowSegments(ctx context.Context, in *milvuspb.ShowSegmentsRequest) (*milvuspb.ShowSegmentsResponse, error) {
// ShowSegments Only used in GetPersistentSegmentInfo, it's already deprecated for a long time.
// Though we continue to keep current logic, it's not right enough since RootCoord only contains indexed segments.
return &milvuspb.ShowSegmentsResponse{Status: merr.Success()}, nil
}
// GetPChannelInfo get pchannel info.
func (c *Core) GetPChannelInfo(ctx context.Context, in *rootcoordpb.GetPChannelInfoRequest) (*rootcoordpb.GetPChannelInfoResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &rootcoordpb.GetPChannelInfoResponse{
Status: merr.Status(err),
}, nil
}
return c.meta.GetPChannelInfo(in.GetPchannel()), nil
}
// AllocTimestamp alloc timestamp
func (c *Core) AllocTimestamp(ctx context.Context, in *rootcoordpb.AllocTimestampRequest) (*rootcoordpb.AllocTimestampResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &rootcoordpb.AllocTimestampResponse{
Status: merr.Status(err),
}, nil
}
ts, err := c.tsoAllocator.GenerateTSO(in.GetCount())
if err != nil {
log.Ctx(ctx).Error("failed to allocate timestamp", zap.String("role", typeutil.RootCoordRole),
zap.Error(err))
return &rootcoordpb.AllocTimestampResponse{
Status: merr.Status(err),
}, nil
}
// return first available timestamp
ts = ts - uint64(in.GetCount()) + 1
metrics.RootCoordTimestamp.Set(float64(ts))
return &rootcoordpb.AllocTimestampResponse{
Status: merr.Success(),
Timestamp: ts,
Count: in.GetCount(),
}, nil
}
// AllocID alloc ids
func (c *Core) AllocID(ctx context.Context, in *rootcoordpb.AllocIDRequest) (*rootcoordpb.AllocIDResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &rootcoordpb.AllocIDResponse{
Status: merr.Status(err),
}, nil
}
start, _, err := c.idAllocator.Alloc(in.Count)
if err != nil {
log.Ctx(ctx).Error("failed to allocate id",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err))
return &rootcoordpb.AllocIDResponse{
Status: merr.Status(err),
Count: in.Count,
}, nil
}
metrics.RootCoordIDAllocCounter.Add(float64(in.Count))
return &rootcoordpb.AllocIDResponse{
Status: merr.Success(),
ID: start,
Count: in.Count,
}, nil
}
// UpdateChannelTimeTick used to handle ChannelTimeTickMsg
func (c *Core) UpdateChannelTimeTick(ctx context.Context, in *internalpb.ChannelTimeTickMsg) (*commonpb.Status, error) {
log := log.Ctx(ctx)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
log.Warn("failed to updateTimeTick because rootcoord is not healthy", zap.Error(err))
return merr.Status(err), nil
}
if in.Base.MsgType != commonpb.MsgType_TimeTick {
log.Warn("failed to updateTimeTick because base messasge is not timetick, state", zap.Any("base message type", in.Base.MsgType))
return merr.Status(merr.WrapErrParameterInvalid(commonpb.MsgType_TimeTick.String(), in.Base.MsgType.String(), "invalid message type")), nil
}
err := c.chanTimeTick.updateTimeTick(in, "gRPC")
if err != nil {
log.Warn("failed to updateTimeTick",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err))
return merr.Status(err), nil
}
return merr.Success(), nil
}
// InvalidateCollectionMetaCache notifies RootCoord to release the collection cache in Proxies.
func (c *Core) InvalidateCollectionMetaCache(ctx context.Context, in *proxypb.InvalidateCollMetaCacheRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
err := c.proxyClientManager.InvalidateCollectionMetaCache(ctx, in)
if err != nil {
return merr.Status(err), nil
}
return merr.Success(), nil
}
// ShowConfigurations returns the configurations of RootCoord matching req.Pattern
func (c *Core) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &internalpb.ShowConfigurationsResponse{
Status: merr.Status(err),
Configuations: nil,
}, nil
}
configList := make([]*commonpb.KeyValuePair, 0)
for key, value := range Params.GetComponentConfigurations("rootcoord", req.Pattern) {
configList = append(configList,
&commonpb.KeyValuePair{
Key: key,
Value: value,
})
}
return &internalpb.ShowConfigurationsResponse{
Status: merr.Success(),
Configuations: configList,
}, nil
}
// GetMetrics get metrics
func (c *Core) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if err := merr.CheckHealthyStandby(c.GetStateCode()); err != nil {
return &milvuspb.GetMetricsResponse{
Status: merr.Status(err),
Response: "",
}, nil
}
metricType, err := metricsinfo.ParseMetricType(in.Request)
if err != nil {
log.Warn("ParseMetricType failed", zap.String("role", typeutil.RootCoordRole),
zap.Int64("nodeID", c.session.ServerID), zap.String("req", in.Request), zap.Error(err))
return &milvuspb.GetMetricsResponse{
Status: merr.Status(err),
Response: "",
}, nil
}
if metricType == metricsinfo.SystemInfoMetrics {
metrics, err := c.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
metrics, err = c.getSystemInfoMetrics(ctx, in)
}
if err != nil {
log.Warn("GetSystemInfoMetrics failed",
zap.String("role", typeutil.RootCoordRole),
zap.String("metricType", metricType),
zap.Error(err))
return &milvuspb.GetMetricsResponse{
Status: merr.Status(err),
Response: "",
}, nil
}
c.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return metrics, err
}
log.RatedWarn(60, "GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole),
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: merr.Status(merr.WrapErrMetricNotFound(metricType)),
Response: "",
}, nil
}
// CreateAlias create collection alias
func (c *Core) CreateAlias(ctx context.Context, in *milvuspb.CreateAliasRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateAlias", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("CreateAlias")
log.Ctx(ctx).Info("received request to create alias",
zap.String("role", typeutil.RootCoordRole),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()))
t := &createAliasTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to create alias",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateAlias", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to create alias",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateAlias", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("CreateAlias", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("CreateAlias").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("CreateAlias").Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to create alias",
zap.String("role", typeutil.RootCoordRole),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// DropAlias drop collection alias
func (c *Core) DropAlias(ctx context.Context, in *milvuspb.DropAliasRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropAlias", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DropAlias")
log.Ctx(ctx).Info("received request to drop alias",
zap.String("role", typeutil.RootCoordRole),
zap.String("alias", in.GetAlias()))
t := &dropAliasTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to drop alias",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("alias", in.GetAlias()))
metrics.RootCoordDDLReqCounter.WithLabelValues("DropAlias", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to drop alias",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("alias", in.GetAlias()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("DropAlias", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropAlias", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("DropAlias").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("DropAlias").Observe(float64(t.queueDur.Milliseconds()))
log.Ctx(ctx).Info("done to drop alias",
zap.String("role", typeutil.RootCoordRole),
zap.String("alias", in.GetAlias()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// AlterAlias alter collection alias
func (c *Core) AlterAlias(ctx context.Context, in *milvuspb.AlterAliasRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DropAlias", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("AlterAlias")
log.Ctx(ctx).Info("received request to alter alias",
zap.String("role", typeutil.RootCoordRole),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()))
t := &alterAliasTask{
baseTask: newBaseTask(ctx, c),
Req: in,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Ctx(ctx).Info("failed to enqueue request to alter alias",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()))
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterAlias", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Ctx(ctx).Info("failed to alter alias",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterAlias", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("AlterAlias", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("AlterAlias").Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordDDLReqLatencyInQueue.WithLabelValues("AlterAlias").Observe(float64(t.queueDur.Milliseconds()))
log.Info("done to alter alias",
zap.String("role", typeutil.RootCoordRole),
zap.String("alias", in.GetAlias()),
zap.String("collection", in.GetCollectionName()),
zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
// DescribeAlias describe collection alias
func (c *Core) DescribeAlias(ctx context.Context, in *milvuspb.DescribeAliasRequest) (*milvuspb.DescribeAliasResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.DescribeAliasResponse{
Status: merr.Status(err),
}, nil
}
log := log.Ctx(ctx).With(
zap.String("role", typeutil.RootCoordRole),
zap.String("db", in.GetDbName()),
zap.String("alias", in.GetAlias()))
method := "DescribeAlias"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DescribeAlias")
log.Info("received request to describe alias")
if in.GetAlias() == "" {
return &milvuspb.DescribeAliasResponse{
Status: merr.Status(merr.WrapErrParameterMissing("alias", "no input alias")),
}, nil
}
collectionName, err := c.meta.DescribeAlias(ctx, in.GetDbName(), in.GetAlias(), 0)
if err != nil {
log.Warn("fail to DescribeAlias", zap.Error(err))
return &milvuspb.DescribeAliasResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Info("done to describe alias")
return &milvuspb.DescribeAliasResponse{
Status: merr.Status(nil),
DbName: in.GetDbName(),
Alias: in.GetAlias(),
Collection: collectionName,
}, nil
}
// ListAliases list aliases
func (c *Core) ListAliases(ctx context.Context, in *milvuspb.ListAliasesRequest) (*milvuspb.ListAliasesResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.ListAliasesResponse{
Status: merr.Status(err),
}, nil
}
method := "ListAliases"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
log := log.Ctx(ctx).With(
zap.String("role", typeutil.RootCoordRole),
zap.String("db", in.GetDbName()),
zap.String("collectionName", in.GetCollectionName()))
log.Info("received request to list aliases")
aliases, err := c.meta.ListAliases(ctx, in.GetDbName(), in.GetCollectionName(), 0)
if err != nil {
log.Warn("fail to ListAliases", zap.Error(err))
return &milvuspb.ListAliasesResponse{
Status: merr.Status(err),
}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Info("done to list aliases")
return &milvuspb.ListAliasesResponse{
Status: merr.Status(nil),
DbName: in.GetDbName(),
CollectionName: in.GetCollectionName(),
Aliases: aliases,
}, nil
}
// ExpireCredCache will call invalidate credential cache
func (c *Core) ExpireCredCache(ctx context.Context, username string) error {
req := proxypb.InvalidateCredCacheRequest{
Base: commonpbutil.NewMsgBase(
commonpbutil.WithSourceID(c.session.ServerID),
),
Username: username,
}
return c.proxyClientManager.InvalidateCredentialCache(ctx, &req)
}
// UpdateCredCache will call update credential cache
func (c *Core) UpdateCredCache(ctx context.Context, credInfo *internalpb.CredentialInfo) error {
req := proxypb.UpdateCredCacheRequest{
Base: commonpbutil.NewMsgBase(
commonpbutil.WithSourceID(c.session.ServerID),
),
Username: credInfo.Username,
Password: credInfo.Sha256Password,
}
return c.proxyClientManager.UpdateCredentialCache(ctx, &req)
}
// CreateCredential create new user and password
// 1. decode ciphertext password to raw password
// 2. encrypt raw password
// 3. save in to etcd
func (c *Core) CreateCredential(ctx context.Context, credInfo *internalpb.CredentialInfo) (*commonpb.Status, error) {
method := "CreateCredential"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.String("username", credInfo.Username))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
// insert to db
err := c.meta.AddCredential(credInfo)
if err != nil {
ctxLog.Warn("CreateCredential save credential failed", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_CreateCredentialFailure), nil
}
// update proxy's local cache
err = c.UpdateCredCache(ctx, credInfo)
if err != nil {
ctxLog.Warn("CreateCredential add cache failed", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
}
log.Debug("CreateCredential success", zap.String("role", typeutil.RootCoordRole),
zap.String("username", credInfo.Username))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordNumOfCredentials.Inc()
return merr.Success(), nil
}
// GetCredential get credential by username
func (c *Core) GetCredential(ctx context.Context, in *rootcoordpb.GetCredentialRequest) (*rootcoordpb.GetCredentialResponse, error) {
method := "GetCredential"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.String("username", in.Username))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &rootcoordpb.GetCredentialResponse{Status: merr.Status(err)}, nil
}
credInfo, err := c.meta.GetCredential(in.Username)
if err != nil {
ctxLog.Warn("GetCredential query credential failed", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return &rootcoordpb.GetCredentialResponse{
Status: merr.StatusWithErrorCode(err, commonpb.ErrorCode_GetCredentialFailure),
}, nil
}
ctxLog.Debug("GetCredential success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &rootcoordpb.GetCredentialResponse{
Status: merr.Success(),
Username: credInfo.Username,
Password: credInfo.EncryptedPassword,
}, nil
}
// UpdateCredential update password for a user
func (c *Core) UpdateCredential(ctx context.Context, credInfo *internalpb.CredentialInfo) (*commonpb.Status, error) {
method := "UpdateCredential"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.String("username", credInfo.Username))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
// update data on storage
err := c.meta.AlterCredential(credInfo)
if err != nil {
ctxLog.Warn("UpdateCredential save credential failed", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_UpdateCredentialFailure), nil
}
// update proxy's local cache
err = c.UpdateCredCache(ctx, credInfo)
if err != nil {
ctxLog.Warn("UpdateCredential update cache failed", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_UpdateCredentialFailure), nil
}
log.Debug("UpdateCredential success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return merr.Success(), nil
}
// DeleteCredential delete a user
func (c *Core) DeleteCredential(ctx context.Context, in *milvuspb.DeleteCredentialRequest) (*commonpb.Status, error) {
method := "DeleteCredential"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.String("username", in.Username))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
var status *commonpb.Status
defer func() {
if status.Code != 0 {
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
}
}()
redoTask := newBaseRedoTask(c.stepExecutor)
redoTask.AddSyncStep(NewSimpleStep("delete credential meta data", func(ctx context.Context) ([]nestedStep, error) {
err := c.meta.DeleteCredential(in.Username)
if err != nil {
ctxLog.Warn("delete credential meta data failed", zap.Error(err))
}
return nil, err
}))
redoTask.AddAsyncStep(NewSimpleStep("delete credential cache", func(ctx context.Context) ([]nestedStep, error) {
err := c.ExpireCredCache(ctx, in.Username)
if err != nil {
ctxLog.Warn("delete credential cache failed", zap.Error(err))
}
return nil, err
}))
redoTask.AddAsyncStep(NewSimpleStep("delete user role cache for the user", func(ctx context.Context) ([]nestedStep, error) {
err := c.proxyClientManager.RefreshPolicyInfoCache(ctx, &proxypb.RefreshPolicyInfoCacheRequest{
OpType: int32(typeutil.CacheDeleteUser),
OpKey: in.Username,
})
if err != nil {
ctxLog.Warn("delete user role cache failed for the user", zap.Error(err))
}
return nil, err
}))
err := redoTask.Execute(ctx)
if err != nil {
errMsg := "fail to execute task when deleting the user"
ctxLog.Warn(errMsg, zap.Error(err))
status = merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_DeleteCredentialFailure)
return status, nil
}
ctxLog.Debug("DeleteCredential success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordNumOfCredentials.Dec()
status = merr.Success()
return status, nil
}
// ListCredUsers list all usernames
func (c *Core) ListCredUsers(ctx context.Context, in *milvuspb.ListCredUsersRequest) (*milvuspb.ListCredUsersResponse, error) {
method := "ListCredUsers"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.ListCredUsersResponse{Status: merr.Status(err)}, nil
}
credInfo, err := c.meta.ListCredentialUsernames()
if err != nil {
ctxLog.Warn("ListCredUsers query usernames failed", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.FailLabel).Inc()
status := merr.Status(err)
return &milvuspb.ListCredUsersResponse{Status: status}, nil
}
ctxLog.Debug("ListCredUsers success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &milvuspb.ListCredUsersResponse{
Status: merr.Success(),
Usernames: credInfo.Usernames,
}, nil
}
// CreateRole create role
// - check the node health
// - check if the role is existed
// - check if the role num has reached the limit
// - create the role by the meta api
func (c *Core) CreateRole(ctx context.Context, in *milvuspb.CreateRoleRequest) (*commonpb.Status, error) {
method := "CreateRole"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method + " begin")
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
entity := in.Entity
err := c.meta.CreateRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: entity.Name})
if err != nil {
errMsg := "fail to create role"
ctxLog.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_CreateRoleFailure), nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordNumOfRoles.Inc()
return merr.Success(), nil
}
// DropRole drop role
// - check the node health
// - check if the role name is existed
// - check if the role has some grant info
// - get all role mapping of this role
// - drop these role mappings
// - drop the role by the meta api
func (c *Core) DropRole(ctx context.Context, in *milvuspb.DropRoleRequest) (*commonpb.Status, error) {
method := "DropRole"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.String("role_name", in.RoleName))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
for util.IsBuiltinRole(in.GetRoleName()) {
err := merr.WrapErrPrivilegeNotPermitted("the role[%s] is a builtin role, which can't be dropped", in.GetRoleName())
return merr.Status(err), nil
}
if _, err := c.meta.SelectRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: in.RoleName}, false); err != nil {
errMsg := "not found the role, maybe the role isn't existed or internal system error"
ctxLog.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_DropRoleFailure), nil
}
if !in.ForceDrop {
grantEntities, err := c.meta.SelectGrant(util.DefaultTenant, &milvuspb.GrantEntity{
Role: &milvuspb.RoleEntity{Name: in.RoleName},
})
if len(grantEntities) != 0 {
errMsg := "fail to drop the role that it has privileges. Use REVOKE API to revoke privileges"
ctxLog.Warn(errMsg, zap.Any("grants", grantEntities), zap.Error(err))
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_DropRoleFailure), nil
}
}
redoTask := newBaseRedoTask(c.stepExecutor)
redoTask.AddSyncStep(NewSimpleStep("drop role meta data", func(ctx context.Context) ([]nestedStep, error) {
err := c.meta.DropRole(util.DefaultTenant, in.RoleName)
if err != nil {
ctxLog.Warn("drop role mata data failed", zap.Error(err))
}
return nil, err
}))
redoTask.AddAsyncStep(NewSimpleStep("drop the privilege list of this role", func(ctx context.Context) ([]nestedStep, error) {
if !in.ForceDrop {
return nil, nil
}
err := c.meta.DropGrant(util.DefaultTenant, &milvuspb.RoleEntity{Name: in.RoleName})
if err != nil {
ctxLog.Warn("drop the privilege list failed for the role", zap.Error(err))
}
return nil, err
}))
redoTask.AddAsyncStep(NewSimpleStep("drop role cache", func(ctx context.Context) ([]nestedStep, error) {
err := c.proxyClientManager.RefreshPolicyInfoCache(ctx, &proxypb.RefreshPolicyInfoCacheRequest{
OpType: int32(typeutil.CacheDropRole),
OpKey: in.RoleName,
})
if err != nil {
ctxLog.Warn("delete user role cache failed for the role", zap.Error(err))
}
return nil, err
}))
err := redoTask.Execute(ctx)
if err != nil {
errMsg := "fail to execute task when dropping the role"
ctxLog.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_DropRoleFailure), nil
}
ctxLog.Debug(method+" success", zap.String("role_name", in.RoleName))
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
metrics.RootCoordNumOfRoles.Dec()
return merr.Success(), nil
}
// OperateUserRole operate the relationship between a user and a role
// - check the node health
// - check if the role is valid
// - check if the user is valid
// - operate the user-role by the meta api
// - update the policy cache
func (c *Core) OperateUserRole(ctx context.Context, in *milvuspb.OperateUserRoleRequest) (*commonpb.Status, error) {
method := "OperateUserRole-" + in.Type.String()
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
if _, err := c.meta.SelectRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: in.RoleName}, false); err != nil {
errMsg := "not found the role, maybe the role isn't existed or internal system error"
ctxLog.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_OperateUserRoleFailure), nil
}
if in.Type != milvuspb.OperateUserRoleType_RemoveUserFromRole {
if _, err := c.meta.SelectUser(util.DefaultTenant, &milvuspb.UserEntity{Name: in.Username}, false); err != nil {
errMsg := "not found the user, maybe the user isn't existed or internal system error"
ctxLog.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_OperateUserRoleFailure), nil
}
}
redoTask := newBaseRedoTask(c.stepExecutor)
redoTask.AddSyncStep(NewSimpleStep("operate user role meta data", func(ctx context.Context) ([]nestedStep, error) {
err := c.meta.OperateUserRole(util.DefaultTenant, &milvuspb.UserEntity{Name: in.Username}, &milvuspb.RoleEntity{Name: in.RoleName}, in.Type)
if err != nil && !common.IsIgnorableError(err) {
log.Warn("operate user role mata data failed", zap.Error(err))
return nil, err
}
return nil, nil
}))
redoTask.AddAsyncStep(NewSimpleStep("operate user role cache", func(ctx context.Context) ([]nestedStep, error) {
var opType int32
switch in.Type {
case milvuspb.OperateUserRoleType_AddUserToRole:
opType = int32(typeutil.CacheAddUserToRole)
case milvuspb.OperateUserRoleType_RemoveUserFromRole:
opType = int32(typeutil.CacheRemoveUserFromRole)
default:
errMsg := "invalid operate type for the OperateUserRole api"
log.Warn(errMsg, zap.Any("in", in))
return nil, nil
}
if err := c.proxyClientManager.RefreshPolicyInfoCache(ctx, &proxypb.RefreshPolicyInfoCacheRequest{
OpType: opType,
OpKey: funcutil.EncodeUserRoleCache(in.Username, in.RoleName),
}); err != nil {
log.Warn("fail to refresh policy info cache", zap.Any("in", in), zap.Error(err))
return nil, err
}
return nil, nil
}))
err := redoTask.Execute(ctx)
if err != nil {
errMsg := "fail to execute task when operate the user and role"
log.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_OperateUserRoleFailure), nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return merr.Success(), nil
}
// SelectRole select role
// - check the node health
// - check if the role is valid when this param is provided
// - select role by the meta api
func (c *Core) SelectRole(ctx context.Context, in *milvuspb.SelectRoleRequest) (*milvuspb.SelectRoleResponse, error) {
method := "SelectRole"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.SelectRoleResponse{Status: merr.Status(err)}, nil
}
if in.Role != nil {
if _, err := c.meta.SelectRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: in.Role.Name}, false); err != nil {
if errors.Is(err, merr.ErrIoKeyNotFound) {
return &milvuspb.SelectRoleResponse{
Status: merr.Success(),
}, nil
}
errMsg := "fail to select the role to check the role name"
ctxLog.Warn(errMsg, zap.Error(err))
return &milvuspb.SelectRoleResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_SelectRoleFailure),
}, nil
}
}
roleResults, err := c.meta.SelectRole(util.DefaultTenant, in.Role, in.IncludeUserInfo)
if err != nil {
errMsg := "fail to select the role"
ctxLog.Warn(errMsg, zap.Error(err))
return &milvuspb.SelectRoleResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_SelectRoleFailure),
}, nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &milvuspb.SelectRoleResponse{
Status: merr.Success(),
Results: roleResults,
}, nil
}
// SelectUser select user
// - check the node health
// - check if the user is valid when this param is provided
// - select user by the meta api
func (c *Core) SelectUser(ctx context.Context, in *milvuspb.SelectUserRequest) (*milvuspb.SelectUserResponse, error) {
method := "SelectUser"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.SelectUserResponse{Status: merr.Status(err)}, nil
}
if in.User != nil {
if _, err := c.meta.SelectUser(util.DefaultTenant, &milvuspb.UserEntity{Name: in.User.Name}, false); err != nil {
if errors.Is(err, merr.ErrIoKeyNotFound) {
return &milvuspb.SelectUserResponse{
Status: merr.Success(),
}, nil
}
errMsg := "fail to select the user to check the username"
ctxLog.Warn(errMsg, zap.Any("in", in), zap.Error(err))
return &milvuspb.SelectUserResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_SelectUserFailure),
}, nil
}
}
userResults, err := c.meta.SelectUser(util.DefaultTenant, in.User, in.IncludeRoleInfo)
if err != nil {
errMsg := "fail to select the user"
ctxLog.Warn(errMsg, zap.Error(err))
return &milvuspb.SelectUserResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_SelectUserFailure),
}, nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &milvuspb.SelectUserResponse{
Status: merr.Success(),
Results: userResults,
}, nil
}
func (c *Core) isValidRole(entity *milvuspb.RoleEntity) error {
if entity == nil {
return errors.New("the role entity is nil")
}
if entity.Name == "" {
return errors.New("the name in the role entity is empty")
}
if _, err := c.meta.SelectRole(util.DefaultTenant, &milvuspb.RoleEntity{Name: entity.Name}, false); err != nil {
log.Warn("fail to select the role", zap.String("role_name", entity.Name), zap.Error(err))
return errors.New("not found the role, maybe the role isn't existed or internal system error")
}
return nil
}
func (c *Core) isValidObject(entity *milvuspb.ObjectEntity) error {
if entity == nil {
return errors.New("the object entity is nil")
}
if _, ok := commonpb.ObjectType_value[entity.Name]; !ok {
return fmt.Errorf("not found the object type[name: %s], supported the object types: %v", entity.Name, lo.Keys(commonpb.ObjectType_value))
}
return nil
}
func (c *Core) isValidGrantor(entity *milvuspb.GrantorEntity, object string) error {
if entity == nil {
return errors.New("the grantor entity is nil")
}
if entity.User == nil {
return errors.New("the user entity in the grantor entity is nil")
}
if entity.User.Name == "" {
return errors.New("the name in the user entity of the grantor entity is empty")
}
if _, err := c.meta.SelectUser(util.DefaultTenant, &milvuspb.UserEntity{Name: entity.User.Name}, false); err != nil {
log.Warn("fail to select the user", zap.String("username", entity.User.Name), zap.Error(err))
return errors.New("not found the user, maybe the user isn't existed or internal system error")
}
if entity.Privilege == nil {
return errors.New("the privilege entity in the grantor entity is nil")
}
if util.IsAnyWord(entity.Privilege.Name) {
return nil
}
if privilegeName := util.PrivilegeNameForMetastore(entity.Privilege.Name); privilegeName == "" {
return fmt.Errorf("not found the privilege name[%s]", entity.Privilege.Name)
}
privileges, ok := util.ObjectPrivileges[object]
if !ok {
return fmt.Errorf("not found the object type[name: %s], supported the object types: %v", object, lo.Keys(commonpb.ObjectType_value))
}
for _, privilege := range privileges {
if privilege == entity.Privilege.Name {
return nil
}
}
return fmt.Errorf("not found the privilege name[%s] in object[%s]", entity.Privilege.Name, object)
}
// OperatePrivilege operate the privilege, including grant and revoke
// - check the node health
// - check if the operating type is valid
// - check if the entity is nil
// - check if the params, including the resource entity, the principal entity, the grantor entity, is valid
// - operate the privilege by the meta api
// - update the policy cache
func (c *Core) OperatePrivilege(ctx context.Context, in *milvuspb.OperatePrivilegeRequest) (*commonpb.Status, error) {
method := "OperatePrivilege"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
if in.Type != milvuspb.OperatePrivilegeType_Grant && in.Type != milvuspb.OperatePrivilegeType_Revoke {
errMsg := fmt.Sprintf("invalid operate privilege type, current type: %s, valid value: [%s, %s]", in.Type, milvuspb.OperatePrivilegeType_Grant, milvuspb.OperatePrivilegeType_Revoke)
ctxLog.Warn(errMsg)
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
if in.Entity == nil {
errMsg := "the grant entity in the request is nil"
ctxLog.Error(errMsg)
return merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
if err := c.isValidObject(in.Entity.Object); err != nil {
ctxLog.Warn("", zap.Error(err))
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
if err := c.isValidRole(in.Entity.Role); err != nil {
ctxLog.Warn("", zap.Error(err))
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
if err := c.isValidGrantor(in.Entity.Grantor, in.Entity.Object.Name); err != nil {
ctxLog.Error("", zap.Error(err))
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
ctxLog.Debug("before PrivilegeNameForMetastore", zap.String("privilege", in.Entity.Grantor.Privilege.Name))
if !util.IsAnyWord(in.Entity.Grantor.Privilege.Name) {
in.Entity.Grantor.Privilege.Name = util.PrivilegeNameForMetastore(in.Entity.Grantor.Privilege.Name)
}
ctxLog.Debug("after PrivilegeNameForMetastore", zap.String("privilege", in.Entity.Grantor.Privilege.Name))
if in.Entity.Object.Name == commonpb.ObjectType_Global.String() {
in.Entity.ObjectName = util.AnyWord
}
redoTask := newBaseRedoTask(c.stepExecutor)
redoTask.AddSyncStep(NewSimpleStep("operate privilege meta data", func(ctx context.Context) ([]nestedStep, error) {
err := c.meta.OperatePrivilege(util.DefaultTenant, in.Entity, in.Type)
if err != nil && !common.IsIgnorableError(err) {
log.Warn("fail to operate the privilege", zap.Any("in", in), zap.Error(err))
return nil, err
}
return nil, nil
}))
redoTask.AddAsyncStep(NewSimpleStep("operate privilege cache", func(ctx context.Context) ([]nestedStep, error) {
var opType int32
switch in.Type {
case milvuspb.OperatePrivilegeType_Grant:
opType = int32(typeutil.CacheGrantPrivilege)
case milvuspb.OperatePrivilegeType_Revoke:
opType = int32(typeutil.CacheRevokePrivilege)
default:
log.Warn("invalid operate type for the OperatePrivilege api", zap.Any("in", in))
return nil, nil
}
if err := c.proxyClientManager.RefreshPolicyInfoCache(ctx, &proxypb.RefreshPolicyInfoCacheRequest{
OpType: opType,
OpKey: funcutil.PolicyForPrivilege(in.Entity.Role.Name, in.Entity.Object.Name, in.Entity.ObjectName, in.Entity.Grantor.Privilege.Name, in.Entity.DbName),
}); err != nil {
log.Warn("fail to refresh policy info cache", zap.Any("in", in), zap.Error(err))
return nil, err
}
return nil, nil
}))
err := redoTask.Execute(ctx)
if err != nil {
errMsg := "fail to execute task when operating the privilege"
log.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return merr.Success(), nil
}
// SelectGrant select grant
// - check the node health
// - check if the principal entity is valid
// - check if the resource entity which is provided by the user is valid
// - select grant by the meta api
func (c *Core) SelectGrant(ctx context.Context, in *milvuspb.SelectGrantRequest) (*milvuspb.SelectGrantResponse, error) {
method := "SelectGrant"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.SelectGrantResponse{
Status: merr.Status(err),
}, nil
}
if in.Entity == nil {
errMsg := "the grant entity in the request is nil"
ctxLog.Warn(errMsg)
return &milvuspb.SelectGrantResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_SelectGrantFailure),
}, nil
}
if err := c.isValidRole(in.Entity.Role); err != nil {
ctxLog.Warn("", zap.Error(err))
return &milvuspb.SelectGrantResponse{
Status: merr.StatusWithErrorCode(err, commonpb.ErrorCode_SelectGrantFailure),
}, nil
}
if in.Entity.Object != nil {
if err := c.isValidObject(in.Entity.Object); err != nil {
ctxLog.Warn("", zap.Error(err))
return &milvuspb.SelectGrantResponse{
Status: merr.StatusWithErrorCode(err, commonpb.ErrorCode_SelectGrantFailure),
}, nil
}
}
grantEntities, err := c.meta.SelectGrant(util.DefaultTenant, in.Entity)
if errors.Is(err, merr.ErrIoKeyNotFound) {
return &milvuspb.SelectGrantResponse{
Status: merr.Success(),
Entities: grantEntities,
}, nil
}
if err != nil {
errMsg := "fail to select the grant"
ctxLog.Warn(errMsg, zap.Error(err))
return &milvuspb.SelectGrantResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_SelectGrantFailure),
}, nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &milvuspb.SelectGrantResponse{
Status: merr.Success(),
Entities: grantEntities,
}, nil
}
func (c *Core) ListPolicy(ctx context.Context, in *internalpb.ListPolicyRequest) (*internalpb.ListPolicyResponse, error) {
method := "PolicyList"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &internalpb.ListPolicyResponse{
Status: merr.Status(err),
}, nil
}
policies, err := c.meta.ListPolicy(util.DefaultTenant)
if err != nil {
errMsg := "fail to list policy"
ctxLog.Warn(errMsg, zap.Error(err))
return &internalpb.ListPolicyResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_ListPolicyFailure),
}, nil
}
userRoles, err := c.meta.ListUserRole(util.DefaultTenant)
if err != nil {
errMsg := "fail to list user-role"
ctxLog.Warn(errMsg, zap.Any("in", in), zap.Error(err))
return &internalpb.ListPolicyResponse{
Status: merr.StatusWithErrorCode(errors.New(errMsg), commonpb.ErrorCode_ListPolicyFailure),
}, nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &internalpb.ListPolicyResponse{
Status: merr.Success(),
PolicyInfos: policies,
UserRoles: userRoles,
}, nil
}
func (c *Core) BackupRBAC(ctx context.Context, in *milvuspb.BackupRBACMetaRequest) (*milvuspb.BackupRBACMetaResponse, error) {
method := "BackupRBAC"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole), zap.Any("in", in))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.BackupRBACMetaResponse{
Status: merr.Status(err),
}, nil
}
rbacMeta, err := c.meta.BackupRBAC(ctx, util.DefaultTenant)
if err != nil {
return &milvuspb.BackupRBACMetaResponse{
Status: merr.Status(err),
}, nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return &milvuspb.BackupRBACMetaResponse{
Status: merr.Success(),
RBACMeta: rbacMeta,
}, nil
}
func (c *Core) RestoreRBAC(ctx context.Context, in *milvuspb.RestoreRBACMetaRequest) (*commonpb.Status, error) {
method := "RestoreRBAC"
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder(method)
ctxLog := log.Ctx(ctx).With(zap.String("role", typeutil.RootCoordRole))
ctxLog.Debug(method)
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
redoTask := newBaseRedoTask(c.stepExecutor)
redoTask.AddSyncStep(NewSimpleStep("restore rbac meta data", func(ctx context.Context) ([]nestedStep, error) {
if err := c.meta.RestoreRBAC(ctx, util.DefaultTenant, in.RBACMeta); err != nil {
log.Warn("fail to restore rbac meta data", zap.Any("in", in), zap.Error(err))
return nil, err
}
return nil, nil
}))
redoTask.AddAsyncStep(NewSimpleStep("operate privilege cache", func(ctx context.Context) ([]nestedStep, error) {
if err := c.proxyClientManager.RefreshPolicyInfoCache(c.ctx, &proxypb.RefreshPolicyInfoCacheRequest{
OpType: int32(typeutil.CacheRefresh),
}); err != nil {
log.Warn("fail to refresh policy info cache", zap.Any("in", in), zap.Error(err))
return nil, err
}
return nil, nil
}))
err := redoTask.Execute(ctx)
if err != nil {
errMsg := "fail to execute task when restore rbac meta data"
log.Warn(errMsg, zap.Error(err))
return merr.StatusWithErrorCode(err, commonpb.ErrorCode_OperatePrivilegeFailure), nil
}
ctxLog.Debug(method + " success")
metrics.RootCoordDDLReqCounter.WithLabelValues(method, metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues(method).Observe(float64(tr.ElapseSpan().Milliseconds()))
return merr.Success(), nil
}
func (c *Core) RenameCollection(ctx context.Context, req *milvuspb.RenameCollectionRequest) (*commonpb.Status, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return merr.Status(err), nil
}
log := log.Ctx(ctx).With(zap.String("oldCollectionName", req.GetOldName()), zap.String("newCollectionName", req.GetNewName()))
log.Info("received request to rename collection")
metrics.RootCoordDDLReqCounter.WithLabelValues("RenameCollection", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("RenameCollection")
t := &renameCollectionTask{
baseTask: newBaseTask(ctx, c),
Req: req,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Warn("failed to enqueue request to rename collection", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("RenameCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
if err := t.WaitToFinish(); err != nil {
log.Warn("failed to rename collection", zap.Uint64("ts", t.GetTs()), zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("RenameCollection", metrics.FailLabel).Inc()
return merr.Status(err), nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("RenameCollection", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("RenameCollection").Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Info("done to rename collection", zap.Uint64("ts", t.GetTs()))
return merr.Success(), nil
}
func (c *Core) DescribeDatabase(ctx context.Context, req *rootcoordpb.DescribeDatabaseRequest) (*rootcoordpb.DescribeDatabaseResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &rootcoordpb.DescribeDatabaseResponse{Status: merr.Status(err)}, nil
}
log := log.Ctx(ctx).With(zap.String("dbName", req.GetDbName()))
log.Info("received request to describe database ")
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeDatabase", metrics.TotalLabel).Inc()
tr := timerecord.NewTimeRecorder("DescribeDatabase")
t := &describeDBTask{
baseTask: newBaseTask(ctx, c),
Req: req,
}
if err := c.scheduler.AddTask(t); err != nil {
log.Warn("failed to enqueue request to describe database", zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeDatabase", metrics.FailLabel).Inc()
return &rootcoordpb.DescribeDatabaseResponse{Status: merr.Status(err)}, nil
}
if err := t.WaitToFinish(); err != nil {
log.Warn("failed to describe database", zap.Uint64("ts", t.GetTs()), zap.Error(err))
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeDatabase", metrics.FailLabel).Inc()
return &rootcoordpb.DescribeDatabaseResponse{Status: merr.Status(err)}, nil
}
metrics.RootCoordDDLReqCounter.WithLabelValues("DescribeDatabase", metrics.SuccessLabel).Inc()
metrics.RootCoordDDLReqLatency.WithLabelValues("DescribeDatabase").Observe(float64(tr.ElapseSpan().Milliseconds()))
log.Info("done to describe database", zap.Uint64("ts", t.GetTs()))
return t.Rsp, nil
}
func (c *Core) CheckHealth(ctx context.Context, in *milvuspb.CheckHealthRequest) (*milvuspb.CheckHealthResponse, error) {
if err := merr.CheckHealthy(c.GetStateCode()); err != nil {
return &milvuspb.CheckHealthResponse{
Status: merr.Status(err),
IsHealthy: false,
Reasons: []string{fmt.Sprintf("serverID=%d: %v", c.session.ServerID, err)},
}, nil
}
group, ctx := errgroup.WithContext(ctx)
errs := typeutil.NewConcurrentSet[error]()
proxyClients := c.proxyClientManager.GetProxyClients()
proxyClients.Range(func(key int64, value types.ProxyClient) bool {
nodeID := key
proxyClient := value
group.Go(func() error {
sta, err := proxyClient.GetComponentStates(ctx, &milvuspb.GetComponentStatesRequest{})
if err != nil {
errs.Insert(err)
return err
}
err = merr.AnalyzeState("Proxy", nodeID, sta)
if err != nil {
errs.Insert(err)
}
return err
})
return true
})
maxDelay := Params.QuotaConfig.MaxTimeTickDelay.GetAsDuration(time.Second)
if maxDelay > 0 {
group.Go(func() error {
err := CheckTimeTickLagExceeded(ctx, c.queryCoord, c.dataCoord, maxDelay)
if err != nil {
errs.Insert(err)
}
return err
})
}
err := group.Wait()
if err != nil {
return &milvuspb.CheckHealthResponse{
Status: merr.Success(),
IsHealthy: false,
Reasons: lo.Map(errs.Collect(), func(e error, i int) string {
return err.Error()
}),
}, nil
}
return &milvuspb.CheckHealthResponse{Status: merr.Success(), IsHealthy: true, Reasons: []string{}}, nil
}