milvus/cmd/tools/config/generate.go
Bingyi Sun 4724779b3b
enhance: remove fallback keys for config generator (#32946)
Signed-off-by: sunby <sunbingyi1992@gmail.com>
2024-05-13 13:33:31 +08:00

333 lines
9.9 KiB
Go

package main
import (
"encoding/csv"
"fmt"
"io"
"reflect"
"sort"
"strings"
"github.com/samber/lo"
"go.uber.org/zap"
"golang.org/x/exp/slices"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/pkg/util/paramtable"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)
type DocContent struct {
key string
defaultValue string
sinceVersion string
refreshable string
exportToUser bool
comment string
}
func collect() []DocContent {
params := &paramtable.ComponentParam{}
params.Init(paramtable.NewBaseTable())
val := reflect.ValueOf(params).Elem()
data := make([]DocContent, 0)
keySet := typeutil.NewSet[string]()
for i := 0; i < val.NumField(); i++ {
valueField := val.Field(i)
collectRecursive(params, &data, &valueField)
}
result := make([]DocContent, 0)
for _, d := range data {
if keySet.Contain(d.key) {
continue
}
keySet.Insert(d.key)
result = append(result, d)
}
return result
}
func quoteIfNeeded(s string) string {
if strings.ContainsAny(s, "[],{}") {
return fmt.Sprintf("\"%s\"", s)
}
return s
}
func collectRecursive(params *paramtable.ComponentParam, data *[]DocContent, val *reflect.Value) {
if val.Kind() != reflect.Struct {
return
}
log.Debug("enter", zap.Any("variable", val.String()))
for j := 0; j < val.NumField(); j++ {
subVal := val.Field(j)
tag := val.Type().Field(j).Tag
t := val.Type().Field(j).Type.String()
if t == "paramtable.ParamItem" {
item := subVal.Interface().(paramtable.ParamItem) //nolint:govet
refreshable := tag.Get("refreshable")
defaultValue := params.GetWithDefault(item.Key, item.DefaultValue)
log.Debug("got key", zap.String("key", item.Key), zap.Any("value", defaultValue), zap.String("variable", val.Type().Field(j).Name))
*data = append(*data, DocContent{item.Key, defaultValue, item.Version, refreshable, item.Export, item.Doc})
} else if t == "paramtable.ParamGroup" {
item := subVal.Interface().(paramtable.ParamGroup)
log.Debug("got key", zap.String("key", item.KeyPrefix), zap.String("variable", val.Type().Field(j).Name))
refreshable := tag.Get("refreshable")
// Sort group items to stablize the output order
m := item.GetValue()
keys := make([]string, 0, len(m))
for k := range m {
keys = append(keys, k)
}
sort.Strings(keys)
for _, key := range keys {
value := m[key]
log.Debug("got group entry", zap.String("key", key), zap.String("value", value))
*data = append(*data, DocContent{fmt.Sprintf("%s%s", item.KeyPrefix, key), quoteIfNeeded(value), item.Version, refreshable, item.Export, ""})
}
} else {
collectRecursive(params, data, &subVal)
}
}
}
func WriteCsv(f io.Writer) {
w := csv.NewWriter(f)
w.Write([]string{"key", "defaultValue", "sinceVersion", "refreshable", "exportToUser", "comment"})
result := collect()
w.WriteAll(lo.Map(result, func(d DocContent, _ int) []string {
return []string{d.key, d.defaultValue, d.sinceVersion, d.refreshable, fmt.Sprintf("%t", d.exportToUser), d.comment}
}))
w.Flush()
}
type YamlGroup struct {
name string
header string
disable bool
}
type YamlMarshaller struct {
writer io.Writer
groups []YamlGroup
data []DocContent
}
func (m *YamlMarshaller) writeYamlRecursive(data []DocContent, level int) {
topLevels := typeutil.NewOrderedMap[string, []DocContent]()
for _, d := range data {
key := strings.Split(d.key, ".")[level]
old, ok := topLevels.Get(key)
if !ok {
topLevels.Set(key, []DocContent{d})
} else {
topLevels.Set(key, append(old, d))
}
}
var keys []string
var extraHeaders map[string]string
disabledGroups := lo.Map(
lo.Filter(
m.groups,
func(g YamlGroup, _ int) bool { return g.disable }),
func(g YamlGroup, _ int) string { return g.name })
if level == 0 {
keys = lo.Map(m.groups, func(g YamlGroup, _ int) string { return g.name })
extraHeaders = lo.SliceToMap(m.groups, func(g YamlGroup) (string, string) { return g.name, g.header })
} else {
keys = topLevels.Keys()
}
for _, key := range keys {
contents, ok := topLevels.Get(key)
if !ok {
log.Debug("didnot found config for " + key)
continue
}
content := contents[0]
isDisabled := slices.Contains(disabledGroups, strings.Split(content.key, ".")[0])
if strings.Count(content.key, ".") == level {
if isDisabled {
io.WriteString(m.writer, "# ")
}
m.writeContent(key, content.defaultValue, content.comment, level)
continue
}
extra, ok := extraHeaders[key]
if ok {
io.WriteString(m.writer, extra+"\n")
}
if isDisabled {
io.WriteString(m.writer, "# ")
}
io.WriteString(m.writer, fmt.Sprintf("%s%s:\n", strings.Repeat(" ", level*2), key))
m.writeYamlRecursive(contents, level+1)
}
}
func (m *YamlMarshaller) writeContent(key, value, comment string, level int) {
if strings.Contains(comment, "\n") {
multilines := strings.Split(comment, "\n")
for _, line := range multilines {
io.WriteString(m.writer, fmt.Sprintf("%s# %s\n", strings.Repeat(" ", level*2), line))
}
io.WriteString(m.writer, fmt.Sprintf("%s%s: %s\n", strings.Repeat(" ", level*2), key, value))
} else if comment != "" {
io.WriteString(m.writer, fmt.Sprintf("%s%s: %s # %s\n", strings.Repeat(" ", level*2), key, value, comment))
} else {
io.WriteString(m.writer, fmt.Sprintf("%s%s: %s\n", strings.Repeat(" ", level*2), key, value))
}
}
func WriteYaml(w io.Writer) {
result := collect()
io.WriteString(w, `# Licensed to the LF AI & Data foundation under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
`)
groups := []YamlGroup{
{
name: "etcd",
header: "\n# Related configuration of etcd, used to store Milvus metadata & service discovery.",
},
{
name: "metastore",
},
{
name: "tikv",
header: `
# Related configuration of tikv, used to store Milvus metadata.
# Notice that when TiKV is enabled for metastore, you still need to have etcd for service discovery.
# TiKV is a good option when the metadata size requires better horizontal scalability.`,
},
{
name: "localStorage",
},
{
name: "minio",
header: `
# Related configuration of MinIO/S3/GCS or any other service supports S3 API, which is responsible for data persistence for Milvus.
# We refer to the storage service as MinIO/S3 in the following description for simplicity.`,
},
{
name: "mq",
header: `
# Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka.
# You can change your mq by setting mq.type field.
# If you don't set mq.type field as default, there is a note about enabling priority if we config multiple mq in this file.
# 1. standalone(local) mode: rocksmq(default) > natsmq > Pulsar > Kafka
# 2. cluster mode: Pulsar(default) > Kafka (rocksmq and natsmq is unsupported in cluster mode)`,
},
{
name: "pulsar",
header: `
# Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.`,
},
{
name: "kafka",
header: "\n# If you want to enable kafka, needs to comment the pulsar configs",
disable: true,
},
{
name: "rocksmq",
},
{
name: "natsmq",
header: `
# natsmq configuration.
# more detail: https://docs.nats.io/running-a-nats-service/configuration`,
},
{
name: "rootCoord",
header: "\n# Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests",
},
{
name: "proxy",
header: "\n# Related configuration of proxy, used to validate client requests and reduce the returned results.",
},
{
name: "queryCoord",
header: "\n# Related configuration of queryCoord, used to manage topology and load balancing for the query nodes, and handoff from growing segments to sealed segments.",
},
{
name: "queryNode",
header: "\n# Related configuration of queryNode, used to run hybrid search between vector and scalar data.",
},
{
name: "indexCoord",
},
{
name: "indexNode",
},
{
name: "dataCoord",
},
{
name: "dataNode",
},
{
name: "log",
header: "\n# Configures the system log output.",
},
{
name: "grpc",
},
{
name: "tls",
header: "\n# Configure the proxy tls enable.",
},
{
name: "common",
},
{
name: "quotaAndLimits",
header: `
# QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable:
# 1. TT protection;
# 2. Memory protection.
# 3. Disk quota protection.
# You can enable:
# 1. DML throughput limitation;
# 2. DDL, DQL qps/rps limitation;
# 3. DQL Queue length/latency protection;
# 4. DQL result rate protection;
# If necessary, you can also manually force to deny RW requests.`,
},
{
name: "trace",
},
{
name: "gpu",
header: `
#when using GPU indexing, Milvus will utilize a memory pool to avoid frequent memory allocation and deallocation.
#here, you can set the size of the memory occupied by the memory pool, with the unit being MB.
#note that there is a possibility of Milvus crashing when the actual memory demand exceeds the value set by maxMemSize.
#if initMemSize and MaxMemSize both set zero,
#milvus will automatically initialize half of the available GPU memory,
#maxMemSize will the whole available GPU memory.`,
},
}
marshller := YamlMarshaller{w, groups, result}
marshller.writeYamlRecursive(lo.Filter(result, func(d DocContent, _ int) bool {
return d.exportToUser
}), 0)
}