Add model parameter translation (#8509)

Co-authored-by: swingchen01 <swings@126.com>
Co-authored-by: 陈长君 <chenchangjun@shuwen.com>
This commit is contained in:
AAEE86 2024-09-22 10:14:33 +08:00 committed by GitHub
parent 740fad06c1
commit c9f1e18df1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 82 additions and 36 deletions

View File

@ -472,12 +472,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule( ParameterRule(
name=DefaultParameterName.TEMPERATURE.value, name=DefaultParameterName.TEMPERATURE.value,
use_template=DefaultParameterName.TEMPERATURE.value, use_template=DefaultParameterName.TEMPERATURE.value,
label=I18nObject(en_US="Temperature"), label=I18nObject(en_US="Temperature", zh_Hans="温度"),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
help=I18nObject( help=I18nObject(
en_US="The temperature of the model. " en_US="The temperature of the model. "
"Increasing the temperature will make the model answer " "Increasing the temperature will make the model answer "
"more creatively. (Default: 0.8)" "more creatively. (Default: 0.8)",
zh_Hans="模型的温度。增加温度将使模型的回答更具创造性。(默认值:0.8)",
), ),
default=0.1, default=0.1,
min=0, min=0,
@ -486,12 +487,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule( ParameterRule(
name=DefaultParameterName.TOP_P.value, name=DefaultParameterName.TOP_P.value,
use_template=DefaultParameterName.TOP_P.value, use_template=DefaultParameterName.TOP_P.value,
label=I18nObject(en_US="Top P"), label=I18nObject(en_US="Top P", zh_Hans="Top P"),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
help=I18nObject( help=I18nObject(
en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to " en_US="Works together with top-k. A higher value (e.g., 0.95) will lead to "
"more diverse text, while a lower value (e.g., 0.5) will generate more " "more diverse text, while a lower value (e.g., 0.5) will generate more "
"focused and conservative text. (Default: 0.9)" "focused and conservative text. (Default: 0.9)",
zh_Hans="与top-k一起工作。较高的值(例如,0.95)会导致生成更多样化的文本,而较低的值(例如,0.5)会生成更专注和保守的文本。(默认值:0.9)",
), ),
default=0.9, default=0.9,
min=0, min=0,
@ -499,12 +501,13 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
), ),
ParameterRule( ParameterRule(
name="top_k", name="top_k",
label=I18nObject(en_US="Top K"), label=I18nObject(en_US="Top K", zh_Hans="Top K"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Reduces the probability of generating nonsense. " en_US="Reduces the probability of generating nonsense. "
"A higher value (e.g. 100) will give more diverse answers, " "A higher value (e.g. 100) will give more diverse answers, "
"while a lower value (e.g. 10) will be more conservative. (Default: 40)" "while a lower value (e.g. 10) will be more conservative. (Default: 40)",
zh_Hans="减少生成无意义内容的可能性。较高的值(例如100)将提供更多样化的答案,而较低的值(例如10)将更为保守。(默认值:40)",
), ),
min=1, min=1,
max=100, max=100,
@ -516,7 +519,8 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
help=I18nObject( help=I18nObject(
en_US="Sets how strongly to penalize repetitions. " en_US="Sets how strongly to penalize repetitions. "
"A higher value (e.g., 1.5) will penalize repetitions more strongly, " "A higher value (e.g., 1.5) will penalize repetitions more strongly, "
"while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)" "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
zh_Hans="设置对重复内容的惩罚强度。一个较高的值(例如,1.5)会更强地惩罚重复内容,而一个较低的值(例如,0.9)则会相对宽容。(默认值:1.1)",
), ),
min=-2, min=-2,
max=2, max=2,
@ -524,11 +528,12 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
ParameterRule( ParameterRule(
name="num_predict", name="num_predict",
use_template="max_tokens", use_template="max_tokens",
label=I18nObject(en_US="Num Predict"), label=I18nObject(en_US="Num Predict", zh_Hans="最大令牌数预测"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Maximum number of tokens to predict when generating text. " en_US="Maximum number of tokens to predict when generating text. "
"(Default: 128, -1 = infinite generation, -2 = fill context)" "(Default: 128, -1 = infinite generation, -2 = fill context)",
zh_Hans="生成文本时预测的最大令牌数。(默认值:128,-1 = 无限生成,-2 = 填充上下文)",
), ),
default=(512 if int(credentials.get("max_tokens", 4096)) >= 768 else 128), default=(512 if int(credentials.get("max_tokens", 4096)) >= 768 else 128),
min=-2, min=-2,
@ -536,121 +541,137 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
), ),
ParameterRule( ParameterRule(
name="mirostat", name="mirostat",
label=I18nObject(en_US="Mirostat sampling"), label=I18nObject(en_US="Mirostat sampling", zh_Hans="Mirostat 采样"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Enable Mirostat sampling for controlling perplexity. " en_US="Enable Mirostat sampling for controlling perplexity. "
"(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)" "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)",
zh_Hans="启用 Mirostat 采样以控制困惑度。"
"(默认值:0,0 = 禁用,1 = Mirostat,2 = Mirostat 2.0)",
), ),
min=0, min=0,
max=2, max=2,
), ),
ParameterRule( ParameterRule(
name="mirostat_eta", name="mirostat_eta",
label=I18nObject(en_US="Mirostat Eta"), label=I18nObject(en_US="Mirostat Eta", zh_Hans="学习率"),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
help=I18nObject( help=I18nObject(
en_US="Influences how quickly the algorithm responds to feedback from " en_US="Influences how quickly the algorithm responds to feedback from "
"the generated text. A lower learning rate will result in slower adjustments, " "the generated text. A lower learning rate will result in slower adjustments, "
"while a higher learning rate will make the algorithm more responsive. " "while a higher learning rate will make the algorithm more responsive. "
"(Default: 0.1)" "(Default: 0.1)",
zh_Hans="影响算法对生成文本反馈响应的速度。较低的学习率会导致调整速度变慢,而较高的学习率会使得算法更加灵敏。(默认值:0.1)",
), ),
precision=1, precision=1,
), ),
ParameterRule( ParameterRule(
name="mirostat_tau", name="mirostat_tau",
label=I18nObject(en_US="Mirostat Tau"), label=I18nObject(en_US="Mirostat Tau", zh_Hans="文本连贯度"),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
help=I18nObject( help=I18nObject(
en_US="Controls the balance between coherence and diversity of the output. " en_US="Controls the balance between coherence and diversity of the output. "
"A lower value will result in more focused and coherent text. (Default: 5.0)" "A lower value will result in more focused and coherent text. (Default: 5.0)",
zh_Hans="控制输出的连贯性和多样性之间的平衡。较低的值会导致更专注和连贯的文本。(默认值:5.0)",
), ),
precision=1, precision=1,
), ),
ParameterRule( ParameterRule(
name="num_ctx", name="num_ctx",
label=I18nObject(en_US="Size of context window"), label=I18nObject(en_US="Size of context window", zh_Hans="上下文窗口大小"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Sets the size of the context window used to generate the next token. (Default: 2048)" en_US="Sets the size of the context window used to generate the next token. (Default: 2048)",
zh_Hans="设置用于生成下一个标记的上下文窗口大小。(默认值:2048)",
), ),
default=2048, default=2048,
min=1, min=1,
), ),
ParameterRule( ParameterRule(
name="num_gpu", name="num_gpu",
label=I18nObject(en_US="GPU Layers"), label=I18nObject(en_US="GPU Layers", zh_Hans="GPU 层数"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="The number of layers to offload to the GPU(s). " en_US="The number of layers to offload to the GPU(s). "
"On macOS it defaults to 1 to enable metal support, 0 to disable." "On macOS it defaults to 1 to enable metal support, 0 to disable."
"As long as a model fits into one gpu it stays in one. " "As long as a model fits into one gpu it stays in one. "
"It does not set the number of GPU(s). " "It does not set the number of GPU(s). ",
zh_Hans="加载到 GPU 的层数。在 macOS 上,默认为 1 以启用 Metal 支持,设置为 0 则禁用。"
"只要模型适合一个 GPU,它就保留在其中。它不设置 GPU 的数量。",
), ),
min=-1, min=-1,
default=1, default=1,
), ),
ParameterRule( ParameterRule(
name="num_thread", name="num_thread",
label=I18nObject(en_US="Num Thread"), label=I18nObject(en_US="Num Thread", zh_Hans="线程数"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Sets the number of threads to use during computation. " en_US="Sets the number of threads to use during computation. "
"By default, Ollama will detect this for optimal performance. " "By default, Ollama will detect this for optimal performance. "
"It is recommended to set this value to the number of physical CPU cores " "It is recommended to set this value to the number of physical CPU cores "
"your system has (as opposed to the logical number of cores)." "your system has (as opposed to the logical number of cores).",
zh_Hans="设置计算过程中使用的线程数。默认情况下,Ollama会检测以获得最佳性能。建议将此值设置为系统拥有的物理CPU核心数(而不是逻辑核心数)。",
), ),
min=1, min=1,
), ),
ParameterRule( ParameterRule(
name="repeat_last_n", name="repeat_last_n",
label=I18nObject(en_US="Repeat last N"), label=I18nObject(en_US="Repeat last N", zh_Hans="回溯内容"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Sets how far back for the model to look back to prevent repetition. " en_US="Sets how far back for the model to look back to prevent repetition. "
"(Default: 64, 0 = disabled, -1 = num_ctx)" "(Default: 64, 0 = disabled, -1 = num_ctx)",
zh_Hans="设置模型回溯多远的内容以防止重复。(默认值:64,0 = 禁用,-1 = num_ctx)",
), ),
min=-1, min=-1,
), ),
ParameterRule( ParameterRule(
name="tfs_z", name="tfs_z",
label=I18nObject(en_US="TFS Z"), label=I18nObject(en_US="TFS Z", zh_Hans="减少标记影响"),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
help=I18nObject( help=I18nObject(
en_US="Tail free sampling is used to reduce the impact of less probable tokens " en_US="Tail free sampling is used to reduce the impact of less probable tokens "
"from the output. A higher value (e.g., 2.0) will reduce the impact more, " "from the output. A higher value (e.g., 2.0) will reduce the impact more, "
"while a value of 1.0 disables this setting. (default: 1)" "while a value of 1.0 disables this setting. (default: 1)",
zh_Hans="用于减少输出中不太可能的标记的影响。较高的值(例如,2.0)会更多地减少这种影响,而1.0的值则会禁用此设置。(默认值:1)",
), ),
precision=1, precision=1,
), ),
ParameterRule( ParameterRule(
name="seed", name="seed",
label=I18nObject(en_US="Seed"), label=I18nObject(en_US="Seed", zh_Hans="随机数种子"),
type=ParameterType.INT, type=ParameterType.INT,
help=I18nObject( help=I18nObject(
en_US="Sets the random number seed to use for generation. Setting this to " en_US="Sets the random number seed to use for generation. Setting this to "
"a specific number will make the model generate the same text for " "a specific number will make the model generate the same text for "
"the same prompt. (Default: 0)" "the same prompt. (Default: 0)",
zh_Hans="设置用于生成的随机数种子。将此设置为特定数字将使模型对相同的提示生成相同的文本。(默认值:0)",
), ),
), ),
ParameterRule( ParameterRule(
name="keep_alive", name="keep_alive",
label=I18nObject(en_US="Keep Alive"), label=I18nObject(en_US="Keep Alive", zh_Hans="模型存活时间"),
type=ParameterType.STRING, type=ParameterType.STRING,
help=I18nObject( help=I18nObject(
en_US="Sets how long the model is kept in memory after generating a response. " en_US="Sets how long the model is kept in memory after generating a response. "
"This must be a duration string with a unit (e.g., '10m' for 10 minutes or '24h' for 24 hours)." "This must be a duration string with a unit (e.g., '10m' for 10 minutes or '24h' for 24 hours)."
" A negative number keeps the model loaded indefinitely, and '0' unloads the model" " A negative number keeps the model loaded indefinitely, and '0' unloads the model"
" immediately after generating a response." " immediately after generating a response."
" Valid time units are 's','m','h'. (Default: 5m)" " Valid time units are 's','m','h'. (Default: 5m)",
zh_Hans="设置模型在生成响应后在内存中保留的时间。"
"这必须是一个带有单位的持续时间字符串(例如,'10m' 表示10分钟,'24h' 表示24小时)。"
"负数表示无限期地保留模型,'0'表示在生成响应后立即卸载模型。"
"有效的时间单位有 's'(秒)、'm'(分钟)、'h'(小时)。(默认值:5m)",
), ),
), ),
ParameterRule( ParameterRule(
name="format", name="format",
label=I18nObject(en_US="Format"), label=I18nObject(en_US="Format", zh_Hans="返回格式"),
type=ParameterType.STRING, type=ParameterType.STRING,
help=I18nObject( help=I18nObject(
en_US="the format to return a response in. Currently the only accepted value is json." en_US="the format to return a response in. Currently the only accepted value is json.",
zh_Hans="返回响应的格式。目前唯一接受的值是json。",
), ),
options=["json"], options=["json"],
), ),

View File

@ -205,7 +205,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
parameter_rules=[ parameter_rules=[
ParameterRule( ParameterRule(
name=DefaultParameterName.TEMPERATURE.value, name=DefaultParameterName.TEMPERATURE.value,
label=I18nObject(en_US="Temperature"), label=I18nObject(en_US="Temperature", zh_Hans="温度"),
help=I18nObject(
en_US="Kernel sampling threshold. Used to determine the randomness of the results."
"The higher the value, the stronger the randomness."
"The higher the possibility of getting different answers to the same question.",
zh_Hans="核采样阈值。用于决定结果随机性,取值越高随机性越强即相同的问题得到的不同答案的可能性越高。",
),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
default=float(credentials.get("temperature", 0.7)), default=float(credentials.get("temperature", 0.7)),
min=0, min=0,
@ -214,7 +220,13 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
), ),
ParameterRule( ParameterRule(
name=DefaultParameterName.TOP_P.value, name=DefaultParameterName.TOP_P.value,
label=I18nObject(en_US="Top P"), label=I18nObject(en_US="Top P", zh_Hans="Top P"),
help=I18nObject(
en_US="The probability threshold of the nucleus sampling method during the generation process."
"The larger the value is, the higher the randomness of generation will be."
"The smaller the value is, the higher the certainty of generation will be.",
zh_Hans="生成过程中核采样方法概率阈值。取值越大,生成的随机性越高;取值越小,生成的确定性越高。",
),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
default=float(credentials.get("top_p", 1)), default=float(credentials.get("top_p", 1)),
min=0, min=0,
@ -223,7 +235,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
), ),
ParameterRule( ParameterRule(
name=DefaultParameterName.FREQUENCY_PENALTY.value, name=DefaultParameterName.FREQUENCY_PENALTY.value,
label=I18nObject(en_US="Frequency Penalty"), label=I18nObject(en_US="Frequency Penalty", zh_Hans="频率惩罚"),
help=I18nObject(
en_US="For controlling the repetition rate of words used by the model."
"Increasing this can reduce the repetition of the same words in the model's output.",
zh_Hans="用于控制模型已使用字词的重复率。 提高此项可以降低模型在输出中重复相同字词的重复度。",
),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
default=float(credentials.get("frequency_penalty", 0)), default=float(credentials.get("frequency_penalty", 0)),
min=-2, min=-2,
@ -231,7 +248,12 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
), ),
ParameterRule( ParameterRule(
name=DefaultParameterName.PRESENCE_PENALTY.value, name=DefaultParameterName.PRESENCE_PENALTY.value,
label=I18nObject(en_US="Presence Penalty"), label=I18nObject(en_US="Presence Penalty", zh_Hans="存在惩罚"),
help=I18nObject(
en_US="Used to control the repetition rate when generating models."
"Increasing this can reduce the repetition rate of model generation.",
zh_Hans="用于控制模型生成时的重复度。提高此项可以降低模型生成的重复度。",
),
type=ParameterType.FLOAT, type=ParameterType.FLOAT,
default=float(credentials.get("presence_penalty", 0)), default=float(credentials.get("presence_penalty", 0)),
min=-2, min=-2,
@ -239,7 +261,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
), ),
ParameterRule( ParameterRule(
name=DefaultParameterName.MAX_TOKENS.value, name=DefaultParameterName.MAX_TOKENS.value,
label=I18nObject(en_US="Max Tokens"), label=I18nObject(en_US="Max Tokens", zh_Hans="最大标记"),
help=I18nObject(
en_US="Maximum length of tokens for the model response.", zh_Hans="模型回答的tokens的最大长度。"
),
type=ParameterType.INT, type=ParameterType.INT,
default=512, default=512,
min=1, min=1,