feat: support predefined models for openrouter (#5494)

This commit is contained in:
sino 2024-06-24 16:31:53 +08:00 committed by GitHub
parent f7900f298f
commit 877a2c144b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 874 additions and 19 deletions

View File

@ -0,0 +1,21 @@
- openai/gpt-4o
- openai/gpt-4
- openai/gpt-4-32k
- openai/gpt-3.5-turbo
- anthropic/claude-3.5-sonnet
- anthropic/claude-3-haiku
- anthropic/claude-3-opus
- anthropic/claude-3-sonnet
- google/gemini-pro-1.5
- google/gemini-flash-1.5
- google/gemini-pro
- cohere/command-r-plus
- cohere/command-r
- meta-llama/llama-3-70b-instruct
- meta-llama/llama-3-8b-instruct
- mistralai/mixtral-8x22b-instruct
- mistralai/mixtral-8x7b-instruct
- mistralai/mistral-7b-instruct
- qwen/qwen-2-72b-instruct
- deepseek/deepseek-chat
- deepseek/deepseek-coder

View File

@ -0,0 +1,39 @@
model: anthropic/claude-3.5-sonnet
label:
en_US: claude-3.5-sonnet
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
- name: response_format
use_template: response_format
pricing:
input: "3.00"
output: "15.00"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,39 @@
model: anthropic/claude-3-haiku
label:
en_US: claude-3-haiku
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
- name: response_format
use_template: response_format
pricing:
input: "0.25"
output: "1.25"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,39 @@
model: anthropic/claude-3-opus
label:
en_US: claude-3-opus
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
- name: response_format
use_template: response_format
pricing:
input: "15.00"
output: "75.00"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,39 @@
model: anthropic/claude-3-sonnet
label:
en_US: claude-3-sonnet
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 4096
min: 1
max: 4096
- name: response_format
use_template: response_format
pricing:
input: "3.00"
output: "15.00"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,45 @@
model: cohere/command-r-plus
label:
en_US: command-r-plus
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
max: 5.0
- name: top_p
use_template: top_p
default: 0.75
min: 0.01
max: 0.99
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 0
min: 0
max: 500
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 1024
max: 4096
pricing:
input: "3"
output: "15"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,45 @@
model: cohere/command-r
label:
en_US: command-r
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
max: 5.0
- name: top_p
use_template: top_p
default: 0.75
min: 0.01
max: 0.99
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 0
min: 0
max: 500
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 1024
max: 4096
pricing:
input: "0.5"
output: "1.5"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,50 @@
model: deepseek/deepseek-chat
label:
en_US: deepseek-chat
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 1
min: 0.0
max: 2.0
help:
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
- name: max_tokens
use_template: max_tokens
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
type: float
default: 1
min: 0.01
max: 1.00
help:
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
- name: frequency_penalty
use_template: frequency_penalty
default: 0
min: -2.0
max: 2.0
help:
zh_Hans: 介于 -2.0 和 2.0 之间的数字。如果该值为正,那么新 token 会根据其在已有文本中的出现频率受到相应的惩罚,降低模型重复相同内容的可能性。
en_US: A number between -2.0 and 2.0. If the value is positive, new tokens are penalized based on their frequency of occurrence in existing text, reducing the likelihood that the model will repeat the same content.
pricing:
input: "0.14"
output: "0.28"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,30 @@
model: deepseek/deepseek-coder
label:
en_US: deepseek-coder
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
min: 0
max: 1
default: 0.5
- name: top_p
use_template: top_p
min: 0
max: 1
default: 1
- name: max_tokens
use_template: max_tokens
min: 1
max: 4096
default: 1024
pricing:
input: "0.14"
output: "0.28"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,39 @@
model: google/gemini-flash-1.5
label:
en_US: gemini-flash-1.5
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: "0.25"
output: "0.75"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,39 @@
model: google/gemini-pro-1.5
label:
en_US: gemini-pro-1.5
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 8192
min: 1
max: 8192
- name: response_format
use_template: response_format
pricing:
input: "2.5"
output: "7.5"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,38 @@
model: google/gemini-pro
label:
en_US: gemini-pro
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 30720
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 2048
- name: response_format
use_template: response_format
pricing:
input: "0.125"
output: "0.375"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,42 @@
model: openai/gpt-3.5-turbo
label:
en_US: gpt-3.5-turbo
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 16385
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 4096
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "0.5"
output: "1.5"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,57 @@
model: openai/gpt-4-32k
label:
en_US: gpt-4-32k
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 32768
- name: seed
label:
zh_Hans: 种子
en_US: Seed
type: int
help:
zh_Hans:
如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
响应参数来监视变化。
en_US:
If specified, model will make a best effort to sample deterministically,
such that repeated requests with the same seed and parameters should return
the same result. Determinism is not guaranteed, and you should refer to the
system_fingerprint response parameter to monitor changes in the backend.
required: false
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "60"
output: "120"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,57 @@
model: openai/gpt-4
label:
en_US: gpt-4
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: seed
label:
zh_Hans: 种子
en_US: Seed
type: int
help:
zh_Hans:
如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
响应参数来监视变化。
en_US:
If specified, model will make a best effort to sample deterministically,
such that repeated requests with the same seed and parameters should return
the same result. Determinism is not guaranteed, and you should refer to the
system_fingerprint response parameter to monitor changes in the backend.
required: false
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "30"
output: "60"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,43 @@
model: openai/gpt-4o
label:
en_US: gpt-4o
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
- vision
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 4096
- name: response_format
label:
zh_Hans: 回复格式
en_US: response_format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: "5.00"
output: "15.00"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,23 @@
model: meta-llama/llama-3-70b-instruct
label:
en_US: llama-3-70b-instruct
model_type: llm
model_properties:
mode: completion
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: "0.59"
output: "0.79"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,23 @@
model: meta-llama/llama-3-8b-instruct
label:
en_US: llama-3-8b-instruct
model_type: llm
model_properties:
mode: completion
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: "0.07"
output: "0.07"
unit: "0.000001"
currency: USD

View File

@ -9,38 +9,40 @@ from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAI
class OpenRouterLargeLanguageModel(OAIAPICompatLargeLanguageModel):
def _update_endpoint_url(self, credentials: dict):
def _update_credential(self, model: str, credentials: dict):
credentials['endpoint_url'] = "https://openrouter.ai/api/v1"
return credentials
credentials['mode'] = self.get_model_mode(model).value
credentials['function_calling_type'] = 'tool_call'
return
def _invoke(self, model: str, credentials: dict,
prompt_messages: list[PromptMessage], model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None) \
-> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._update_credential(model, credentials)
return super()._invoke(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
def validate_credentials(self, model: str, credentials: dict) -> None:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._update_credential(model, credentials)
return super().validate_credentials(model, cred_with_endpoint)
return super().validate_credentials(model, credentials)
def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._update_credential(model, credentials)
return super()._generate(model, cred_with_endpoint, prompt_messages, model_parameters, tools, stop, stream, user)
return super()._generate(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._update_credential(model, credentials)
return super().get_customizable_model_schema(model, cred_with_endpoint)
return super().get_customizable_model_schema(model, credentials)
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None) -> int:
cred_with_endpoint = self._update_endpoint_url(credentials=credentials)
self._update_credential(model, credentials)
return super().get_num_tokens(model, cred_with_endpoint, prompt_messages, tools)
return super().get_num_tokens(model, credentials, prompt_messages, tools)

View File

@ -0,0 +1,30 @@
model: mistralai/mistral-7b-instruct
label:
en_US: mistral-7b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: completion
context_size: 8000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 2048
pricing:
input: "0.07"
output: "0.07"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,30 @@
model: mistralai/mixtral-8x22b-instruct
label:
en_US: mixtral-8x22b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: completion
context_size: 64000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 8000
pricing:
input: "0.65"
output: "0.65"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,31 @@
model: mistralai/mixtral-8x7b-instruct
label:
zh_Hans: mixtral-8x7b-instruct
en_US: mixtral-8x7b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: completion
context_size: 32000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.7
min: 0
max: 1
- name: top_p
use_template: top_p
default: 1
min: 0
max: 1
- name: max_tokens
use_template: max_tokens
default: 1024
min: 1
max: 8000
pricing:
input: "0.24"
output: "0.24"
unit: "0.000001"
currency: USD

View File

@ -0,0 +1,30 @@
model: qwen/qwen-2-72b-instruct
label:
en_US: qwen-2-72b-instruct
model_type: llm
features:
- agent-thought
model_properties:
mode: completion
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: max_tokens
use_template: max_tokens
type: int
default: 512
min: 1
max: 4096
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: "0.59"
output: "0.79"
unit: "0.000001"
currency: USD

View File

@ -1,5 +1,7 @@
import logging
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
@ -8,4 +10,15 @@ logger = logging.getLogger(__name__)
class OpenRouterProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
pass
try:
model_instance = self.get_model_instance(ModelType.LLM)
model_instance.validate_credentials(
model='openai/gpt-3.5-turbo',
credentials=credentials
)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:
logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
raise ex

View File

@ -1,6 +1,6 @@
provider: openrouter
label:
en_US: openrouter.ai
en_US: OpenRouter
icon_small:
en_US: openrouter_square.svg
icon_large:
@ -15,6 +15,7 @@ help:
supported_model_types:
- llm
configurate_methods:
- predefined-model
- customizable-model
model_credential_schema:
model:
@ -82,13 +83,23 @@ model_credential_schema:
en_US: Vision Support
type: radio
required: false
default: 'no_support'
default: "no_support"
options:
- value: 'support'
- value: "support"
label:
en_US: 'Yes'
en_US: "Yes"
zh_Hans: 是
- value: 'no_support'
- value: "no_support"
label:
en_US: 'No'
en_US: "No"
zh_Hans: 否
provider_credential_schema:
credential_form_schemas:
- variable: api_key
required: true
label:
en_US: API Key
type: secret-input
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key