azure openai add gpt-4-1106-preview, gpt-4-vision-preview models (#1751)

Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Charlie.Wei 2023-12-14 09:55:30 +08:00 committed by GitHub
parent 7c43123956
commit b0d8d196e1
8 changed files with 811 additions and 336 deletions


@@ -23,7 +23,8 @@ FUNCTION_CALL_MODELS = [
     'gpt-4',
     'gpt-4-32k',
     'gpt-35-turbo',
-    'gpt-35-turbo-16k'
+    'gpt-35-turbo-16k',
+    'gpt-4-1106-preview'
 ]
 
 class AzureOpenAIModel(BaseLLM):

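Reviewer note: membership in FUNCTION_CALL_MODELS is what gates function calling per base model, so adding gpt-4-1106-preview here enables it for that model's deployments, while gpt-4-vision-preview is deliberately left out. A minimal sketch of such a gate (the helper function is hypothetical, not part of this diff):

FUNCTION_CALL_MODELS = [
    'gpt-4',
    'gpt-4-32k',
    'gpt-35-turbo',
    'gpt-35-turbo-16k',
    'gpt-4-1106-preview'
]

def supports_function_call(base_model_name: str) -> bool:
    # Hypothetical helper: feature-gate function calling on the allow-list above.
    return base_model_name in FUNCTION_CALL_MODELS

assert supports_function_call('gpt-4-1106-preview')
assert not supports_function_call('gpt-4-vision-preview')  # the vision model is not listed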

@@ -122,6 +122,22 @@ class AzureOpenAIProvider(BaseModelProvider):
                 ModelFeature.AGENT_THOUGHT.value
             ]
         },
+        {
+            'id': 'gpt-4-1106-preview',
+            'name': 'gpt-4-1106-preview',
+            'mode': ModelMode.CHAT.value,
+            'features': [
+                ModelFeature.AGENT_THOUGHT.value
+            ]
+        },
+        {
+            'id': 'gpt-4-vision-preview',
+            'name': 'gpt-4-vision-preview',
+            'mode': ModelMode.CHAT.value,
+            'features': [
+                ModelFeature.VISION.value
+            ]
+        },
         {
             'id': 'text-davinci-003',
             'name': 'text-davinci-003',
@@ -171,6 +187,8 @@ class AzureOpenAIProvider(BaseModelProvider):
         base_model_max_tokens = {
             'gpt-4': 8192,
             'gpt-4-32k': 32768,
+            'gpt-4-1106-preview': 4096,
+            'gpt-4-vision-preview': 4096,
             'gpt-35-turbo': 4096,
             'gpt-35-turbo-16k': 16384,
             'text-davinci-003': 4097,
@@ -376,6 +394,18 @@ class AzureOpenAIProvider(BaseModelProvider):
                 provider_credentials=credentials
             )
 
+            self._add_provider_model(
+                model_name='gpt-4-1106-preview',
+                model_type=ModelType.TEXT_GENERATION,
+                provider_credentials=credentials
+            )
+
+            self._add_provider_model(
+                model_name='gpt-4-vision-preview',
+                model_type=ModelType.TEXT_GENERATION,
+                provider_credentials=credentials
+            )
+
             self._add_provider_model(
                 model_name='text-davinci-003',
                 model_type=ModelType.TEXT_GENERATION,

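Both preview models are registered with a 4096-token cap in base_model_max_tokens: that figure is the completion (output) limit, not the 128k context window, which matches the UI hint added in the i18n files below. A small sketch of consulting the mapping (the fallback default is an assumption, not from this diff):

base_model_max_tokens = {
    'gpt-4': 8192,
    'gpt-4-32k': 32768,
    'gpt-4-1106-preview': 4096,
    'gpt-4-vision-preview': 4096,
    'gpt-35-turbo': 4096,
    'gpt-35-turbo-16k': 16384,
    'text-davinci-003': 4097,
}

def max_completion_tokens(base_model_name: str) -> int:
    # Fall back to the smallest known cap for unrecognized base models (assumed behavior).
    return base_model_max_tokens.get(base_model_name, 4096)

print(max_completion_tokens('gpt-4-vision-preview'))  # 4096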

@@ -21,6 +21,18 @@
     "unit": "0.001",
     "currency": "USD"
   },
+  "gpt-4-1106-preview": {
+    "prompt": "0.01",
+    "completion": "0.03",
+    "unit": "0.001",
+    "currency": "USD"
+  },
+  "gpt-4-vision-preview": {
+    "prompt": "0.01",
+    "completion": "0.03",
+    "unit": "0.001",
+    "currency": "USD"
+  },
   "gpt-35-turbo": {
     "prompt": "0.002",
     "completion": "0.0015",

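Reading these rules, "unit": "0.001" scales the listed prices to per-token values, i.e. $0.01 per 1,000 prompt tokens and $0.03 per 1,000 completion tokens for both preview models. A worked sketch of the resulting cost formula (the function name is illustrative, not from the codebase):

from decimal import Decimal

def completion_cost(prompt_tokens: int, completion_tokens: int) -> Decimal:
    # Pricing for gpt-4-1106-preview / gpt-4-vision-preview from the rules above.
    unit = Decimal('0.001')
    prompt_price = Decimal('0.01')      # USD per 1,000 prompt tokens
    completion_price = Decimal('0.03')  # USD per 1,000 completion tokens
    return (prompt_tokens * prompt_price + completion_tokens * completion_price) * unit

print(completion_cost(1000, 500))  # 0.025 (USD)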

@@ -1,11 +1,13 @@
-from typing import Dict, Any, Optional, List, Tuple, Union
+from typing import Dict, Any, Optional, List, Tuple, Union, cast
 
 from langchain.callbacks.manager import CallbackManagerForLLMRun
 from langchain.chat_models import AzureChatOpenAI
 from langchain.chat_models.openai import _convert_dict_to_message
-from langchain.schema import ChatResult, BaseMessage, ChatGeneration
 from pydantic import root_validator
 
+from langchain.schema import ChatResult, BaseMessage, ChatGeneration, ChatMessage, HumanMessage, AIMessage, SystemMessage, FunctionMessage
+from core.model_providers.models.entity.message import LCHumanMessageWithFiles, PromptMessageFileType, ImagePromptMessageFile
 
 
 class EnhanceAzureChatOpenAI(AzureChatOpenAI):
     request_timeout: Optional[Union[float, Tuple[float, float]]] = (5.0, 300.0)
@@ -51,13 +53,18 @@ class EnhanceAzureChatOpenAI(AzureChatOpenAI):
         }
 
     def _generate(
-            self,
-            messages: List[BaseMessage],
-            stop: Optional[List[str]] = None,
-            run_manager: Optional[CallbackManagerForLLMRun] = None,
-            **kwargs: Any,
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
     ) -> ChatResult:
-        message_dicts, params = self._create_message_dicts(messages, stop)
+        params = self._client_params
+        if stop is not None:
+            if "stop" in params:
+                raise ValueError("`stop` found in both the input and default params.")
+            params["stop"] = stop
+        message_dicts = [self._convert_message_to_dict(m) for m in messages]
         params = {**params, **kwargs}
         if self.streaming:
             inner_completion = ""
@@ -65,7 +72,7 @@ class EnhanceAzureChatOpenAI(AzureChatOpenAI):
             params["stream"] = True
             function_call: Optional[dict] = None
             for stream_resp in self.completion_with_retry(
-                    messages=message_dicts, **params
+                messages=message_dicts, **params
             ):
                 if len(stream_resp["choices"]) > 0:
                     role = stream_resp["choices"][0]["delta"].get("role", role)
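For context on the streaming branch above: the role and content deltas arrive chunk by chunk and are accumulated into one message. A stripped-down sketch of that accumulation loop (the chunk dicts mimic the pre-v1 openai SDK response shape this code targets; values are illustrative):

# Illustrative chunks shaped like pre-v1 openai SDK streaming responses.
chunks = [
    {"choices": [{"delta": {"role": "assistant", "content": "Hel"}}]},
    {"choices": [{"delta": {"content": "lo"}}]},
    {"choices": [{"delta": {}}]},  # final chunks may carry no content
]

inner_completion = ""
role = "assistant"
for stream_resp in chunks:
    if len(stream_resp["choices"]) > 0:
        delta = stream_resp["choices"][0]["delta"]
        role = delta.get("role", role)  # keep the last role seen, as in the diff
        inner_completion += delta.get("content") or ""

print(role, inner_completion)  # assistant Hello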
@@ -88,4 +95,47 @@ class EnhanceAzureChatOpenAI(AzureChatOpenAI):
             )
             return ChatResult(generations=[ChatGeneration(message=message)])
         response = self.completion_with_retry(messages=message_dicts, **params)
-        return self._create_chat_result(response)
+        return self._create_chat_result(response)
+
+    def _convert_message_to_dict(self, message: BaseMessage) -> dict:
+        if isinstance(message, ChatMessage):
+            message_dict = {"role": message.role, "content": message.content}
+        elif isinstance(message, LCHumanMessageWithFiles):
+            content = [
+                {
+                    "type": "text",
+                    "text": message.content
+                }
+            ]
+
+            for file in message.files:
+                if file.type == PromptMessageFileType.IMAGE:
+                    file = cast(ImagePromptMessageFile, file)
+                    content.append({
+                        "type": "image_url",
+                        "image_url": {
+                            "url": file.data,
+                            "detail": file.detail.value
+                        }
+                    })
+
+            message_dict = {"role": "user", "content": content}
+        elif isinstance(message, HumanMessage):
+            message_dict = {"role": "user", "content": message.content}
+        elif isinstance(message, AIMessage):
+            message_dict = {"role": "assistant", "content": message.content}
+            if "function_call" in message.additional_kwargs:
+                message_dict["function_call"] = message.additional_kwargs["function_call"]
+        elif isinstance(message, SystemMessage):
+            message_dict = {"role": "system", "content": message.content}
+        elif isinstance(message, FunctionMessage):
+            message_dict = {
+                "role": "function",
+                "content": message.content,
+                "name": message.name,
+            }
+        else:
+            raise ValueError(f"Got unknown type {message}")
+        if "name" in message.additional_kwargs:
+            message_dict["name"] = message.additional_kwargs["name"]
+        return message_dict

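The new _convert_message_to_dict is what makes gpt-4-vision-preview usable: an LCHumanMessageWithFiles is flattened into OpenAI's multimodal content-part format instead of a plain string. A sketch of the dict the method would produce for one text part plus one image part (the prompt text, URL, and detail value are placeholders):

# Shape of the payload _convert_message_to_dict builds for a vision message;
# values are placeholders, the structure follows the diff above.
message_dict = {
    "role": "user",
    "content": [
        {"type": "text", "text": "What is in this image?"},
        {
            "type": "image_url",
            "image_url": {
                "url": "data:image/png;base64,iVBORw0KGgo...",  # or an https:// URL
                "detail": "low",  # from ImagePromptMessageFile.detail, e.g. "low" / "high"
            },
        },
    ],
}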

@@ -65,7 +65,7 @@ const translation = {
     'How much to penalize new tokens based on their existing frequency in the text so far.\nDecreases the model\'s likelihood to repeat the same line verbatim.',
   max_tokens: 'Max token',
   max_tokensTip:
-    'Used to limit the maximum length of the reply, in tokens. \nLarger values may limit the space left for prompt words, chat logs, and Knowledge. \nIt is recommended to set it below two-thirds.',
+    'Used to limit the maximum length of the reply, in tokens. \nLarger values may limit the space left for prompt words, chat logs, and Knowledge. \nIt is recommended to set it below two-thirds\ngpt-4-1106-preview, gpt-4-vision-preview max token (input 128k output 4k)',
   maxTokenSettingTip: 'Your max token setting is high, potentially limiting space for prompts, queries, and data. Consider setting it below 2/3.',
   setToCurrentModelMaxTokenTip: 'Max token is updated to the 80% maximum token of the current model {{maxToken}}.',
   stop_sequences: 'Stop sequences',


@@ -65,7 +65,7 @@ const translation = {
     '影响常见与罕见词汇使用。\n值较大时倾向于生成不常见的词汇和表达方式。\n值越小更倾向于使用常见和普遍接受的词汇或短语。',
   max_tokens: '单次回复限制 max_tokens',
   max_tokensTip:
-    '用于限制回复的最大长度,以 token 为单位。\n较大的值可能会限制给提示词、聊天记录和知识库留出的空间。\n建议将其设置在三分之二以下。',
+    '用于限制回复的最大长度,以 token 为单位。\n较大的值可能会限制给提示词、聊天记录和知识库留出的空间。\n建议将其设置在三分之二以下。\ngpt-4-1106-preview、gpt-4-vision-preview 最大长度 (输入128k输出4k)',
   maxTokenSettingTip: '您设置的最大 tokens 数较大,可能会导致 prompt、用户问题、知识库内容没有 token 空间进行处理,建议设置到 2/3 以下。',
   setToCurrentModelMaxTokenTip: '最大令牌数更新为当前模型最大的令牌数 {{maxToken}} 的 80%。',
   stop_sequences: '停止序列 stop_sequences',


@@ -98,7 +98,7 @@
     "@types/sortablejs": "^1.15.1",
     "autoprefixer": "^10.4.14",
     "cross-env": "^7.0.3",
-    "eslint": "8.36.0",
+    "eslint": "^8.36.0",
     "eslint-config-next": "^13.4.7",
     "husky": "^8.0.3",
     "lint-staged": "^13.2.2",

File diff suppressed because it is too large