From 5541248264fdf69392917a25924786a1b834e91e Mon Sep 17 00:00:00 2001
From: Hongbin <61153998+BingGeX@users.noreply.github.com>
Date: Sat, 21 Sep 2024 17:33:15 +0800
Subject: [PATCH] Update the PerfXCloud provider model list, update
 PerfXCloudProvider validate_provider_credentials method. (#8587)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: xhb <466010723@qq.com>
---
 .../perfxcloud/llm/Llama3-Chinese_v2.yaml     |  1 +
 .../Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml  |  1 +
 .../llm/Meta-Llama-3-8B-Instruct.yaml         |  1 +
 ...Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml |  1 +
 .../llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml       |  1 +
 .../perfxcloud/llm/Qwen1.5-7B.yaml            |  1 +
 .../llm/Qwen2-72B-Instruct-AWQ-int4.yaml      | 61 ++++++++++++++++++
 .../llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml     |  1 +
 .../perfxcloud/llm/Qwen2-7B-Instruct.yaml     | 63 +++++++++++++++++++
 .../perfxcloud/llm/Qwen2-7B.yaml              |  1 +
 .../perfxcloud/llm/Qwen2.5-72B-Instruct.yaml  | 61 ++++++++++++++++++
 .../perfxcloud/llm/Qwen2.5-7B-Instruct.yaml   | 61 ++++++++++++++++++
 .../llm/Reflection-Llama-3.1-70B.yaml         | 61 ++++++++++++++++++
 .../perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml    | 61 ++++++++++++++++++
 .../perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml    | 61 ++++++++++++++++++
 .../perfxcloud/llm/Yi-Coder-9B-Chat.yaml      | 61 ++++++++++++++++++
 .../perfxcloud/llm/_position.yaml             | 25 +++++---
 .../perfxcloud/llm/chatglm3-6b.yaml           |  1 +
 .../perfxcloud/llm/deepseek-v2-chat.yaml      |  1 +
 .../perfxcloud/llm/deepseek-v2-lite-chat.yaml |  1 +
 .../model_providers/perfxcloud/perfxcloud.py  | 20 +-----
 .../text_embedding/gte-Qwen2-7B-instruct.yaml |  4 ++
 22 files changed, 523 insertions(+), 27 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
 create mode 100644 api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml

diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
index 87712874b..bf91468fc 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Llama3-Chinese_v2.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
index f16f3de60..781b837e8 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-70B-Instruct-GPTQ-Int4.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
index 21267c240..67210e902 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3-8B-Instruct.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
index 80c7ec40f..482632ff0 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Meta-Llama-3.1-405B-Instruct-AWQ-INT4.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
index 841dd97f3..ddb6fd977 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-72B-Chat-GPTQ-Int4.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
index 33d5d12b2..024c79dbc 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen1.5-7B.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
new file mode 100644
index 000000000..94f661f40
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-AWQ-int4.yaml
@@ -0,0 +1,61 @@
+model: Qwen2-72B-Instruct-AWQ-int4
+label:
+  en_US: Qwen2-72B-Instruct-AWQ-int4
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
index 62255cc7d..a06f8d5ab 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-72B-Instruct-GPTQ-Int4.yaml
@@ -61,3 +61,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
new file mode 100644
index 000000000..436941139
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B-Instruct.yaml
@@ -0,0 +1,63 @@
+model: Qwen2-7B-Instruct
+label:
+  en_US: Qwen2-7B-Instruct
+model_type: llm
+features:
+  - multi-tool-call
+  - agent-thought
+  - stream-tool-call
+model_properties:
+  mode: completion
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.3
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 2000
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
index 2f3f1f022..d549ecd22 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2-7B.yaml
@@ -61,3 +61,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
new file mode 100644
index 000000000..15cbf01f1
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-72B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2.5-72B-Instruct
+label:
+  en_US: Qwen2.5-72B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 30720
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
new file mode 100644
index 000000000..dadc8f8f3
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Qwen2.5-7B-Instruct.yaml
@@ -0,0 +1,61 @@
+model: Qwen2.5-7B-Instruct
+label:
+  en_US: Qwen2.5-7B-Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
new file mode 100644
index 000000000..649be20b4
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Reflection-Llama-3.1-70B.yaml
@@ -0,0 +1,61 @@
+model: Reflection-Llama-3.1-70B
+label:
+  en_US: Reflection-Llama-3.1-70B
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 10240
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
new file mode 100644
index 000000000..92eae6804
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-1_5-9B-Chat-16K.yaml
@@ -0,0 +1,61 @@
+model: Yi-1_5-9B-Chat-16K
+label:
+  en_US: Yi-1_5-9B-Chat-16K
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 16384
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
new file mode 100644
index 000000000..0e21ce148
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-1.5B-Chat.yaml
@@ -0,0 +1,61 @@
+model: Yi-Coder-1.5B-Chat
+label:
+  en_US: Yi-Coder-1.5B-Chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 20480
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
new file mode 100644
index 000000000..23b0841ce
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/Yi-Coder-9B-Chat.yaml
@@ -0,0 +1,61 @@
+model: Yi-Coder-9B-Chat
+label:
+  en_US: Yi-Coder-9B-Chat
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 20480
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    type: float
+    default: 0.5
+    min: 0.0
+    max: 2.0
+    help:
+      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
+      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls how much the probability distribution over candidate words is smoothed during generation. A higher temperature flattens the distribution, allowing more low-probability words to be selected and producing more diverse results; a lower temperature sharpens the distribution, making high-probability words more likely to be selected and producing more deterministic results.
+  - name: max_tokens
+    use_template: max_tokens
+    type: int
+    default: 600
+    min: 1
+    max: 1248
+    help:
+      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
+      en_US: Specifies the maximum number of tokens the model can generate. It defines an upper limit on generation but does not guarantee that this number is reached every time.
+  - name: top_p
+    use_template: top_p
+    type: float
+    default: 0.8
+    min: 0.1
+    max: 0.9
+    help:
+      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
+      en_US: The probability threshold for nucleus sampling during generation. For example, at a value of 0.8, only the smallest set of most-likely tokens whose probabilities sum to at least 0.8 is kept as the candidate set. The value range is (0, 1.0); the larger the value, the more random the output, and the smaller the value, the more deterministic the output.
+  - name: top_k
+    type: int
+    min: 0
+    max: 99
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    help:
+      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
+      en_US: The size of the candidate set sampled during generation. For example, at a value of 50, only the 50 highest-scoring tokens in a single step form the candidate set for random sampling. The larger the value, the more random the output; the smaller the value, the more deterministic the output.
+  - name: repetition_penalty
+    required: false
+    type: float
+    default: 1.1
+    label:
+      en_US: Repetition penalty
+    help:
+      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
+      en_US: Controls the degree of repetition in the model's output. Increasing repetition_penalty reduces repetition; 1.0 means no penalty.
+pricing:
+  input: "0.000"
+  output: "0.000"
+  unit: "0.000"
+  currency: RMB
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
index 2c9eac0e4..37bf400f1 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/_position.yaml
@@ -1,15 +1,24 @@
-- Meta-Llama-3.1-405B-Instruct-AWQ-INT4
-- Meta-Llama-3.1-8B-Instruct
-- Meta-Llama-3-70B-Instruct-GPTQ-Int4
-- Meta-Llama-3-8B-Instruct
-- Qwen2-72B-Instruct-GPTQ-Int4
+- Qwen2.5-72B-Instruct
+- Qwen2.5-7B-Instruct
+- Yi-Coder-1.5B-Chat
+- Yi-Coder-9B-Chat
+- Qwen2-72B-Instruct-AWQ-int4
+- Yi-1_5-9B-Chat-16K
+- Qwen2-7B-Instruct
+- Reflection-Llama-3.1-70B
 - Qwen2-72B-Instruct
+- Meta-Llama-3.1-8B-Instruct
+
+- Meta-Llama-3.1-405B-Instruct-AWQ-INT4
+- Meta-Llama-3-70B-Instruct-GPTQ-Int4
+- chatglm3-6b
+- Meta-Llama-3-8B-Instruct
+- Llama3-Chinese_v2
+- deepseek-v2-lite-chat
+- Qwen2-72B-Instruct-GPTQ-Int4
 - Qwen2-7B
 - Qwen-14B-Chat-Int4
 - Qwen1.5-72B-Chat-GPTQ-Int4
 - Qwen1.5-7B
 - Qwen1.5-110B-Chat-GPTQ-Int4
 - deepseek-v2-chat
-- deepseek-v2-lite-chat
-- Llama3-Chinese_v2
-- chatglm3-6b
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml
index f9c26b7f9..75d80f784 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/chatglm3-6b.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml
index 078922ef9..fa9a7b717 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-chat.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
index 4ff3af7b5..75a26d250 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
+++ b/api/core/model_runtime/model_providers/perfxcloud/llm/deepseek-v2-lite-chat.yaml
@@ -59,3 +59,4 @@ pricing:
   output: "0.000"
   unit: "0.000"
   currency: RMB
+deprecated: true
diff --git a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
index 450d22fb7..9a4ead031 100644
--- a/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
+++ b/api/core/model_runtime/model_providers/perfxcloud/perfxcloud.py
@@ -1,7 +1,5 @@
 import logging
 
-from core.model_runtime.entities.model_entities import ModelType
-from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.model_provider import ModelProvider
 
 logger = logging.getLogger(__name__)
@@ -9,20 +7,4 @@ logger = logging.getLogger(__name__)
 
 class PerfXCloudProvider(ModelProvider):
     def validate_provider_credentials(self, credentials: dict) -> None:
-        """
-        Validate provider credentials
-        if validate failed, raise exception
-
-        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
-        """
-        try:
-            model_instance = self.get_model_instance(ModelType.LLM)
-
-            # Use `Qwen2_72B_Chat_GPTQ_Int4` model for validate,
-            # no matter what model you pass in, text completion model or chat model
-            model_instance.validate_credentials(model="Qwen2-72B-Instruct-GPTQ-Int4", credentials=credentials)
-        except CredentialsValidateFailedError as ex:
-            raise ex
-        except Exception as ex:
-            logger.exception(f"{self.get_provider_schema().provider} credentials validate failed")
-            raise ex
+        pass
diff --git a/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml
new file mode 100644
index 000000000..03db0d8bc
--- /dev/null
+++ b/api/core/model_runtime/model_providers/perfxcloud/text_embedding/gte-Qwen2-7B-instruct.yaml
@@ -0,0 +1,4 @@
+model: gte-Qwen2-7B-instruct
+model_type: text-embedding
+model_properties:
+  context_size: 2048