[seanguo] feat: add llama 3.1 support in bedrock (#6645)

This commit is contained in:
longzhihun 2024-07-25 11:20:37 +08:00 committed by GitHub
parent 349ec0db77
commit 9815aab7a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 69 additions and 7 deletions

View File

@ -10,6 +10,8 @@
- cohere.command-text-v14
- cohere.command-r-plus-v1:0
- cohere.command-r-v1:0
- meta.llama3-1-8b-instruct-v1:0
- meta.llama3-1-70b-instruct-v1:0
- meta.llama3-8b-instruct-v1:0
- meta.llama3-70b-instruct-v1:0
- meta.llama2-13b-chat-v1

View File

@ -208,14 +208,25 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
if model_info['support_tool_use'] and tools:
parameters['toolConfig'] = self._convert_converse_tool_config(tools=tools)
try:
if stream:
response = bedrock_client.converse_stream(**parameters)
return self._handle_converse_stream_response(model_info['model'], credentials, response, prompt_messages)
else:
response = bedrock_client.converse(**parameters)
return self._handle_converse_response(model_info['model'], credentials, response, prompt_messages)
except ClientError as ex:
error_code = ex.response['Error']['Code']
full_error_msg = f"{error_code}: {ex.response['Error']['Message']}"
raise self._map_client_to_invoke_error(error_code, full_error_msg)
except (EndpointConnectionError, NoRegionError, ServiceNotInRegionError) as ex:
raise InvokeConnectionError(str(ex))
if stream:
response = bedrock_client.converse_stream(**parameters)
return self._handle_converse_stream_response(model_info['model'], credentials, response, prompt_messages)
else:
response = bedrock_client.converse(**parameters)
return self._handle_converse_response(model_info['model'], credentials, response, prompt_messages)
except UnknownServiceError as ex:
raise InvokeServerUnavailableError(str(ex))
except Exception as ex:
raise InvokeError(str(ex))
def _handle_converse_response(self, model: str, credentials: dict, response: dict,
prompt_messages: list[PromptMessage]) -> LLMResult:
"""
@ -558,7 +569,6 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
except ClientError as ex:
error_code = ex.response['Error']['Code']
full_error_msg = f"{error_code}: {ex.response['Error']['Message']}"
raise CredentialsValidateFailedError(str(self._map_client_to_invoke_error(error_code, full_error_msg)))
except Exception as ex:

View File

@ -0,0 +1,25 @@
# Bedrock model definition for Meta Llama 3.1 Instruct 70B.
# NOTE(review): indentation restored — the pasted text was flattened to
# column 0, which breaks the YAML nesting this schema requires.
model: meta.llama3-1-70b-instruct-v1:0
label:
  en_US: Llama 3.1 Instruct 70B
model_type: llm
model_properties:
  mode: completion
  context_size: 128000  # 128K-token context window
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.5
  - name: top_p
    use_template: top_p
    default: 0.9
  # Llama models call the output-length cap max_gen_len; it is mapped onto
  # the shared max_tokens template.
  - name: max_gen_len
    use_template: max_tokens
    required: true
    default: 512
    min: 1
    max: 2048
pricing:
  # presumably USD per 1,000 tokens (unit '0.001') — confirm against the
  # provider pricing schema
  input: '0.00265'
  output: '0.0035'
  unit: '0.001'
  currency: USD

View File

@ -0,0 +1,25 @@
model: meta.llama3-1-8b-instruct-v1:0
label:
en_US: Llama 3.1 Instruct 8B
model_type: llm
model_properties:
mode: completion
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
default: 0.5
- name: top_p
use_template: top_p
default: 0.9
- name: max_gen_len
use_template: max_tokens
required: true
default: 512
min: 1
max: 2048
pricing:
input: '0.0003'
output: '0.0006'
unit: '0.001'
currency: USD