From 5b7071e4b0df7bfeea1b09df184407f77bbf32fc Mon Sep 17 00:00:00 2001 From: Garfield Dai Date: Mon, 20 Nov 2023 17:54:01 +0800 Subject: [PATCH] Feat/sdk vision support (#1531) Co-authored-by: Joel --- api/controllers/service_api/app/file.py | 3 + sdks/nodejs-client/README.md | 20 ++-- sdks/nodejs-client/index.js | 35 ++++-- sdks/nodejs-client/package.json | 2 +- sdks/php-client/README.md | 43 +++++++- sdks/php-client/dify-client.php | 43 +++++++- sdks/python-client/README.md | 119 ++++++++++++++++++--- sdks/python-client/dify_client/__init__.py | 2 +- sdks/python-client/dify_client/client.py | 43 +++++--- sdks/python-client/setup.py | 2 +- sdks/python-client/tests/test_client.py | 69 ++++++++++-- 11 files changed, 319 insertions(+), 62 deletions(-) diff --git a/api/controllers/service_api/app/file.py b/api/controllers/service_api/app/file.py index f928e083a..b2cb7a05f 100644 --- a/api/controllers/service_api/app/file.py +++ b/api/controllers/service_api/app/file.py @@ -26,6 +26,9 @@ class FileApi(AppApiResource): if 'file' not in request.files: raise NoFileUploadedError() + if not file.mimetype: + raise UnsupportedFileTypeError() + if len(request.files) > 1: raise TooManyFilesError() diff --git a/sdks/nodejs-client/README.md b/sdks/nodejs-client/README.md index a9cf3edac..50303b486 100644 --- a/sdks/nodejs-client/README.md +++ b/sdks/nodejs-client/README.md @@ -14,27 +14,33 @@ import { DifyClient, ChatClient, CompletionClient } from 'dify-client' const API_KEY = 'your-api-key-here' const user = `random-user-id` -const inputs = { - name: 'test name a' -} -const query = "Please tell me a short story in 10 words or less." +const query = 'Please tell me a short story in 10 words or less.' 
+const remote_url_files = [{ + type: 'image', + transfer_method: 'remote_url', + url: 'your_url_address' +}] // Create a completion client const completionClient = new CompletionClient(API_KEY) // Create a completion message -completionClient.createCompletionMessage(inputs, query, responseMode, user) +completionClient.createCompletionMessage({'query': query}, user) +// Create a completion message with vision model +completionClient.createCompletionMessage({'query': 'Describe the picture.'}, user, false, remote_url_files) // Create a chat client const chatClient = new ChatClient(API_KEY) // Create a chat message in stream mode -const response = await chatClient.createChatMessage(inputs, query, user, true, null) +const response = await chatClient.createChatMessage({}, query, user, true, null) const stream = response.data; stream.on('data', data => { console.log(data); }); stream.on('end', () => { - console.log("stream done"); + console.log('stream done'); }); +// Create a chat message with vision model +chatClient.createChatMessage({}, 'Describe the picture.', user, false, null, remote_url_files) // Fetch conversations chatClient.getConversations(user) // Fetch conversation messages diff --git a/sdks/nodejs-client/index.js b/sdks/nodejs-client/index.js index 8aa69e9bb..584ee8743 100644 --- a/sdks/nodejs-client/index.js +++ b/sdks/nodejs-client/index.js @@ -34,6 +34,10 @@ export const routes = { method: "DELETE", url: (conversation_id) => `/conversations/${conversation_id}`, }, + fileUpload: { + method: "POST", + url: () => `/files/upload`, + } }; export class DifyClient { @@ -51,11 +55,15 @@ export class DifyClient { endpoint, data = null, params = null, - stream = false + stream = false, + headerParams = {} ) { const headers = { - Authorization: `Bearer ${this.apiKey}`, - "Content-Type": "application/json", + ...{ + Authorization: `Bearer ${this.apiKey}`, + "Content-Type": "application/json", + }, + ...headerParams }; const url = `${this.baseUrl}${endpoint}`; @@ 
-104,15 +112,28 @@ export class DifyClient { params ); } + + fileUpload(data) { + return this.sendRequest( + routes.fileUpload.method, + routes.fileUpload.url(), + data, + null, + false, + { + "Content-Type": 'multipart/form-data' + } + ); + } } export class CompletionClient extends DifyClient { - createCompletionMessage(inputs, query, user, stream = false) { + createCompletionMessage(inputs, user, stream = false, files = null) { const data = { inputs, - query, user, response_mode: stream ? "streaming" : "blocking", + files, }; return this.sendRequest( routes.createCompletionMessage.method, @@ -130,13 +151,15 @@ export class ChatClient extends DifyClient { query, user, stream = false, - conversation_id = null + conversation_id = null, + files = null ) { const data = { inputs, query, user, response_mode: stream ? "streaming" : "blocking", + files, }; if (conversation_id) data.conversation_id = conversation_id; diff --git a/sdks/nodejs-client/package.json b/sdks/nodejs-client/package.json index a59b766a1..30de613f3 100644 --- a/sdks/nodejs-client/package.json +++ b/sdks/nodejs-client/package.json @@ -1,6 +1,6 @@ { "name": "dify-client", - "version": "2.0.0", + "version": "2.1.0", "description": "This is the Node.js SDK for the Dify.AI API, which allows you to easily integrate Dify.AI into your Node.js applications.", "main": "index.js", "type": "module", diff --git a/sdks/php-client/README.md b/sdks/php-client/README.md index a44abc0f8..b0a435bba 100644 --- a/sdks/php-client/README.md +++ b/sdks/php-client/README.md @@ -11,7 +11,7 @@ This is the PHP SDK for the Dify API, which allows you to easily integrate Dify After installing the SDK, you can use it in your project like this: -``` +```php create_completion_message($inputs, $query, $response_mode, $user); +$response = $completionClient->create_completion_message(array("query" => "Who are you?"), "blocking", "user_id"); // Create a chat client $chatClient = new ChatClient($apiKey); -$response = 
$chatClient->create_chat_message($inputs, $query, $user, $response_mode, $conversation_id); +$response = $chatClient->create_chat_message(array(), "Who are you?", "user_id", "blocking", $conversation_id); + +$fileForVision = [ + [ + "type" => "image", + "transfer_method" => "remote_url", + "url" => "your_image_url" + ] +]; + +// $fileForVision = [ +// [ +// "type" => "image", +// "transfer_method" => "local_file", +// "upload_file_id" => "your_file_id" +// ] +// ]; + +// Create a completion message with a vision model like gpt-4-vision +$response = $completionClient->create_completion_message(array("query" => "Describe this image."), "blocking", "user_id", $fileForVision); + +// Create a chat message with a vision model like gpt-4-vision +$response = $chatClient->create_chat_message(array(), "Describe this image.", "user_id", "blocking", $conversation_id, $fileForVision); + +// File Upload +$fileForUpload = [ + [ + 'tmp_name' => '/path/to/file/filename.jpg', + 'name' => 'filename.jpg' + ] +]; +$response = $difyClient->file_upload("user_id", $fileForUpload); +$result = json_decode($response->getBody(), true); +echo 'upload_file_id: ' . $result['id']; // Fetch application parameters -$response = $difyClient->get_application_parameters($user); +$response = $difyClient->get_application_parameters("user_id"); // Provide feedback for a message -$response = $difyClient->message_feedback($message_id, $rating, $user); +$response = $difyClient->message_feedback($message_id, $rating, "user_id"); // Other available methods: // - get_conversation_messages() diff --git a/sdks/php-client/dify-client.php b/sdks/php-client/dify-client.php index cc2e85477..c5ddcc3a8 100644 --- a/sdks/php-client/dify-client.php +++ b/sdks/php-client/dify-client.php @@ -19,6 +19,13 @@ class DifyClient { 'Content-Type' => 'application/json', ], ]); + $this->file_client = new Client([ + 'base_uri' => $this->base_url, + 'headers' => [ + 'Authorization' => 'Bearer ' . 
$this->api_key, + 'Content-Type' => 'multipart/form-data', + ], + ]); } protected function send_request($method, $endpoint, $data = null, $params = null, $stream = false) { @@ -44,27 +51,57 @@ class DifyClient { $params = ['user' => $user]; return $this->send_request('GET', 'parameters', null, $params); } + + public function file_upload($user, $files) { + $data = ['user' => $user]; + $options = [ + 'multipart' => $this->prepareMultipart($data, $files) + ]; + + return $this->file_client->request('POST', 'files/upload', $options); + } + + protected function prepareMultipart($data, $files) { + $multipart = []; + foreach ($data as $key => $value) { + $multipart[] = [ + 'name' => $key, + 'contents' => $value + ]; + } + + foreach ($files as $file) { + $multipart[] = [ + 'name' => 'file', + 'contents' => fopen($file['tmp_name'], 'r'), + 'filename' => $file['name'] + ]; + } + + return $multipart; + } } class CompletionClient extends DifyClient { - public function create_completion_message($inputs, $query, $response_mode, $user) { + public function create_completion_message($inputs, $response_mode, $user, $files = null) { $data = [ 'inputs' => $inputs, - 'query' => $query, 'response_mode' => $response_mode, 'user' => $user, + 'files' => $files, ]; return $this->send_request('POST', 'completion-messages', $data, null, $response_mode === 'streaming'); } } class ChatClient extends DifyClient { - public function create_chat_message($inputs, $query, $user, $response_mode = 'blocking', $conversation_id = null) { + public function create_chat_message($inputs, $query, $user, $response_mode = 'blocking', $conversation_id = null, $files = null) { $data = [ 'inputs' => $inputs, 'query' => $query, 'user' => $user, 'response_mode' => $response_mode, + 'files' => $files, ]; if ($conversation_id) { $data['conversation_id'] = $conversation_id; diff --git a/sdks/python-client/README.md b/sdks/python-client/README.md index 0997d3263..8949ef08f 100644 --- a/sdks/python-client/README.md +++ 
b/sdks/python-client/README.md @@ -14,8 +14,7 @@ Write your code with sdk: - completion generate with `blocking` response_mode -``` -import json +```python from dify_client import CompletionClient api_key = "your_api_key" @@ -24,18 +23,50 @@ api_key = "your_api_key" completion_client = CompletionClient(api_key) # Create Completion Message using CompletionClient -completion_response = completion_client.create_completion_message(inputs={}, query="Hello", response_mode="blocking", user="user_id") +completion_response = completion_client.create_completion_message(inputs={"query": "What's the weather like today?"}, + response_mode="blocking", user="user_id") completion_response.raise_for_status() -result = completion_response.text -result = json.loads(result) +result = completion_response.json() + +print(result.get('answer')) +``` + +- completion using vision model, like gpt-4-vision + +```python +from dify_client import CompletionClient + +api_key = "your_api_key" + +# Initialize CompletionClient +completion_client = CompletionClient(api_key) + +files = [{ + "type": "image", + "transfer_method": "remote_url", + "url": "your_image_url" +}] + +# files = [{ +# "type": "image", +# "transfer_method": "local_file", +# "upload_file_id": "your_file_id" +# }] + +# Create Completion Message using CompletionClient +completion_response = completion_client.create_completion_message(inputs={"query": "Describe the picture."}, + response_mode="blocking", user="user_id", files=files) +completion_response.raise_for_status() + +result = completion_response.json() print(result.get('answer')) ``` - chat generate with `streaming` response_mode -``` +```python import json from dify_client import ChatClient @@ -55,10 +86,67 @@ for line in chat_response.iter_lines(decode_unicode=True): print(line.get('answer')) ``` +- chat using vision model, like gpt-4-vision + +```python +from dify_client import ChatClient + +api_key = "your_api_key" + +# Initialize ChatClient +chat_client = 
ChatClient(api_key) + +files = [{ + "type": "image", + "transfer_method": "remote_url", + "url": "your_image_url" +}] + +# files = [{ +# "type": "image", +# "transfer_method": "local_file", +# "upload_file_id": "your_file_id" +# }] + +# Create Chat Message using ChatClient +chat_response = chat_client.create_chat_message(inputs={}, query="Describe the picture.", user="user_id", + response_mode="blocking", files=files) +chat_response.raise_for_status() + +result = chat_response.json() + +print(result.get("answer")) +``` + +- upload file when using vision model + +```python +from dify_client import DifyClient + +api_key = "your_api_key" + +# Initialize Client +dify_client = DifyClient(api_key) + +file_path = "your_image_file_path" +file_name = "panda.jpeg" +mime_type = "image/jpeg" + +with open(file_path, "rb") as file: + files = { + "file": (file_name, file, mime_type) + } + response = dify_client.file_upload("user_id", files) + + result = response.json() + print(f'upload_file_id: {result.get("id")}') +``` + + + - Others -``` -import json +```python from dify_client import ChatClient api_key = "your_api_key" @@ -69,32 +157,29 @@ client = ChatClient(api_key) # Get App parameters parameters = client.get_application_parameters(user="user_id") parameters.raise_for_status() -parameters = json.loads(parameters.text) print('[parameters]') -print(parameters) +print(parameters.json()) # Get Conversation List (only for chat) conversations = client.get_conversations(user="user_id") conversations.raise_for_status() -conversations = json.loads(conversations.text) print('[conversations]') -print(conversations) +print(conversations.json()) # Get Message List (only for chat) messages = client.get_conversation_messages(user="user_id", conversation_id="conversation_id") messages.raise_for_status() -messages = json.loads(messages.text) print('[messages]') -print(messages) +print(messages.json()) # Rename Conversation (only for chat) -rename_conversation_response = 
client.rename_conversation(conversation_id="conversation_id", name="new_name", user="user_id") +rename_conversation_response = client.rename_conversation(conversation_id="conversation_id", + name="new_name", user="user_id") rename_conversation_response.raise_for_status() -rename_conversation_result = json.loads(rename_conversation_response.text) print('[rename result]') -print(rename_conversation_result) +print(rename_conversation_response.json()) ``` diff --git a/sdks/python-client/dify_client/__init__.py b/sdks/python-client/dify_client/__init__.py index 471b8d199..6fa9d190e 100644 --- a/sdks/python-client/dify_client/__init__.py +++ b/sdks/python-client/dify_client/__init__.py @@ -1 +1 @@ -from dify_client.client import ChatClient, CompletionClient \ No newline at end of file +from dify_client.client import ChatClient, CompletionClient, DifyClient diff --git a/sdks/python-client/dify_client/client.py b/sdks/python-client/dify_client/client.py index 23f9b9c3a..53880c100 100644 --- a/sdks/python-client/dify_client/client.py +++ b/sdks/python-client/dify_client/client.py @@ -6,14 +6,24 @@ class DifyClient: self.api_key = api_key self.base_url = "https://api.dify.ai/v1" - def _send_request(self, method, endpoint, data=None, params=None, stream=False): + def _send_request(self, method, endpoint, json=None, params=None, stream=False): headers = { "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" } url = f"{self.base_url}{endpoint}" - response = requests.request(method, url, json=data, params=params, headers=headers, stream=stream) + response = requests.request(method, url, json=json, params=params, headers=headers, stream=stream) + + return response + + def _send_request_with_files(self, method, endpoint, data, files): + headers = { + "Authorization": f"Bearer {self.api_key}" + } + + url = f"{self.base_url}{endpoint}" + response = requests.request(method, url, data=data, headers=headers, files=files) return response @@ -28,30 +38,39 @@ 
class DifyClient: params = {"user": user} return self._send_request("GET", "/parameters", params=params) - -class CompletionClient(DifyClient): - def create_completion_message(self, inputs, query, response_mode, user): + def file_upload(self, user, files): data = { - "inputs": inputs, - "query": query, - "response_mode": response_mode, "user": user } - return self._send_request("POST", "/completion-messages", data, stream=True if response_mode == "streaming" else False) + return self._send_request_with_files("POST", "/files/upload", data=data, files=files) + + +class CompletionClient(DifyClient): + def create_completion_message(self, inputs, response_mode, user, files=None): + data = { + "inputs": inputs, + "response_mode": response_mode, + "user": user, + "files": files + } + return self._send_request("POST", "/completion-messages", data, + stream=True if response_mode == "streaming" else False) class ChatClient(DifyClient): - def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None): + def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None, files=None): data = { "inputs": inputs, "query": query, "user": user, - "response_mode": response_mode + "response_mode": response_mode, + "files": files } if conversation_id: data["conversation_id"] = conversation_id - return self._send_request("POST", "/chat-messages", data, stream=True if response_mode == "streaming" else False) + return self._send_request("POST", "/chat-messages", data, + stream=True if response_mode == "streaming" else False) def get_conversation_messages(self, user, conversation_id=None, first_id=None, limit=None): params = {"user": user} diff --git a/sdks/python-client/setup.py b/sdks/python-client/setup.py index 3d4e0b0bb..e74748377 100644 --- a/sdks/python-client/setup.py +++ b/sdks/python-client/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh: setup( name="dify-client", - version="0.1.8", 
+ version="0.1.10", author="Dify", author_email="hello@dify.ai", description="A package for interacting with the Dify Service-API", diff --git a/sdks/python-client/tests/test_client.py b/sdks/python-client/tests/test_client.py index f123c1882..ac954ff83 100644 --- a/sdks/python-client/tests/test_client.py +++ b/sdks/python-client/tests/test_client.py @@ -12,15 +12,33 @@ class TestChatClient(unittest.TestCase): def test_create_chat_message(self): response = self.chat_client.create_chat_message({}, "Hello, World!", "test_user") - self.assertIn("message_id", response) + self.assertIn("answer", response.text) + + def test_create_chat_message_with_vision_model_by_remote_url(self): + files = [{ + "type": "image", + "transfer_method": "remote_url", + "url": "your_image_url" + }] + response = self.chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files) + self.assertIn("answer", response.text) + + def test_create_chat_message_with_vision_model_by_local_file(self): + files = [{ + "type": "image", + "transfer_method": "local_file", + "upload_file_id": "your_file_id" + }] + response = self.chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files) + self.assertIn("answer", response.text) def test_get_conversation_messages(self): - response = self.chat_client.get_conversation_messages("test_user") - self.assertIsInstance(response, list) + response = self.chat_client.get_conversation_messages("test_user", "your_conversation_id") + self.assertIn("answer", response.text) def test_get_conversations(self): response = self.chat_client.get_conversations("test_user") - self.assertIsInstance(response, list) + self.assertIn("data", response.text) class TestCompletionClient(unittest.TestCase): @@ -28,8 +46,29 @@ class TestCompletionClient(unittest.TestCase): self.completion_client = CompletionClient(API_KEY) def test_create_completion_message(self): - response = self.completion_client.create_completion_message({}, "What's the 
weather like today?", "blocking", "test_user") - self.assertIn("message_id", response) + response = self.completion_client.create_completion_message({"query": "What's the weather like today?"}, + "blocking", "test_user") + self.assertIn("answer", response.text) + + def test_create_completion_message_with_vision_model_by_remote_url(self): + files = [{ + "type": "image", + "transfer_method": "remote_url", + "url": "your_image_url" + }] + response = self.completion_client.create_completion_message( + {"query": "Describe the picture."}, "blocking", "test_user", files) + self.assertIn("answer", response.text) + + def test_create_completion_message_with_vision_model_by_local_file(self): + files = [{ + "type": "image", + "transfer_method": "local_file", + "upload_file_id": "your_file_id" + }] + response = self.completion_client.create_completion_message( + {"query": "Describe the picture."}, "blocking", "test_user", files) + self.assertIn("answer", response.text) class TestDifyClient(unittest.TestCase): @@ -37,12 +76,24 @@ class TestDifyClient(unittest.TestCase): self.dify_client = DifyClient(API_KEY) def test_message_feedback(self): - response = self.dify_client.message_feedback("test_message_id", 5, "test_user") - self.assertIn("success", response) + response = self.dify_client.message_feedback("your_message_id", 'like', "test_user") + self.assertIn("success", response.text) def test_get_application_parameters(self): response = self.dify_client.get_application_parameters("test_user") - self.assertIsInstance(response, dict) + self.assertIn("user_input_form", response.text) + + def test_file_upload(self): + file_path = "your_image_file_path" + file_name = "panda.jpeg" + mime_type = "image/jpeg" + + with open(file_path, "rb") as file: + files = { + "file": (file_name, file, mime_type) + } + response = self.dify_client.file_upload("test_user", files) + self.assertIn("name", response.text) if __name__ == "__main__":