fix: Qwen model was unable to generate embeddings

2024-11-29 18:38:17 +08:00 · 2024-11-20 10:15:51 +08:00 · 2024-11-20 10:15:51 +08:00 · a1be752d92
commit a1be752d92
parent c1e440b604
4 changed files with 68 additions and 106 deletions
--- a/agents-flex-llm/agents-flex-llm-qwen/src/main/java/com/agentsflex/llm/qwen/QwenLlm.java
+++ b/agents-flex-llm/agents-flex-llm-qwen/src/main/java/com/agentsflex/llm/qwen/QwenLlm.java
@ -26,9 +26,7 @@ import com.agentsflex.core.llm.client.LlmClientListener;
 import com.agentsflex.core.llm.client.impl.SseClient;
 import com.agentsflex.core.llm.embedding.EmbeddingOptions;
 import com.agentsflex.core.llm.response.AiMessageResponse;
-import com.agentsflex.core.message.AiMessage;
 import com.agentsflex.core.parser.AiMessageParser;
-import com.agentsflex.core.parser.impl.DefaultAiMessageParser;
 import com.agentsflex.core.prompt.Prompt;
 import com.agentsflex.core.store.VectorData;
 import com.agentsflex.core.util.StringUtil;
@ -44,8 +42,9 @@ public class QwenLlm extends BaseLlm<QwenLlmConfig> {

    HttpClient httpClient = new HttpClient();

-    public AiMessageParser aiMessageParser = QwenLlmUtil.getAiMessageParser();
-//    public FunctionMessageParser functionMessageParser = QwenLlmUtil.getFunctionMessageParser();
+    public AiMessageParser aiMessageParser = QwenLlmUtil.getAiMessageParser(false);
+    public AiMessageParser streamMessageParser = QwenLlmUtil.getAiMessageParser(true);
+

    public QwenLlm(QwenLlmConfig config) {
        super(config);
@ -59,9 +58,9 @@ public class QwenLlm extends BaseLlm<QwenLlmConfig> {
        headers.put("Authorization", "Bearer " + getConfig().getApiKey());


-        String payload = QwenLlmUtil.promptToPayload(prompt, config, options);
+        String payload = QwenLlmUtil.promptToPayload(prompt, config, options, false);
        String endpoint = config.getEndpoint();
-        String response = httpClient.post(endpoint + "/api/v1/services/aigc/text-generation/generation", headers, payload);
+        String response = httpClient.post(endpoint + "/compatible-mode/v1/chat/completions", headers, payload);

        if (config.isDebug()) {
            System.out.println(">>>>receive payload:" + response);
@ -95,34 +94,23 @@ public class QwenLlm extends BaseLlm<QwenLlmConfig> {
        headers.put("Authorization", "Bearer " + getConfig().getApiKey());
        headers.put("X-DashScope-SSE", "enable"); //stream

-        String payload = QwenLlmUtil.promptToPayload(prompt, config, options);
-
-        LlmClientListener clientListener = new BaseLlmClientListener(this, llmClient, listener, prompt, new DefaultAiMessageParser() {
-            int prevMessageLength = 0;
-            @Override
-            public AiMessage parse(JSONObject content) {
-                AiMessage aiMessage = aiMessageParser.parse(content);
-                String messageContent = aiMessage.getContent();
-                aiMessage.setContent(messageContent.substring(prevMessageLength));
-                prevMessageLength = messageContent.length();
-                return aiMessage;
-            }
-        });
+        String payload = QwenLlmUtil.promptToPayload(prompt, config, options, true);
+        LlmClientListener clientListener = new BaseLlmClientListener(this, llmClient, listener, prompt, streamMessageParser);

        String endpoint = config.getEndpoint();
-        llmClient.start(endpoint + "/api/v1/services/aigc/text-generation/generation", headers, payload, clientListener, config);
+        llmClient.start(endpoint + "/compatible-mode/v1/chat/completions", headers, payload, clientListener, config);
    }

+
    @Override
    public VectorData embed(Document document, EmbeddingOptions options) {
        String payload = QwenLlmUtil.promptToEnabledPayload(document, options, config);
-
-
        Map<String, String> headers = new HashMap<>();
        headers.put("Content-Type", "application/json");
        headers.put("Authorization", "Bearer " + getConfig().getApiKey());

-        String response = httpClient.post(QwenLlmUtil.createEmbedURL(config), headers, payload);
+        String url = config.getEndpoint() + "/compatible-mode/v1/embeddings";
+        String response = httpClient.post(url, headers, payload);

        if (config.isDebug()) {
            System.out.println(">>>>receive payload:" + response);
@ -133,9 +121,9 @@ public class QwenLlm extends BaseLlm<QwenLlmConfig> {
        }

        VectorData vectorData = new VectorData();
-        Object embedding = JSONPath.read(response, "$.output.embeddings[0].embedding");
-        double[] vector = JSON.parseObject(JSON.toJSONString(embedding), double[].class);
-        vectorData.setVector(vector);
+        double[] embedding = JSONPath.read(response, "$.data[0].embedding", double[].class);
+        vectorData.setVector(embedding);
+
        return vectorData;
    }

--- a/agents-flex-llm/agents-flex-llm-qwen/src/main/java/com/agentsflex/llm/qwen/QwenLlmConfig.java
+++ b/agents-flex-llm/agents-flex-llm-qwen/src/main/java/com/agentsflex/llm/qwen/QwenLlmConfig.java
@ -19,12 +19,20 @@ import com.agentsflex.core.llm.LlmConfig;

 public class QwenLlmConfig extends LlmConfig {

-	private static final String DEFAULT_MODEL = "qwen-turbo";
-	private static final String DEFAULT_ENDPOINT = "https://dashscope.aliyuncs.com";
+    private static final String DEFAULT_MODEL = "qwen-turbo";
+    private static final String DEFAULT_ENDPOINT = "https://dashscope.aliyuncs.com";
+    private String defaultEmbeddingModel = "text-embedding-v1";

-	public QwenLlmConfig() {
-		setEndpoint(DEFAULT_ENDPOINT);
-		setModel(DEFAULT_MODEL);
-	}
+    public QwenLlmConfig() {
+        setEndpoint(DEFAULT_ENDPOINT);
+        setModel(DEFAULT_MODEL);
+    }

+    public String getDefaultEmbeddingModel() {
+        return defaultEmbeddingModel;
+    }
+
+    public void setDefaultEmbeddingModel(String defaultEmbeddingModel) {
+        this.defaultEmbeddingModel = defaultEmbeddingModel;
+    }
 }
--- a/agents-flex-llm/agents-flex-llm-qwen/src/main/java/com/agentsflex/llm/qwen/QwenLlmUtil.java
+++ b/agents-flex-llm/agents-flex-llm-qwen/src/main/java/com/agentsflex/llm/qwen/QwenLlmUtil.java
@ -18,101 +18,49 @@ package com.agentsflex.llm.qwen;
 import com.agentsflex.core.document.Document;
 import com.agentsflex.core.llm.ChatOptions;
 import com.agentsflex.core.llm.embedding.EmbeddingOptions;
-import com.agentsflex.core.message.FunctionCall;
+import com.agentsflex.core.message.HumanMessage;
 import com.agentsflex.core.message.Message;
-import com.agentsflex.core.message.MessageStatus;
 import com.agentsflex.core.parser.AiMessageParser;
 import com.agentsflex.core.parser.impl.DefaultAiMessageParser;
 import com.agentsflex.core.prompt.DefaultPromptFormat;
 import com.agentsflex.core.prompt.Prompt;
 import com.agentsflex.core.prompt.PromptFormat;
+import com.agentsflex.core.util.CollectionUtil;
 import com.agentsflex.core.util.Maps;
-import com.alibaba.fastjson.JSON;
-import com.alibaba.fastjson.JSONArray;
-import com.alibaba.fastjson.JSONObject;
-import com.alibaba.fastjson.JSONPath;

-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
-import java.util.Map;

 public class QwenLlmUtil {

    private static final PromptFormat promptFormat = new DefaultPromptFormat();

-    public static AiMessageParser getAiMessageParser() {
-        DefaultAiMessageParser aiMessageParser = new DefaultAiMessageParser();
-        aiMessageParser.setContentPath("$.output.choices[0].message.content");
-        aiMessageParser.setTotalTokensPath("$.usage.total_tokens");
-        aiMessageParser.setTotalTokensPath("$.usage.total_tokens");
-        aiMessageParser.setPromptTokensPath("$.usage.input_tokens");
-        aiMessageParser.setCompletionTokensPath("$.usage.output_tokens");
-
-        aiMessageParser.setStatusParser(content -> {
-            Object finishReason = JSONPath.eval(content, "$.output.choices[0].finish_reason");
-            if (finishReason != null) {
-                return MessageStatus.END;
-            }
-            return MessageStatus.MIDDLE;
-        });
-
-        aiMessageParser.setCallsParser(content -> {
-            JSONArray toolCalls = (JSONArray) JSONPath.eval(content, "$.output.choices[0].message.tool_calls");
-            if (toolCalls == null || toolCalls.isEmpty()) {
-                return Collections.emptyList();
-            }
-            List<FunctionCall> functionCalls = new ArrayList<>();
-            for (int i = 0; i < toolCalls.size(); i++) {
-                JSONObject jsonObject = toolCalls.getJSONObject(i);
-                JSONObject functionObject = jsonObject.getJSONObject("function");
-                if (functionObject != null) {
-                    FunctionCall functionCall = new FunctionCall();
-                    functionCall.setName(functionObject.getString("name"));
-                    Object arguments = functionObject.get("arguments");
-                    if (arguments instanceof Map) {
-                        //noinspection unchecked
-                        functionCall.setArgs((Map<String, Object>) arguments);
-                    } else if (arguments instanceof String) {
-                        //noinspection unchecked
-                        functionCall.setArgs(JSON.parseObject(arguments.toString(), Map.class));
-                    }
-                    functionCalls.add(functionCall);
-                }
-            }
-            return functionCalls;
-        });
-
-        return aiMessageParser;
+    public static AiMessageParser getAiMessageParser(boolean isStream) {
+        return DefaultAiMessageParser.getChatGPTMessageParser(isStream);
    }


-    public static String promptToPayload(Prompt prompt, QwenLlmConfig config, ChatOptions options) {
+    public static String promptToPayload(Prompt prompt, QwenLlmConfig config, ChatOptions options, boolean withStream) {
        // https://help.aliyun.com/zh/dashscope/developer-reference/api-details?spm=a2c4g.11186623.0.0.1ff6fa70jCgGRc#b8ebf6b25eul6
-
        List<Message> messages = prompt.toMessages();
+        HumanMessage humanMessage = (HumanMessage) CollectionUtil.lastItem(messages);
        return Maps.of("model", config.getModel())
-            .set("input", Maps.of("messages", promptFormat.toMessagesJsonObject(messages)))
-            .set("parameters", Maps.of("result_format", "message")
-                .setIfNotEmpty("tools", promptFormat.toFunctionsJsonObject(messages.get(messages.size() - 1)))
-                .setIf(map -> !map.containsKey("tools") && options.getTemperature() > 0, "temperature", options.getTemperature())
-                .setIf(map -> !map.containsKey("tools") && options.getMaxTokens() != null, "max_tokens", options.getMaxTokens())
-                .setIfNotNull("top_p", options.getTopP())
-                .setIfNotNull("top_k", options.getTopK())
-                .setIfNotEmpty("stop", options.getStop())
-            ).toJSON();
-    }
-
-    public static String promptToEnabledPayload(Document text, EmbeddingOptions options, QwenLlmConfig config) {
-        // https://help.aliyun.com/zh/model-studio/developer-reference/text-embedding-synchronous-api?spm=a2c4g.11186623.0.nextDoc.100230b7arAV4X#e6bf7ae0fedrb
-        List<String> list = new ArrayList<>();
-        list.add(text.getContent());
-        return Maps.of("model", config.getModel())
-            .set("input", Maps.of("texts", list))
+            .set("messages", promptFormat.toMessagesJsonObject(messages))
+            .setIf(withStream, "stream", true)
+            .setIfNotEmpty("tools", promptFormat.toFunctionsJsonObject(humanMessage))
+            .setIfContainsKey("tools", "tool_choice", humanMessage.getToolChoice())
+            .setIfNotNull("top_p", options.getTopP())
+            .setIfNotEmpty("stop", options.getStop())
+            .setIf(map -> !map.containsKey("tools") && options.getTemperature() > 0, "temperature", options.getTemperature())
+            .setIf(map -> !map.containsKey("tools") && options.getMaxTokens() != null, "max_tokens", options.getMaxTokens())
            .toJSON();
    }

-    public static String createEmbedURL(QwenLlmConfig config) {
-        return "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding";
+    public static String promptToEnabledPayload(Document text, EmbeddingOptions options, QwenLlmConfig config) {
+        //https://help.aliyun.com/zh/model-studio/developer-reference/embedding-interfaces-compatible-with-openai?spm=a2c4g.11186623.0.i3
+        return Maps.of("model", options.getModelOrDefault(config.getDefaultEmbeddingModel()))
+            .set("encoding_format", "float")
+            .set("input", text.getContent())
+            .toJSON();
    }
+
 }
--- a/agents-flex-llm/agents-flex-llm-qwen/src/test/java/com/agentsflex/llm/qwen/test/QwenTest.java
+++ b/agents-flex-llm/agents-flex-llm-qwen/src/test/java/com/agentsflex/llm/qwen/test/QwenTest.java
@ -1,7 +1,9 @@
 package com.agentsflex.llm.qwen.test;

+import com.agentsflex.core.document.Document;
 import com.agentsflex.core.llm.Llm;
 import com.agentsflex.core.llm.response.AiMessageResponse;
+import com.agentsflex.core.store.VectorData;
 import com.agentsflex.llm.qwen.QwenLlm;
 import com.agentsflex.llm.qwen.QwenLlmConfig;
 import com.agentsflex.core.message.AiMessage;
@ -12,8 +14,10 @@ public class QwenTest {

    public static void main(String[] args) throws InterruptedException {
        QwenLlmConfig config = new QwenLlmConfig();
+
+        //https://bailian.console.aliyun.com/?apiKey=1#/api-key
        config.setApiKey("sk-28a6be3236****");
-        config.setModel("qwen-turbo");
+        config.setModel("qwen-plus");

        Llm llm = new QwenLlm(config);
        llm.chatStream("请写一个小兔子战胜大灰狼的故事", (context, response) -> {
@ -39,4 +43,18 @@ public class QwenTest {
        System.out.println(response.callFunctions());
        // "Today it will be dull and overcast in 北京"
    }
+
+
+    @Test
+    public void testEmbedding() throws InterruptedException {
+        QwenLlmConfig config = new QwenLlmConfig();
+        config.setApiKey("sk-28a6be3236****");
+        config.setModel("qwen-turbo");
+
+        Llm llm = new QwenLlm(config);
+        VectorData vectorData = llm.embed(Document.of("test"));
+
+
+        System.out.println(vectorData);
+    }
 }