mirror of
https://gitee.com/mymagicpower/AIAS.git
synced 2024-12-02 12:17:37 +08:00
Add sentence encoder english sdk.
This commit is contained in:
parent
5f67904c43
commit
a6b7e21cdb
@ -24,3 +24,6 @@ build/output/result.gif
|
||||
```
|
||||
|
||||
![gif](https://djl-model.oss-cn-hongkong.aliyuncs.com/AIAS/gan_sdks/result.gif)
|
||||
|
||||
帮助
|
||||
添加依赖库:lib/aias-first-order-lib-0.1.0.jar
|
37
nlp_sdks/sentence_encoder_sdk/README.md
Normal file
37
nlp_sdks/sentence_encoder_sdk/README.md
Normal file
@ -0,0 +1,37 @@
|
||||
# 句向量SDK【英文】
|
||||
句向量是指将语句映射至固定维度的实数向量。
|
||||
将不定长的句子用定长的向量表示,为NLP下游任务提供服务。
|
||||
句向量应用:
|
||||
- 语义搜索,通过句向量相似性,检索语料库中与query最匹配的文本
|
||||
- 文本聚类,文本转为定长向量,通过聚类模型可无监督聚集相似文本
|
||||
- 文本分类,将文本表示成句向量后,直接用简单分类器即可训练文本分类模型
|
||||
|
||||
### SDK包含两个模型:(模型较大,首次运行时请耐心等待下载)
|
||||
- SentenceEncoder 500M
|
||||
- SentenceEncoderLarge5 1G
|
||||
|
||||
### SDK功能:
|
||||
- 句向量提取
|
||||
- 相似度计算
|
||||
|
||||
## 运行例子 - SentenceEncoderExample
|
||||
- 测试语句:
|
||||
- I am a sentence for which I would like to get its embedding
|
||||
- I am a sentence
|
||||
- I am a sentence for which I would like to get ...
|
||||
|
||||
运行成功后,命令行应该看到下面的信息:
|
||||
```text
|
||||
...
|
||||
[INFO ] - length: 512
|
||||
|
||||
[INFO ] - [0.050808605, -0.016524296, 0.015737822, -0.042864114, ..., 0.017881196]
|
||||
[INFO ] - [0.018138515, -0.037706323, 0.04290087, -0.019213252, ..., 0.058189757]
|
||||
[INFO ] - [0.04447042, -0.07614114, 0.0073701036, -0.045335855, ..., 0.054146692]
|
||||
|
||||
[INFO ] - 0.80258906
|
||||
[INFO ] - 0.90100515
|
||||
```
|
||||
|
||||
### 帮助
|
||||
运行前请将 lib/aias-sentence-encoder-lib-0.1.0.jar 手动添加为项目依赖库(该 jar 未在 pom.xml 中声明)。
|
Binary file not shown.
124
nlp_sdks/sentence_encoder_sdk/pom.xml
Normal file
124
nlp_sdks/sentence_encoder_sdk/pom.xml
Normal file
@ -0,0 +1,124 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
~ Licensed to the Apache Software Foundation (ASF) under one
|
||||
~ or more contributor license agreements. See the NOTICE file
|
||||
~ distributed with this work for additional information
|
||||
~ regarding copyright ownership. The ASF licenses this file
|
||||
~ to you under the Apache License, Version 2.0 (the
|
||||
~ "License"); you may not use this file except in compliance
|
||||
~ with the License. You may obtain a copy of the License at
|
||||
~
|
||||
~ http://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing,
|
||||
~ software distributed under the License is distributed on an
|
||||
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
~ KIND, either express or implied. See the License for the
|
||||
~ specific language governing permissions and limitations
|
||||
~ under the License.
|
||||
-->
|
||||
|
||||
<!--
  Maven build for the English sentence encoder SDK examples.
  Targets Java 8 and runs on DJL with the TensorFlow engine.
  NOTE: lib/aias-sentence-encoder-lib-0.1.0.jar is not declared here; per the
  README it has to be added to the project as a library by hand.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>calvin</groupId>
    <artifactId>sentence-encoder-sdk</artifactId>
    <version>0.1</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <djl.version>0.12.0</djl.version>
        <!-- 2.17.1 still runs on Java 8 and is patched against the Log4Shell
             family of issues (CVE-2021-44228 and follow-ups) present in 2.12.1. -->
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <!-- Single source of truth: reuse the language level from <properties>. -->
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.8.6</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.30</version>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.4</version>
        </dependency>
        <!-- SLF4J 1.7.x binding; brings in log4j-api and log4j-core transitively. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
        </dependency>

        <!-- DJL core -->
        <dependency>
            <groupId>ai.djl</groupId>
            <artifactId>api</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl</groupId>
            <artifactId>basicdataset</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl</groupId>
            <artifactId>model-zoo</artifactId>
            <version>${djl.version}</version>
        </dependency>

        <!-- Tensorflow -->
        <dependency>
            <groupId>ai.djl.tensorflow</groupId>
            <artifactId>tensorflow-api</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <dependency>
            <groupId>ai.djl.tensorflow</groupId>
            <artifactId>tensorflow-engine</artifactId>
            <version>${djl.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>ai.djl.tensorflow</groupId>
            <artifactId>tensorflow-model-zoo</artifactId>
            <version>${djl.version}</version>
        </dependency>
        <!-- Native library artifact is versioned by TensorFlow release, not by DJL release. -->
        <dependency>
            <groupId>ai.djl.tensorflow</groupId>
            <artifactId>tensorflow-native-auto</artifactId>
            <version>2.4.1</version>
            <scope>runtime</scope>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.18</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>6.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>
|
47
nlp_sdks/sentence_encoder_sdk/sentence_encoder_sdk.iml
Normal file
47
nlp_sdks/sentence_encoder_sdk/sentence_encoder_sdk.iml
Normal file
@ -0,0 +1,47 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
  IntelliJ IDEA module descriptor for the sentence_encoder_sdk module (flagged as a Maven module).
  Declares the standard Maven source/resource/test folders under $MODULE_DIR$, excludes target/,
  and lists the libraries on the module classpath. Entries named "Maven: ..." carry Maven
  coordinates; "aias-sentence-encoder-lib-0.1.0" is a project-level library without the
  "Maven:" prefix (presumably the jar the README asks to add by hand; confirm).
-->
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||
<component name="CheckStyle-IDEA-Module">
|
||||
<option name="configuration">
|
||||
<map />
|
||||
</option>
|
||||
</component>
|
||||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
|
||||
<output url="file://$MODULE_DIR$/target/classes" />
|
||||
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" name="aias-sentence-encoder-lib-0.1.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.google.code.gson:gson:2.8.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.30" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-cli:commons-cli:1.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-slf4j-impl:2.12.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.12.1" level="project" />
|
||||
<orderEntry type="library" scope="RUNTIME" name="Maven: org.apache.logging.log4j:log4j-core:2.12.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: ai.djl:api:0.12.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: net.java.dev.jna:jna:5.8.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.20" level="project" />
|
||||
<orderEntry type="library" name="Maven: ai.djl:basicdataset:0.12.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-csv:1.8" level="project" />
|
||||
<orderEntry type="library" name="Maven: ai.djl:model-zoo:0.12.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: ai.djl.tensorflow:tensorflow-api:0.12.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.bytedeco:javacpp:1.5.5" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:3.8.0" level="project" />
|
||||
<orderEntry type="library" scope="RUNTIME" name="Maven: ai.djl.tensorflow:tensorflow-engine:0.12.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: ai.djl.tensorflow:tensorflow-model-zoo:0.12.0" level="project" />
|
||||
<orderEntry type="library" scope="RUNTIME" name="Maven: ai.djl.tensorflow:tensorflow-native-auto:2.4.1" level="project" />
|
||||
<orderEntry type="library" scope="PROVIDED" name="Maven: org.projectlombok:lombok:1.18.18" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.testng:testng:6.8.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.10" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.beanshell:bsh:2.0b4" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.beust:jcommander:1.27" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.yaml:snakeyaml:1.6" level="project" />
|
||||
</component>
|
||||
</module>
|
@ -0,0 +1,51 @@
|
||||
package me.calvin.example;
|
||||
|
||||
import ai.djl.ModelException;
|
||||
import ai.djl.inference.Predictor;
|
||||
import ai.djl.repository.zoo.ModelZoo;
|
||||
import ai.djl.repository.zoo.ZooModel;
|
||||
import ai.djl.translate.TranslateException;
|
||||
import me.calvin.aias.SentenceEncoder;
|
||||
import me.calvin.aias.util.FeatureComparison;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public final class SentenceEncoderExample {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SentenceEncoderExample.class);
|
||||
|
||||
private SentenceEncoderExample() {}
|
||||
|
||||
public static void main(String[] args) throws IOException, ModelException, TranslateException {
|
||||
List<String> inputs = new ArrayList<>();
|
||||
inputs.add("I am a sentence for which I would like to get its embedding");
|
||||
inputs.add("I am a sentence");
|
||||
inputs.add("I am a sentence for which I would like to get ...");
|
||||
|
||||
SentenceEncoder sentenceEncoder = new SentenceEncoder();
|
||||
try (ZooModel<String[], float[][]> model = ModelZoo.loadModel(sentenceEncoder.criteria());
|
||||
Predictor<String[], float[][]> predictor = model.newPredictor()) {
|
||||
|
||||
float[][] embeddings = predictor.predict(inputs.toArray(new String[0]));
|
||||
|
||||
float[] feature1 = embeddings[0];
|
||||
float[] feature2 = embeddings[1];
|
||||
float[] feature3 = embeddings[2];
|
||||
|
||||
logger.info("length: " + feature1.length);
|
||||
|
||||
logger.info(Arrays.toString(feature1));
|
||||
logger.info(Arrays.toString(feature2));
|
||||
logger.info(Arrays.toString(feature3));
|
||||
|
||||
logger.info(Float.toString(FeatureComparison.calculSimilar(feature1, feature2)));
|
||||
logger.info(Float.toString(FeatureComparison.calculSimilar(feature1, feature3)));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
package me.calvin.example;
|
||||
|
||||
import ai.djl.ModelException;
|
||||
import ai.djl.inference.Predictor;
|
||||
import ai.djl.repository.zoo.ModelZoo;
|
||||
import ai.djl.repository.zoo.ZooModel;
|
||||
import ai.djl.translate.TranslateException;
|
||||
import me.calvin.aias.SentenceEncoderLarge5;
|
||||
import me.calvin.aias.util.FeatureComparison;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public final class SentenceEncoderLarge5Example {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(SentenceEncoderLarge5Example.class);
|
||||
|
||||
private SentenceEncoderLarge5Example() {}
|
||||
|
||||
public static void main(String[] args) throws IOException, ModelException, TranslateException {
|
||||
List<String> inputs = new ArrayList<>();
|
||||
inputs.add("I am a sentence for which I would like to get its embedding");
|
||||
inputs.add("I am a sentence");
|
||||
inputs.add("I am a sentence for which I would like to get ...");
|
||||
|
||||
SentenceEncoderLarge5 sentenceEncoder = new SentenceEncoderLarge5();
|
||||
try (ZooModel<String[], float[][]> model = ModelZoo.loadModel(sentenceEncoder.criteria());
|
||||
Predictor<String[], float[][]> predictor = model.newPredictor()) {
|
||||
|
||||
float[][] embeddings = predictor.predict(inputs.toArray(new String[0]));
|
||||
|
||||
float[] feature1 = embeddings[0];
|
||||
float[] feature2 = embeddings[1];
|
||||
float[] feature3 = embeddings[2];
|
||||
logger.info("length: " + feature1.length);
|
||||
|
||||
logger.info(Arrays.toString(feature1));
|
||||
logger.info(Arrays.toString(feature2));
|
||||
logger.info(Arrays.toString(feature3));
|
||||
|
||||
logger.info(Float.toString(FeatureComparison.calculSimilar(feature1, feature2)));
|
||||
logger.info(Float.toString(FeatureComparison.calculSimilar(feature1, feature3)));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
17
nlp_sdks/sentence_encoder_sdk/src/main/resources/log4j2.xml
Normal file
17
nlp_sdks/sentence_encoder_sdk/src/main/resources/log4j2.xml
Normal file
@ -0,0 +1,17 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
  Log4j 2 configuration for the SDK examples.
  Everything is written to standard output as "[LEVEL] - message".
-->
<Configuration status="INFO">
    <Appenders>
        <!-- The only appender; both loggers below reference it by name. -->
        <Console name="console" target="SYSTEM_OUT">
            <PatternLayout pattern="[%-5level] - %msg%n"/>
        </Console>
    </Appenders>
    <Loggers>
        <Root level="info" additivity="false">
            <AppenderRef ref="console"/>
        </Root>
        <!-- Level for me.calvin.* comes from the me.calvin.logging.level system property,
             falling back to info. additivity="false" stops these events from also being
             passed to the Root logger's appender, which would print them twice. -->
        <Logger name="me.calvin" level="${sys:me.calvin.logging.level:-info}" additivity="false">
            <AppenderRef ref="console"/>
        </Logger>
    </Loggers>
</Configuration>
|
Loading…
Reference in New Issue
Block a user