⏸ segment(all): pre i18n

This commit is contained in:
小吾立 2024-06-13 10:56:02 +08:00
parent 724ed029cb
commit f77a178f00
5 changed files with 244 additions and 106 deletions

View File

@ -5,27 +5,62 @@ import cn.hutool.core.date.SystemClock;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.io.resource.ResourceUtil;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.net.url.UrlBuilder;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.*;
import cn.hutool.system.SystemUtil;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.io.InputStream;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author bwcx_jzy1
* @since 2024/6/12
*/
public class BaidubceRpcTexttransTest {
public class BaiduBceRpcTexttransTest {
@Test
public void doTranslate() {
public void testTranslate() {
    // Smoke test: push a few sample messages through the translation round trip and print the result.
    Collection<String> samples = CollUtil.newArrayList("请输入正确的验证码", "请传入 body 参数", "开始准备项目重启:{} {}");
    System.out.println(this.doTranslate(samples));
}
/**
 * Reports whether any key of the given object still starts with the {@code i18n.} prefix.
 * <p>
 * Callers in this class treat a {@code true} result as a failed extraction or translation.
 *
 * @param jsonObject object whose keys are inspected
 * @return {@code true} if at least one key starts with {@code i18n.}
 */
private boolean checkHasI18nKey(JSONObject jsonObject) {
    return jsonObject.keySet()
        .stream()
        .anyMatch(name -> StrUtil.startWith(name, "i18n."));
}
/**
 * Translates the given words, repeating the request until the response no longer
 * contains any key starting with {@code i18n.}.
 * <p>
 * There is no retry limit: the method only returns once an attempt succeeds.
 *
 * @param words words to translate
 * @return the first response that passes the {@code i18n.} key check
 */
public JSONObject doTranslate(Collection<String> words) {
    JSONObject translated = this.doTranslate2(words);
    while (checkHasI18nKey(translated)) {
        System.err.println("翻译失败或者提取失败,自动重试," + translated);
        translated = this.doTranslate2(words);
    }
    return translated;
}
private JSONObject doTranslate2(Collection<String> words) {
String token = this.getToken();
UrlBuilder urlBuilder = UrlBuilder.of("https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions");
urlBuilder.addQuery("access_token", token);
@ -40,14 +75,17 @@ public class BaidubceRpcTexttransTest {
//
InputStream inputStream = ResourceUtil.getStream("baidubce_translate.txt");
String string = IoUtil.readUtf8(inputStream);
HashMap<Object, Object> map = new HashMap<>();
JSONArray jsonArray = new JSONArray();
jsonArray.add("测试变量");
jsonArray.add("连接关闭 {} {}");
jsonArray.add("清除临时文件失败,请手动清理:");
map.put("REQUEST_STR", jsonArray.toString());
string = StrUtil.format(string, map);
System.out.println(string);
//
JSONObject from = new JSONObject();
for (String value : words) {
String key;
do {
key = StrUtil.format("i18n.{}", RandomUtil.randomStringUpper(6));
} while (from.containsKey(key));
from.put(key, value);
}
string = StrUtil.format(string, MapUtil.of("REQUEST_STR", from.toString()));
//System.out.println(string);
message.put("content", string);
jsonObject.put("messages", CollUtil.newArrayList(message));
//
@ -55,9 +93,26 @@ public class BaidubceRpcTexttransTest {
String result = httpRequest.thenFunction(httpResponse -> {
String body = httpResponse.body();
JSONObject jsonObject1 = JSONObject.parseObject(body);
if (jsonObject1.getIntValue("error_code") != 0) {
Assert.fail(jsonObject1.getString("error_msg"));
}
return jsonObject1.getString("result");
});
System.out.println(result);
String patternString = "(?s)```json\\s*([^`]*?)\\s*```";
Pattern pattern = Pattern.compile(patternString);
Matcher matcher = pattern.matcher(result);
//
JSONObject jsonObject1 = null;
while (matcher.find()) {
//System.out.println(result);
String jsonContent = matcher.group(1);
jsonObject1 = JSONObject.parseObject(jsonContent);
if (!this.checkHasI18nKey(jsonObject1)) {
return jsonObject1;
}
}
Assert.assertNotNull("翻译失败或者提取失败", jsonObject1);
return jsonObject1;
}
private String getToken() {
@ -77,9 +132,9 @@ public class BaidubceRpcTexttransTest {
}
/**
* https://cloud.baidu.com/doc/WENXINWORKSHOP/s/7lpch74jm
* <a href="https://cloud.baidu.com/doc/WENXINWORKSHOP/s/7lpch74jm">https://cloud.baidu.com/doc/WENXINWORKSHOP/s/7lpch74jm</a>
*
* @return
* @return token
*/
private JSONObject doTokenByApi(File file) {
String bceCi = SystemUtil.get("JPOM_TRANSLATE_BAIDUBCE_CI", StrUtil.EMPTY);

View File

@ -3,10 +3,14 @@ package i8n;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.CharsetUtil;
import cn.hutool.core.util.RandomUtil;
import cn.hutool.core.util.PageUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.crypto.SecureUtil;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import lombok.Lombok;
import lombok.SneakyThrows;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedReader;
@ -20,6 +24,7 @@ import java.util.*;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* 提交代码中的中文并生成随机 key 转存到 properties
@ -30,10 +35,6 @@ import java.util.regex.Pattern;
* @since 2024/6/11
*/
public class ExtractI18nTest {
/**
* 中文字符串
*/
private Collection<String> wordsSet = new LinkedHashSet<>();
/**
* 中文对应的 key map
* <p>
@ -49,6 +50,8 @@ public class ExtractI18nTest {
* 项目根路径
*/
private File rootFile;
private final Properties zhProperties = new Properties();
/**
* 匹配中文字符的正则表达式
*/
@ -81,38 +84,125 @@ public class ExtractI18nTest {
"@ValidatorItem", "nameKey = \""
};
@Test
@SneakyThrows
public void extract() {
/**
 * Resolves the project root and loads the existing Chinese message bundle before each test.
 *
 * @throws Exception if the properties file cannot be read
 */
@Before
public void before() throws Exception {
    // The project root is the parent of the current working directory (an empty path resolves to it).
    rootFile = new File("").getAbsoluteFile().getParentFile();
    // Location of the Chinese resource bundle
    File zhPropertiesFile = FileUtil.file(rootFile, "common/src/main/resources/i18n/messages_zh_CN.properties");
    try (BufferedReader reader = FileUtil.getReader(zhPropertiesFile, CharsetUtil.CHARSET_UTF_8)) {
        zhProperties.load(reader);
    }
}
@Test
public void extract() {
// 删除临时文件
FileUtil.del(FileUtil.file(rootFile, "i18n-temp"));
// 中文字符串
Set<String> wordsSet = new LinkedHashSet<>();
// 提取中文
walkFile(file, file1 -> {
walkFile(rootFile, file1 -> {
try {
for (Pattern chinesePattern : chinesePatterns) {
verifyDuplicates(file1, chinesePattern);
extractFile(file1, chinesePattern);
extractFile(file1, chinesePattern, wordsSet);
}
} catch (Exception e) {
throw Lombok.sneakyThrow(e);
}
});
// 生成 key
generateKey(file);
// 检查去除前后空格后是否重复
Map<String, Long> collect = wordsSet.stream()
.map(StrUtil::trim)
.collect(Collectors.groupingBy(e -> e, Collectors.counting()));
for (Map.Entry<String, Long> entry : collect.entrySet()) {
long value = entry.getValue();
Assert.assertEquals("[" + entry.getKey() + "]出现去重空格后重复", 1L, value);
}
// 语意化中文存储为 key
Collection<String> wordsSetSort = CollUtil.sort(wordsSet, String::compareTo);
int pageSize = 50;
int total = CollUtil.size(wordsSet);
int page = PageUtil.totalPage(total, pageSize);
JSONObject allResult = new JSONObject();
//
for (int i = PageUtil.getFirstPageNo(); i <= page; i++) {
int start = PageUtil.getStart(i, pageSize);
int end = PageUtil.getEnd(i, pageSize);
List<String> sub = CollUtil.sub(wordsSetSort, start, end);
while (true) out:{
BaiduBceRpcTexttransTest bceRpcTexttrans = new BaiduBceRpcTexttransTest();
JSONObject jsonObject = bceRpcTexttrans.doTranslate(sub);
System.out.println("翻译结果:" + jsonObject);
// 转换为可用 key
for (Map.Entry<String, Object> entry : jsonObject.entrySet()) {
String key = entry.getKey();
String value = (String) entry.getValue();
String originalValue = findOriginal(sub, value);
if (originalValue == null) {
System.err.println("翻译后的中文和翻译前的中文不一致(需要重试):" + value);
break out;
}
String buildKey = this.buildKey(key, originalValue, allResult);
allResult.put(buildKey, originalValue);
}
// 提前保存
File wordsFile = FileUtil.file(rootFile, "common/src/main/resources/i18n/words.json");
FileUtil.writeString(JSONArray.toJSONString(allResult), wordsFile, StandardCharsets.UTF_8);
break;
}
}
}
/**
 * Builds a key of the form {@code i18n.<underline_case_key>.<md5 prefix>} that is not yet
 * present in {@code jsonObject}.
 * <p>
 * The MD5 prefix of {@code value} starts at 4 characters and grows by 2 on every collision;
 * an assertion fails once the requested prefix is longer than the digest.
 *
 * @param key        name to convert to underline case for the middle part of the key
 * @param value      text whose MD5 digest supplies the suffix
 * @param jsonObject existing keys to avoid
 * @return a key that {@code jsonObject} does not contain
 */
private String buildKey(String key, String value, JSONObject jsonObject) {
    for (int prefixLen = 4; ; prefixLen += 2) {
        String digest = SecureUtil.md5(value);
        Assert.assertTrue("截取中文 md5 key 超范围:" + value, digest.length() >= prefixLen);
        String suffix = digest.substring(0, prefixLen);
        String candidate = StrUtil.format("i18n.{}.{}", StrUtil.toUnderlineCase(key), suffix);
        if (!jsonObject.containsKey(candidate)) {
            return candidate;
        }
    }
}
/**
 * Finds the original Chinese string that corresponds to a value returned by the model.
 * <p>
 * The model may drop leading or trailing whitespace, so an entry matches when it equals
 * {@code value} either as-is or after trimming. The entry from {@code list} is returned
 * (not {@code value}), so the caller keeps the text exactly as it appears in the sources.
 *
 * @param list  original strings that were sent for translation
 * @param value string returned by the model
 * @return the matching original string, or {@code null} if none matches
 */
private String findOriginal(List<String> list, String value) {
    for (String s : list) {
        if (StrUtil.equals(s, value) || StrUtil.equals(StrUtil.trim(s), value)) {
            // Return the untrimmed original rather than the model's echo
            return s;
        }
    }
    return null;
}
/**
 * Walks the project tree and runs the quoted-Chinese replacement on each visited file,
 * once per configured pattern.
 */
@Test
public void replace() {
    walkFile(rootFile, sourceFile -> {
        for (Pattern quotedChinese : chinesePatterns) {
            try {
                replaceQuotedChineseInFile(sourceFile, quotedChinese);
            } catch (Exception e) {
                // Rethrow checked exceptions from inside the lambda
                throw Lombok.sneakyThrow(e);
            }
        }
    });
}
/**
@ -144,64 +234,57 @@ public class ExtractI18nTest {
*/
private void generateKey(File file) throws IOException {
// /Users/user/IdeaProjects/Jpom/jpom-parent/modules/.DS_Store
// 中文资源文件存储路径
File zhPropertiesFile = FileUtil.file(file, "common/src/main/resources/i18n/messages_zh_CN.properties");
Properties zhProperties = new Properties();
Charset charset = CharsetUtil.CHARSET_UTF_8;
try (BufferedReader inputStream = FileUtil.getReader(zhPropertiesFile, charset)) {
zhProperties.load(inputStream);
}
Collection<Object> oldKeys = zhProperties.keySet();
Collection<Object> linkUsed = new LinkedHashSet<>();
wordsSet = CollUtil.sort(wordsSet, String::compareTo);
wordsSet.forEach(s -> {
// 根据中文反查 key
String key = null;
for (Map.Entry<Object, Object> entry : zhProperties.entrySet()) {
if (StrUtil.equals(StrUtil.toStringOrNull(entry.getValue()), s)) {
key = (String) entry.getKey();
break;
}
}
if (key == null) {
do {
key = StrUtil.format("key.{}", RandomUtil.randomStringUpper(6));
} while (zhProperties.containsKey(key));
System.out.println("生成新的 key:" + key);
zhProperties.put(key, s);
}
linkUsed.add(key);
});
// 删除不存在的
int beforeSize = oldKeys.size();
oldKeys.removeIf(next -> {
//
boolean b = !linkUsed.contains(next) && !useKeys.contains(next);
if (b) {
System.out.println("删除 key" + next);
}
return b;
});
int afterSize = oldKeys.size();
if (beforeSize != afterSize) {
System.out.println(beforeSize + " " + afterSize);
}
for (Object useKey : useKeys) {
if (zhProperties.containsKey(useKey)) {
continue;
}
System.out.println("存在未关联的key:" + useKey);
}
try (BufferedWriter writer = FileUtil.getWriter(zhPropertiesFile, charset, false)) {
zhProperties.store(writer, "i18n zh");
}
System.out.println(zhProperties.size());
for (Map.Entry<Object, Object> entry : zhProperties.entrySet()) {
chineseMap.put(StrUtil.toStringOrNull(entry.getValue()), StrUtil.toStringOrNull(entry.getKey()));
}
// Collection<Object> oldKeys = zhProperties.keySet();
// Collection<Object> linkUsed = new LinkedHashSet<>();
// wordsSet = CollUtil.sort(wordsSet, String::compareTo);
// wordsSet.forEach(s -> {
// // 根据中文反查 key
// String key = null;
// for (Map.Entry<Object, Object> entry : zhProperties.entrySet()) {
// if (StrUtil.equals(StrUtil.toStringOrNull(entry.getValue()), s)) {
// key = (String) entry.getKey();
// break;
// }
// }
// if (key == null) {
// do {
// key = StrUtil.format("i18n.{}", RandomUtil.randomStringUpper(6));
// } while (zhProperties.containsKey(key));
// System.out.println("生成新的 key:" + key);
// zhProperties.put(key, s);
// }
// linkUsed.add(key);
// });
// // 删除不存在的
// int beforeSize = oldKeys.size();
// oldKeys.removeIf(next -> {
// //
// boolean b = !linkUsed.contains(next) && !useKeys.contains(next);
// if (b) {
// System.out.println("删除 key" + next);
// }
// return b;
// });
// int afterSize = oldKeys.size();
// if (beforeSize != afterSize) {
// System.out.println(beforeSize + " " + afterSize);
// }
//
// for (Object useKey : useKeys) {
// if (zhProperties.containsKey(useKey)) {
// continue;
// }
// System.out.println("存在未关联的key:" + useKey);
// }
//
// try (BufferedWriter writer = FileUtil.getWriter(zhPropertiesFile, charset, false)) {
// zhProperties.store(writer, "i18n zh");
// }
// System.out.println(zhProperties.size());
//
// for (Map.Entry<Object, Object> entry : zhProperties.entrySet()) {
// chineseMap.put(StrUtil.toStringOrNull(entry.getValue()), StrUtil.toStringOrNull(entry.getKey()));
// }
}
/**
@ -311,7 +394,7 @@ public class ExtractI18nTest {
* @param pattern 匹配的正则
* @throws IOException io 异常
*/
private void extractFile(File file, Pattern pattern) throws Exception {
private void extractFile(File file, Pattern pattern, Collection<String> wordsSet) throws Exception {
try (BufferedReader reader = Files.newBufferedReader(file.toPath())) {
String line;
while ((line = reader.readLine()) != null) {

View File

@ -1,15 +1,15 @@
You are the service that converts a user request JSON into a new (user-expected) JSON object based on the following JavaScript-defined JSON object:
```
// 将下面json中的 将`值`的含义进行转为语义化且简短的首字母为小写的小驼峰英文变量名替换无意义字符串
// 将下面json中的 根据`值`的含义将 `key` 转为语义化且简短的首字母为小写的小驼峰英文变量名替换无意义字符串
// 此处进行替换,禁止出现 k1 k2 k3
const json = [
"string",
"string2"
]
const template = {
"key1": "string1",
"key2": "string2",
}
```
The following is a user request:
```
const json ={REQUEST_STR}
const template = {REQUEST_STR}
```

View File

@ -263,7 +263,7 @@ public class WorkspaceController extends BaseServerController {
systemParametersServer.delByKey(StrUtil.format("node_config_{}", id));
// 删除信息
workspaceService.delByKey(id);
return new JsonMessage<>(200, "删除成功 " + autoDelete);
return new JsonMessage<>(200, "删除成功:" + autoDelete);
}
/**

View File

@ -235,7 +235,7 @@ public class NodeUpdateHandler extends BaseProxyHandler {
}
} catch (Exception e) {
log.error("升级失败", e);
this.onError(session, "升级失败 " + e.getMessage());
this.onError(session, "升级失败:" + e.getMessage());
}
}