// Mirror of https://gitee.com/dify_ai/dify.git
// Synced 2024-12-05 20:57:46 +08:00
// TypeScript, 392 lines, 15 KiB
"use client";
|
|
import { useTranslation } from "react-i18next";
|
|
import dayjs from "dayjs";
|
|
import { formatNumber, formatFileSize, formatTime } from '@/utils/format'
|
|
import type { DocType } from '@/models/datasets'
|
|
|
|
/** Kind of form control a metadata sub-field can be edited with. */
export type inputType =
  | 'input'
  | 'select'
  | 'textarea'
|
|
/**
 * Key of one entry in the metadata map: any `DocType`, plus the two extra
 * literal keys `originInfo` and `technicalParameters`.
 */
export type metadataType =
  | DocType
  | 'originInfo'
  | 'technicalParameters'
|
|
|
|
/** Configuration of a single sub-field inside a metadata section. */
type MetadataSubField = {
  /** Text shown as the field's label. */
  label: string
  /** Form control used for the field, when one is specified. */
  inputType?: inputType
  /** Optional field name; no entry built in this file sets it. */
  field?: string
  /**
   * Optional formatter turning the raw value into displayable output.
   * `total` is an optional second number; in this file only the `hit_count`
   * renderer reads it.
   */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Configuration of one metadata section. */
type MetadataSection = {
  /** Section title. */
  text: string
  /** Optional edit flag; entries in this file either omit it or set `false`. */
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Sub-fields of the section, keyed by field name. */
  subFieldsMap: Record<string, MetadataSubField>
}

/** One section configuration for every `metadataType` key. */
type MetadataMap = Record<metadataType, MetadataSection>
|
|
|
|
/** i18n key prefix (without trailing dot) used for metadata field labels. */
const fieldPrefix = 'datasetDocuments.metadata.field'
|
|
|
|
/**
 * Hook that builds the translated metadata configuration map.
 *
 * Each top-level key is a `metadataType`; its value carries the section title
 * (`text`), an optional `iconName` / `allowEdit` flag and a `subFieldsMap`
 * whose insertion order is preserved from the original definition. Labels are
 * resolved through `t` under `fieldPrefix`; some sub-fields of `originInfo`
 * and `technicalParameters` additionally provide a `render` formatter.
 *
 * @returns The metadata map for the current translation function.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()

  // Resolves a label key relative to the shared `fieldPrefix`.
  const label = (key: string) => t(`${fieldPrefix}.${key}`)

  // Formats a unix timestamp (seconds) with the translated date-time pattern.
  const renderDateTime = (value: number) =>
    dayjs.unix(value).format(t('datasetDocuments.metadata.dateTimeFormat') as string)

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: label('book.title') },
        language: { label: label('book.language'), inputType: 'select' },
        author: { label: label('book.author') },
        publisher: { label: label('book.publisher') },
        publication_date: { label: label('book.publicationDate') },
        isbn: { label: label('book.ISBN') },
        category: { label: label('book.category'), inputType: 'select' },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        title: { label: label('webPage.title') },
        url: { label: label('webPage.url') },
        language: { label: label('webPage.language'), inputType: 'select' },
        'author/publisher': { label: label('webPage.authorPublisher') },
        publish_date: { label: label('webPage.publishDate') },
        'topics/keywords': { label: label('webPage.topicsKeywords') },
        description: { label: label('webPage.description') },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        title: { label: label('paper.title') },
        language: { label: label('paper.language'), inputType: 'select' },
        author: { label: label('paper.author') },
        publish_date: { label: label('paper.publishDate') },
        'journal/conference_name': { label: label('paper.journalConferenceName') },
        'volume/issue/page_numbers': { label: label('paper.volumeIssuePage') },
        doi: { label: label('paper.DOI') },
        'topics/keywords': { label: label('paper.topicsKeywords') },
        abstract: { label: label('paper.abstract'), inputType: 'textarea' },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        platform: { label: label('socialMediaPost.platform') },
        'author/username': { label: label('socialMediaPost.authorUsername') },
        publish_date: { label: label('socialMediaPost.publishDate') },
        post_url: { label: label('socialMediaPost.postURL') },
        'topics/tags': { label: label('socialMediaPost.topicsTags') },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        title: { label: label('personalDocument.title') },
        author: { label: label('personalDocument.author') },
        creation_date: { label: label('personalDocument.creationDate') },
        last_modified_date: { label: label('personalDocument.lastModifiedDate') },
        document_type: { label: label('personalDocument.documentType'), inputType: 'select' },
        'tags/category': { label: label('personalDocument.tagsCategory') },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        title: { label: label('businessDocument.title') },
        author: { label: label('businessDocument.author') },
        creation_date: { label: label('businessDocument.creationDate') },
        last_modified_date: { label: label('businessDocument.lastModifiedDate') },
        document_type: { label: label('businessDocument.documentType'), inputType: 'select' },
        'department/team': { label: label('businessDocument.departmentTeam') },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        chat_platform: { label: label('IMChat.chatPlatform') },
        'chat_participants/group_name': { label: label('IMChat.chatPartiesGroupName') },
        start_date: { label: label('IMChat.startDate') },
        end_date: { label: label('IMChat.endDate') },
        participants: { label: label('IMChat.participants') },
        // NOTE(review): these two keys are camelCase while every other field
        // key in this map is snake_case; kept as-is, confirm against the data.
        topicsKeywords: { label: label('IMChat.topicsKeywords'), inputType: 'textarea' },
        fileType: { label: label('IMChat.fileType') },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        title: { label: label('wikipediaEntry.title') },
        language: { label: label('wikipediaEntry.language'), inputType: 'select' },
        web_page_url: { label: label('wikipediaEntry.webpageURL') },
        'editor/contributor': { label: label('wikipediaEntry.editorContributor') },
        last_edit_date: { label: label('wikipediaEntry.lastEditDate') },
        'summary/introduction': {
          label: label('wikipediaEntry.summaryIntroduction'),
          inputType: 'textarea',
        },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        title: { label: label('notion.title') },
        language: { label: label('notion.lang'), inputType: 'select' },
        'author/creator': { label: label('notion.author') },
        creation_date: { label: label('notion.createdTime') },
        last_modified_date: { label: label('notion.lastModifiedTime') },
        notion_page_link: { label: label('notion.url') },
        'category/tags': { label: label('notion.tag') },
        description: { label: label('notion.desc') },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        repository_name: { label: label('github.repoName') },
        repository_description: { label: label('github.repoDesc') },
        'repository_owner/organization': { label: label('github.repoOwner') },
        code_filename: { label: label('github.fileName') },
        code_file_path: { label: label('github.filePath') },
        programming_language: { label: label('github.programmingLang') },
        github_link: { label: label('github.url') },
        open_source_license: { label: label('github.license') },
        commit_date: { label: label('github.lastCommitTime') },
        commit_author: { label: label('github.lastCommitAuthor') },
      },
    },
    originInfo: {
      text: '',
      allowEdit: false,
      subFieldsMap: {
        name: { label: label('originInfo.originalFilename') },
        'data_source_info.upload_file.size': {
          label: label('originInfo.originalFileSize'),
          render: value => formatFileSize(value),
        },
        created_at: {
          label: label('originInfo.uploadDate'),
          render: renderDateTime,
        },
        completed_at: {
          label: label('originInfo.lastUpdateDate'),
          render: renderDateTime,
        },
        data_source_type: {
          label: label('originInfo.source'),
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: label('technicalParameters.segmentSpecification'),
          render: value =>
            value === 'automatic'
              ? (t('datasetDocuments.embedding.automatic') as string)
              : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: label('technicalParameters.segmentLength'),
          render: value => formatNumber(value),
        },
        // NOTE(review): the unit suffixes below ("characters", "paragraphs",
        // "tokens") are hard-coded English, unlike the translated labels.
        average_segment_length: {
          label: label('technicalParameters.avgParagraphLength'),
          render: value => `${formatNumber(value)} characters`,
        },
        segment_count: {
          label: label('technicalParameters.paragraphs'),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        hit_count: {
          label: label('technicalParameters.hitCount'),
          render: (value, total) => {
            const hits = value || 0
            // Normalise a missing total so the output never shows "undefined".
            const segments = total || 0
            // Avoid dividing by zero: an empty total is reported as 0%.
            const percent = segments ? ((hits / segments) * 100).toFixed(2) : 0
            return `${percent}% (${hits}/${segments})`
          },
        },
        indexing_latency: {
          label: label('technicalParameters.embeddingTime'),
          render: value => formatTime(value),
        },
        tokens: {
          label: label('technicalParameters.embeddedSpend'),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
|
|
|
|
/** i18n key prefix (with trailing dot) for entries of the language map. */
const langPrefix = 'datasetDocuments.metadata.languageMap.'
|
|
|
|
/**
 * Hook returning the translated `languageMap` entries, keyed by the short
 * codes listed below. Each value is `t(langPrefix + code)`.
 */
export const useLanguages = () => {
  const { t } = useTranslation()
  // Looks up one code under the shared language-map prefix.
  const nameOf = (code: string) => t(langPrefix + code)

  return {
    zh: nameOf('zh'),
    en: nameOf('en'),
    es: nameOf('es'),
    fr: nameOf('fr'),
    de: nameOf('de'),
    ja: nameOf('ja'),
    ko: nameOf('ko'),
    ru: nameOf('ru'),
    ar: nameOf('ar'),
    pt: nameOf('pt'),
    it: nameOf('it'),
    nl: nameOf('nl'),
    pl: nameOf('pl'),
    sv: nameOf('sv'),
    tr: nameOf('tr'),
    he: nameOf('he'),
    hi: nameOf('hi'),
    da: nameOf('da'),
    fi: nameOf('fi'),
    no: nameOf('no'),
    hu: nameOf('hu'),
    el: nameOf('el'),
    cs: nameOf('cs'),
    th: nameOf('th'),
    id: nameOf('id'),
  }
}
|
|
|
|
/** i18n key prefix (with trailing dot) for book category entries. */
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'
|
|
|
|
/**
 * Hook returning the translated book category entries, keyed by category id.
 * Each value is `t(bookCategoryPrefix + id)`.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()
  // Looks up one category id under the book category prefix.
  const category = (id: string) => t(bookCategoryPrefix + id)

  return {
    fiction: category('fiction'),
    biography: category('biography'),
    history: category('history'),
    science: category('science'),
    technology: category('technology'),
    education: category('education'),
    philosophy: category('philosophy'),
    religion: category('religion'),
    socialSciences: category('socialSciences'),
    art: category('art'),
    travel: category('travel'),
    health: category('health'),
    selfHelp: category('selfHelp'),
    businessEconomics: category('businessEconomics'),
    cooking: category('cooking'),
    childrenYoungAdults: category('childrenYoungAdults'),
    comicsGraphicNovels: category('comicsGraphicNovels'),
    poetry: category('poetry'),
    drama: category('drama'),
    other: category('other'),
  }
}
|
|
|
|
/** i18n key prefix (with trailing dot) for personal document category entries. */
const personalDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.personalDoc.'
|
|
|
|
/**
 * Hook returning the translated personal document category entries, keyed by
 * category id. Each value is `t(personalDocCategoryPrefix + id)`.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()
  // Looks up one category id under the personal document prefix.
  const category = (id: string) => t(personalDocCategoryPrefix + id)

  return {
    notes: category('notes'),
    blogDraft: category('blogDraft'),
    diary: category('diary'),
    researchReport: category('researchReport'),
    bookExcerpt: category('bookExcerpt'),
    schedule: category('schedule'),
    list: category('list'),
    projectOverview: category('projectOverview'),
    photoCollection: category('photoCollection'),
    creativeWriting: category('creativeWriting'),
    codeSnippet: category('codeSnippet'),
    designDraft: category('designDraft'),
    personalResume: category('personalResume'),
    other: category('other'),
  }
}
|
|
|
|
/** i18n key prefix (with trailing dot) for business document category entries. */
const businessDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.businessDoc.'
|
|
|
|
/**
 * Hook returning the translated business document category entries, keyed by
 * category id. Each value is `t(businessDocCategoryPrefix + id)`.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()
  // Looks up one category id under the business document prefix.
  const category = (id: string) => t(businessDocCategoryPrefix + id)

  return {
    meetingMinutes: category('meetingMinutes'),
    researchReport: category('researchReport'),
    proposal: category('proposal'),
    employeeHandbook: category('employeeHandbook'),
    trainingMaterials: category('trainingMaterials'),
    requirementsDocument: category('requirementsDocument'),
    designDocument: category('designDocument'),
    productSpecification: category('productSpecification'),
    financialReport: category('financialReport'),
    marketAnalysis: category('marketAnalysis'),
    projectPlan: category('projectPlan'),
    teamStructure: category('teamStructure'),
    policiesProcedures: category('policiesProcedures'),
    contractsAgreements: category('contractsAgreements'),
    emailCorrespondence: category('emailCorrespondence'),
    other: category('other'),
  }
}
|