From e161c511af5240f3f4c41013dda3bb56cd2b8739 Mon Sep 17 00:00:00 2001 From: KVOJJJin Date: Sun, 10 Sep 2023 15:17:22 +0800 Subject: [PATCH] Feat:csv & docx support (#1139) Co-authored-by: jyong --- api/controllers/console/datasets/file.py | 2 +- .../datasets/create/assets/docx.svg | 23 +++++++++++++ .../create/embedding-process/index.module.css | 4 ++- .../create/embedding-process/index.tsx | 2 +- .../create/file-uploader/index.module.css | 4 ++- .../datasets/create/file-uploader/index.tsx | 32 ++----------------- .../datasets/create/step-one/index.tsx | 4 +-- .../datasets/create/step-two/index.module.css | 4 +++ .../detail/completed/SegmentCard.tsx | 2 +- .../datasets/documents/style.module.css | 3 ++ web/assets/docx.svg | 23 +++++++++++++ web/i18n/lang/dataset-creation.en.ts | 2 +- web/i18n/lang/dataset-creation.zh.ts | 2 +- 13 files changed, 68 insertions(+), 39 deletions(-) create mode 100644 web/app/components/datasets/create/assets/docx.svg create mode 100644 web/assets/docx.svg diff --git a/api/controllers/console/datasets/file.py b/api/controllers/console/datasets/file.py index adef2c74c..c68083df6 100644 --- a/api/controllers/console/datasets/file.py +++ b/api/controllers/console/datasets/file.py @@ -26,7 +26,7 @@ from models.model import UploadFile cache = TTLCache(maxsize=None, ttl=30) -ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx'] +ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] PREVIEW_WORDS_LIMIT = 3000 diff --git a/web/app/components/datasets/create/assets/docx.svg b/web/app/components/datasets/create/assets/docx.svg new file mode 100644 index 000000000..4d70dbfc8 --- /dev/null +++ b/web/app/components/datasets/create/assets/docx.svg @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/app/components/datasets/create/embedding-process/index.module.css b/web/app/components/datasets/create/embedding-process/index.module.css index 78050748f..a15b1310b 100644 --- a/web/app/components/datasets/create/embedding-process/index.module.css +++ b/web/app/components/datasets/create/embedding-process/index.module.css @@ -89,7 +89,9 @@ .fileIcon.csv { background-image: url(../assets/csv.svg); } - +.fileIcon.docx { + background-image: url(../assets/docx.svg); +} .fileIcon.xlsx, .fileIcon.xls { background-image: url(../assets/xlsx.svg); diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx index f5678ac39..1d3f83180 100644 --- a/web/app/components/datasets/create/embedding-process/index.tsx +++ b/web/app/components/datasets/create/embedding-process/index.tsx @@ -194,7 +194,7 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index
{indexingStatusBatchDetail.map(indexingStatusDetail => ( -
{fileItem.progress < 100 && ( @@ -274,33 +273,6 @@ const FileUploader = ({
))} - {/* {currentFile && ( -
onPreview(currentFile)} - className={cn( - s.file, - uploading && s.uploading, - // s.active, - )} - > - {uploading && ( -
- )} -
-
-
{currentFile.name}
-
{getFileSize(currentFile.size)}
-
-
- {uploading && ( -
{`${percent}%`}
- )} - {!uploading && ( -
removeFile(index)}/> - )} -
-
- )} */}
) diff --git a/web/app/components/datasets/create/step-one/index.tsx b/web/app/components/datasets/create/step-one/index.tsx index bcae398e7..450f09b40 100644 --- a/web/app/components/datasets/create/step-one/index.tsx +++ b/web/app/components/datasets/create/step-one/index.tsx @@ -65,7 +65,7 @@ const StepOne = ({ const { dataset } = useDatasetDetailContext() const [showModal, setShowModal] = useState(false) const [currentFile, setCurrentFile] = useState() - const [currentNotionPage, setCurrentNotionPage] = useState() + const [currentNotionPage, setCurrentNotionPage] = useState() const { t } = useTranslation() const modalShowHandle = () => setShowModal(true) @@ -78,7 +78,7 @@ const StepOne = ({ setCurrentFile(undefined) } - const updateCurrentPage = (page: Page) => { + const updateCurrentPage = (page: NotionPage) => { setCurrentNotionPage(page) } diff --git a/web/app/components/datasets/create/step-two/index.module.css b/web/app/components/datasets/create/step-two/index.module.css index d861805ce..ce644992e 100644 --- a/web/app/components/datasets/create/step-two/index.module.css +++ b/web/app/components/datasets/create/step-two/index.module.css @@ -290,6 +290,10 @@ background-image: url(../assets/csv.svg); } +.fileIcon.docx { + background-image: url(../assets/docx.svg); +} + .fileIcon.xlsx, .fileIcon.xls { background-image: url(../assets/xlsx.svg); diff --git a/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx b/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx index 0d8ca4acb..787bf1a79 100644 --- a/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx +++ b/web/app/components/datasets/documents/detail/completed/SegmentCard.tsx @@ -43,7 +43,7 @@ type ISegmentCardProps = { scene?: UsageScene className?: string archived?: boolean - embeddingAvailable: boolean + embeddingAvailable?: boolean } const SegmentCard: FC = ({ diff --git a/web/app/components/datasets/documents/style.module.css b/web/app/components/datasets/documents/style.module.css index 34c836ac0..c69144b4f 100644 --- a/web/app/components/datasets/documents/style.module.css +++ b/web/app/components/datasets/documents/style.module.css @@ -87,6 +87,9 @@ .csvIcon { background-image: url(~@/assets/csv.svg); } +.docxIcon { + background-image: url(~@/assets/docx.svg); +} .statusItemDetail { @apply h-8 font-medium border border-gray-200 inline-flex items-center rounded-lg pl-3 pr-4 mr-2; } diff --git a/web/assets/docx.svg b/web/assets/docx.svg new file mode 100644 index 000000000..4d70dbfc8 --- /dev/null +++ b/web/assets/docx.svg @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/i18n/lang/dataset-creation.en.ts b/web/i18n/lang/dataset-creation.en.ts index 982272092..19e01bf7f 100644 --- a/web/i18n/lang/dataset-creation.en.ts +++ b/web/i18n/lang/dataset-creation.en.ts @@ -23,7 +23,7 @@ const translation = { title: 'Upload text file', button: 'Drag and drop file, or', browse: 'Browse', - tip: 'Supports txt, html, markdown, xlsx, and pdf. Max {{size}}MB each.', + tip: 'Supports txt, html, markdown, xlsx, csv, docx and pdf. Max {{size}}MB each.', validation: { typeError: 'File type not supported', size: 'File too large. Maximum is {{size}}MB', diff --git a/web/i18n/lang/dataset-creation.zh.ts b/web/i18n/lang/dataset-creation.zh.ts index da8011899..e2cc12530 100644 --- a/web/i18n/lang/dataset-creation.zh.ts +++ b/web/i18n/lang/dataset-creation.zh.ts @@ -23,7 +23,7 @@ const translation = { title: '上传文本文件', button: '拖拽文件至此,或者', browse: '选择文件', - tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 {{size}}MB。', + tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX、CSV、DOCX,每个文件不超过 {{size}}MB。', validation: { typeError: '文件类型不支持', size: '文件太大了,不能超过 {{size}}MB',