diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index 70c3e73d9..ddb4487e6 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -1,7 +1,6 @@ from flask_login import current_user from flask_restful import reqparse, marshal from werkzeug.exceptions import NotFound - from controllers.service_api import api from controllers.service_api.app.error import ProviderNotInitializeError from controllers.service_api.wraps import DatasetApiResource @@ -9,8 +8,8 @@ from core.model_providers.error import ProviderTokenNotInitError, LLMBadRequestE from core.model_providers.model_factory import ModelFactory from extensions.ext_database import db from fields.segment_fields import segment_fields -from models.dataset import Dataset -from services.dataset_service import DocumentService, SegmentService +from models.dataset import Dataset, DocumentSegment +from services.dataset_service import DatasetService, DocumentService, SegmentService class SegmentApi(DatasetApiResource): @@ -24,6 +23,8 @@ class SegmentApi(DatasetApiResource): Dataset.tenant_id == tenant_id, Dataset.id == dataset_id ).first() + if not dataset: + raise NotFound('Dataset not found.') # check document document_id = str(document_id) document = DocumentService.get_document(dataset.id, document_id) @@ -55,5 +56,146 @@ class SegmentApi(DatasetApiResource): 'doc_form': document.doc_form }, 200 + def get(self, tenant_id, dataset_id, document_id): + """List segments of a document, optionally filtered by status and keyword.""" + # check dataset + dataset_id = str(dataset_id) + tenant_id = str(tenant_id) + dataset = db.session.query(Dataset).filter( + Dataset.tenant_id == tenant_id, + Dataset.id == dataset_id + ).first() + if not dataset: + raise NotFound('Dataset not found.') + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset.id, document_id) + if not document: + raise NotFound('Document not found.') + # 
check embedding model setting + if dataset.indexing_technique == 'high_quality': + try: + ModelFactory.get_embedding_model( + tenant_id=current_user.current_tenant_id, + model_provider_name=dataset.embedding_model_provider, + model_name=dataset.embedding_model + ) + except LLMBadRequestError: + raise ProviderNotInitializeError( + f"No Embedding Model available. Please configure a valid provider " + f"in the Settings -> Model Provider.") + except ProviderTokenNotInitError as ex: + raise ProviderNotInitializeError(ex.description) + + parser = reqparse.RequestParser() + parser.add_argument('status', type=str, + action='append', default=[], location='args') + parser.add_argument('keyword', type=str, default=None, location='args') + args = parser.parse_args() + + status_list = args['status'] + keyword = args['keyword'] + + query = DocumentSegment.query.filter( + DocumentSegment.document_id == str(document_id), + DocumentSegment.tenant_id == current_user.current_tenant_id + ) + + if status_list: + query = query.filter(DocumentSegment.status.in_(status_list)) + + if keyword: + query = query.where(DocumentSegment.content.ilike(f'%{keyword}%')) + + total = query.count() + segments = query.order_by(DocumentSegment.position).all() + return { + 'data': marshal(segments, segment_fields), + 'doc_form': document.doc_form, + 'total': total + }, 200 + + +class DatasetSegmentApi(DatasetApiResource): + def delete(self, tenant_id, dataset_id, document_id, segment_id): + # check dataset + dataset_id = str(dataset_id) + tenant_id = str(tenant_id) + dataset = db.session.query(Dataset).filter( + Dataset.tenant_id == tenant_id, + Dataset.id == dataset_id + ).first() + if not dataset: + raise NotFound('Dataset not found.') + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound('Document not found.') + # check 
segment + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), + DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound('Segment not found.') + SegmentService.delete_segment(segment, document, dataset) + return {'result': 'success'}, 200 + + def post(self, tenant_id, dataset_id, document_id, segment_id): + # check dataset + dataset_id = str(dataset_id) + tenant_id = str(tenant_id) + dataset = db.session.query(Dataset).filter( + Dataset.tenant_id == tenant_id, + Dataset.id == dataset_id + ).first() + if not dataset: + raise NotFound('Dataset not found.') + # check user's model setting + DatasetService.check_dataset_model_setting(dataset) + # check document + document_id = str(document_id) + document = DocumentService.get_document(dataset_id, document_id) + if not document: + raise NotFound('Document not found.') + if dataset.indexing_technique == 'high_quality': + # check embedding model setting + try: + ModelFactory.get_embedding_model( + tenant_id=current_user.current_tenant_id, + model_provider_name=dataset.embedding_model_provider, + model_name=dataset.embedding_model + ) + except LLMBadRequestError: + raise ProviderNotInitializeError( + f"No Embedding Model available. 
Please configure a valid provider " + f"in the Settings -> Model Provider.") + except ProviderTokenNotInitError as ex: + raise ProviderNotInitializeError(ex.description) + # check segment + segment_id = str(segment_id) + segment = DocumentSegment.query.filter( + DocumentSegment.id == str(segment_id), + DocumentSegment.tenant_id == current_user.current_tenant_id + ).first() + if not segment: + raise NotFound('Segment not found.') + + # validate args + parser = reqparse.RequestParser() + parser.add_argument('segments', type=dict, required=False, nullable=True, location='json') + args = parser.parse_args() + + SegmentService.segment_create_args_validate(args['segments'], document) + segment = SegmentService.update_segment(args['segments'], segment, document, dataset) + return { + 'data': marshal(segment, segment_fields), + 'doc_form': document.doc_form + }, 200 + api.add_resource(SegmentApi, '/datasets//documents//segments') +api.add_resource(DatasetSegmentApi, '/datasets//documents//segments/') diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 71581d47b..6a5f592a8 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -1091,6 +1091,8 @@ class SegmentService: segment.answer = args['answer'] if args['keywords']: segment.keywords = args['keywords'] + if args['enabled'] is not None: + segment.enabled = args['enabled'] db.session.add(segment) db.session.commit() # update segment index task diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index 90d5dbacc..7e207d9c3 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -89,7 +89,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`} > 
```bash {{ title: 'cURL' }} - curl --location --request GET 'https://api.dify.ai/v1/datasets?page=1&limit=20' \ + curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \ --header 'Authorization: Bearer {api_key}' ``` @@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) Custom rules (in automatic mode, this field is empty) - pre_processing_rules (array[object]) Preprocessing rules - id (string) Unique identifier for the preprocessing rule - - enumerate + - enumerate - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. @@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) Custom rules (in automatic mode, this field is empty) - pre_processing_rules (array[object]) Preprocessing rules - id (string) Unique identifier for the preprocessing rule - - enumerate + - enumerate - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. 
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_file' \ + curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ --header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) Custom rules (in automatic mode, this field is empty) - pre_processing_rules (array[object]) Preprocessing rules - id (string) Unique identifier for the preprocessing rule - - enumerate + - enumerate - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. 
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) Custom rules (in automatic mode, this field is empty) - pre_processing_rules (array[object]) Preprocessing rules - id (string) Unique identifier for the preprocessing rule - - enumerate + - enumerate - remove_extra_spaces Replace consecutive spaces, newlines, tabs - remove_urls_emails Delete URL, email address - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. 
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/{document_id}/create_by_file' \ + curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \ --header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status' \ + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \ --header 'Authorization: Bearer {api_key}' \ ``` @@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from { "data":[{ "id": "", - "indexing_status": "indexing", + "indexing_status": "indexing", "processing_started_at": 1681623462.0, "parsing_completed_at": 1681623462.0, "cleaning_completed_at": 1681623462.0, @@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request DELETE 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}' \ + curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \ --header 'Authorization: Bearer {api_key}' \ ``` @@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - 
curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents' \ + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \ --header 'Authorization: Bearer {api_key}' \ ``` @@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/segments' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -778,6 +778,213 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from --- + + + + ### Path + + + Dataset ID + + + Document ID + + + + ### Query + + + Search keyword, optional + + + Search status, e.g. completed + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \ + --header 'Authorization: Bearer {api_key}' \ + --header 'Content-Type: application/json' + ``` + + + ```json {{ title: 'Response' }} + { + "data": [{ + "id": "", + "position": 1, + "document_id": "", + "content": "1", + "answer": "1", + "word_count": 25, + "tokens": 0, + "keywords": [ + "a" + ], + "index_node_id": "", + "index_node_hash": "", + "hit_count": 0, + "enabled": true, + "disabled_at": null, + "disabled_by": null, + "status": "completed", + "created_by": "", + "created_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312007, + "error": null, + "stopped_at": null + }], + "doc_form": "text_model" + } + ``` + + + + +--- + + + + + ### Path + + + Dataset ID + + + Document Segment ID + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' \ + --header 
'Content-Type: application/json' + ``` + + + ```json {{ title: 'Response' }} + { + "result": "success" + } + ``` + + + + +--- + + + + + ### Path + + + Dataset ID + + + Document Segment ID + + + + ### Request Body + + + - content (text) Text content/question content, required + - answer (text) Answer content, not required, passed if the data set is in qa mode + - keywords (list) keyword, not required + - enabled (bool) false/true, not required + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "segments": { + "content": "1", + "answer": "1", + "keywords": ["a"], + "enabled": false + } + }' + ``` + + + ```json {{ title: 'Response' }} + { + "data": [{ + "id": "", + "position": 1, + "document_id": "", + "content": "1", + "answer": "1", + "word_count": 25, + "tokens": 0, + "keywords": [ + "a" + ], + "index_node_id": "", + "index_node_hash": "", + "hit_count": 0, + "enabled": true, + "disabled_at": null, + "disabled_by": null, + "status": "completed", + "created_by": "", + "created_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312007, + "error": null, + "stopped_at": null + }], + "doc_form": "text_model" + } + ``` + + + + +--- + ### Error message diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 54dca02e8..78fa91629 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -27,7 +27,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`} > ```bash {{ title: 'cURL' }} - curl --location 
--request POST 'https://api.dify.ai/v1/datasets' \ + curl --location --request POST '${props.apiBaseUrl}/datasets' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -82,14 +82,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request GET 'https://api.dify.ai/v1/datasets?page=1&limit=20' \ + curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \ --header 'Authorization: Bearer {api_key}' ``` @@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) 自定义规则(自动模式下,该字段为空) - pre_processing_rules (array[object]) 预处理规则 - id (string) 预处理规则的唯一标识符 - - 枚举: + - 枚举: - remove_extra_spaces 替换连续空格、换行符、制表符 - remove_urls_emails 删除 URL、电子邮件地址 - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 @@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) 自定义规则(自动模式下,该字段为空) - pre_processing_rules (array[object]) 预处理规则 - id (string) 预处理规则的唯一标识符 - - 枚举: + - 枚举: - remove_extra_spaces 替换连续空格、换行符、制表符 - remove_urls_emails 删除 URL、电子邮件地址 - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 @@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_file' \ + curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ 
--header 'Authorization: Bearer {api_key}' \ --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) 自定义规则(自动模式下,该字段为空) - pre_processing_rules (array[object]) 预处理规则 - id (string) 预处理规则的唯一标识符 - - 枚举: + - 枚举: - remove_extra_spaces 替换连续空格、换行符、制表符 - remove_urls_emails 删除 URL、电子邮件地址 - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 @@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - rules (object) 自定义规则(自动模式下,该字段为空) - pre_processing_rules (array[object]) 预处理规则 - id (string) 预处理规则的唯一标识符 - - 枚举: + - 枚举: - remove_extra_spaces 替换连续空格、换行符、制表符 - remove_urls_emails 删除 URL、电子邮件地址 - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 @@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/{document_id}/create_by_file' \ + curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \ --header 'Authorization: Bearer {api_key}' \ --form 
'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \ --form 'file=@"/path/to/file"' @@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status' \ + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \ --header 'Authorization: Bearer {api_key}' \ ``` @@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from { "data":[{ "id": "", - "indexing_status": "indexing", + "indexing_status": "indexing", "processing_started_at": 1681623462.0, "parsing_completed_at": 1681623462.0, "cleaning_completed_at": 1681623462.0, @@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request DELETE 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}' \ + curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \ --header 'Authorization: Bearer {api_key}' \ ``` @@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents' \ + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \ --header 'Authorization: Bearer {api_key}' \ ``` @@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ```bash {{ title: 'cURL' }} - curl --location --request POST 
'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/segments' \ + curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \ --header 'Authorization: Bearer {api_key}' \ --header 'Content-Type: application/json' \ --data-raw '{ @@ -778,6 +778,213 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from --- + + + + ### Path + + + 数据集 ID + + + 文档 ID + + + + ### Query + + + 搜索关键词,可选 + + + 搜索状态,completed + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \ + --header 'Authorization: Bearer {api_key}' \ + --header 'Content-Type: application/json' + ``` + + + ```json {{ title: 'Response' }} + { + "data": [{ + "id": "", + "position": 1, + "document_id": "", + "content": "1", + "answer": "1", + "word_count": 25, + "tokens": 0, + "keywords": [ + "a" + ], + "index_node_id": "", + "index_node_hash": "", + "hit_count": 0, + "enabled": true, + "disabled_at": null, + "disabled_by": null, + "status": "completed", + "created_by": "", + "created_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312007, + "error": null, + "stopped_at": null + }], + "doc_form": "text_model" + } + ``` + + + + +--- + + + + + ### Path + + + 数据集 ID + + + 文档分段ID + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' \ + --header 'Content-Type: application/json' + ``` + + + ```json {{ title: 'Response' }} + { + "result": "success" + } + ``` + + + + +--- + + + + + ### Path + + + 数据集 ID + + + 文档分段ID + + + + ### Request Body + + + - content (text) 文本内容/问题内容,必填 + - answer (text) 答案内容,非必填,如果数据集的模式为qa模式则传值 + - keywords (list) 关键字,非必填 + - enabled (bool) false/true,非必填 + + + + + + ```bash {{ title: 'cURL' }} + curl --location --request POST 
'${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \ + --header 'Authorization: Bearer {api_key}' \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "segments": { + "content": "1", + "answer": "1", + "keywords": ["a"], + "enabled": false + } + }' + ``` + + + ```json {{ title: 'Response' }} + { + "data": [{ + "id": "", + "position": 1, + "document_id": "", + "content": "1", + "answer": "1", + "word_count": 25, + "tokens": 0, + "keywords": [ + "a" + ], + "index_node_id": "", + "index_node_hash": "", + "hit_count": 0, + "enabled": true, + "disabled_at": null, + "disabled_by": null, + "status": "completed", + "created_by": "", + "created_at": 1695312007, + "indexing_at": 1695312007, + "completed_at": 1695312007, + "error": null, + "stopped_at": null + }], + "doc_form": "text_model" + } + ``` + + + + +--- + ### 错误信息