From b6631cd8782e788812cecfa6bf9a98f42310221b Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Tue, 4 Jun 2024 20:07:25 +0800 Subject: [PATCH] modify rerank and splitter code directory (#4924) --- api/core/indexing_runner.py | 9 ++++++--- api/core/rag/data_post_processor/data_post_processor.py | 2 +- api/core/{ => rag}/docstore/dataset_docstore.py | 0 api/core/rag/index_processor/index_processor_base.py | 7 +++++-- api/core/{ => rag}/rerank/__init__.py | 0 api/core/{ => rag}/rerank/rerank.py | 0 api/core/rag/retrieval/dataset_retrieval.py | 2 +- api/core/{ => rag}/splitter/fixed_text_splitter.py | 2 +- api/core/{ => rag}/splitter/text_splitter.py | 0 .../dataset_retriever/dataset_multi_retriever_tool.py | 2 +- 10 files changed, 15 insertions(+), 9 deletions(-) rename api/core/{ => rag}/docstore/dataset_docstore.py (100%) rename api/core/{ => rag}/rerank/__init__.py (100%) rename api/core/{ => rag}/rerank/rerank.py (100%) rename api/core/{ => rag}/splitter/fixed_text_splitter.py (98%) rename api/core/{ => rag}/splitter/text_splitter.py (100%) diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 17c4b3c21..d4c2bc5ad 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -12,7 +12,6 @@ from flask import Flask, current_app from flask_login import current_user from sqlalchemy.orm.exc import ObjectDeletedError -from core.docstore.dataset_docstore import DatasetDocumentStore from core.errors.error import ProviderTokenNotInitError from core.llm_generator.llm_generator import LLMGenerator from core.model_manager import ModelInstance, ModelManager @@ -20,12 +19,16 @@ from core.model_runtime.entities.model_entities import ModelType, PriceType from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel from core.rag.datasource.keyword.keyword_factory import Keyword +from core.rag.docstore.dataset_docstore import DatasetDocumentStore from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.index_processor.index_processor_base import BaseIndexProcessor from core.rag.index_processor.index_processor_factory import IndexProcessorFactory from core.rag.models.document import Document -from core.splitter.fixed_text_splitter import EnhanceRecursiveCharacterTextSplitter, FixedRecursiveCharacterTextSplitter -from core.splitter.text_splitter import TextSplitter +from core.rag.splitter.fixed_text_splitter import ( + EnhanceRecursiveCharacterTextSplitter, + FixedRecursiveCharacterTextSplitter, +) +from core.rag.splitter.text_splitter import TextSplitter from extensions.ext_database import db from extensions.ext_redis import redis_client from extensions.ext_storage import storage diff --git a/api/core/rag/data_post_processor/data_post_processor.py b/api/core/rag/data_post_processor/data_post_processor.py index bdd69c27b..a0f294778 100644 --- a/api/core/rag/data_post_processor/data_post_processor.py +++ b/api/core/rag/data_post_processor/data_post_processor.py @@ -5,7 +5,7 @@ from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.rag.data_post_processor.reorder import ReorderRunner from core.rag.models.document import Document -from core.rerank.rerank import RerankRunner +from core.rag.rerank.rerank import RerankRunner class DataPostProcessor: diff --git a/api/core/docstore/dataset_docstore.py b/api/core/rag/docstore/dataset_docstore.py similarity index 100% rename from api/core/docstore/dataset_docstore.py rename to api/core/rag/docstore/dataset_docstore.py diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py index 23d2451e2..edc16c821 100644 --- a/api/core/rag/index_processor/index_processor_base.py +++ b/api/core/rag/index_processor/index_processor_base.py @@ -7,8 +7,11 @@ from flask import current_app from core.model_manager import ModelInstance from core.rag.extractor.entity.extract_setting import ExtractSetting from core.rag.models.document import Document -from core.splitter.fixed_text_splitter import EnhanceRecursiveCharacterTextSplitter, FixedRecursiveCharacterTextSplitter -from core.splitter.text_splitter import TextSplitter +from core.rag.splitter.fixed_text_splitter import ( + EnhanceRecursiveCharacterTextSplitter, + FixedRecursiveCharacterTextSplitter, +) +from core.rag.splitter.text_splitter import TextSplitter from models.dataset import Dataset, DatasetProcessRule diff --git a/api/core/rerank/__init__.py b/api/core/rag/rerank/__init__.py similarity index 100% rename from api/core/rerank/__init__.py rename to api/core/rag/rerank/__init__.py diff --git a/api/core/rerank/rerank.py b/api/core/rag/rerank/rerank.py similarity index 100% rename from api/core/rerank/rerank.py rename to api/core/rag/rerank/rerank.py diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 08c7df96d..4d8f82642 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -14,9 +14,9 @@ from core.model_runtime.entities.model_entities import ModelFeature, ModelType from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from core.rag.datasource.retrieval_service import RetrievalService from core.rag.models.document import Document +from core.rag.rerank.rerank import RerankRunner from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter -from core.rerank.rerank import RerankRunner from core.tools.tool.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool from core.tools.tool.dataset_retriever.dataset_retriever_tool import DatasetRetrieverTool diff --git a/api/core/splitter/fixed_text_splitter.py b/api/core/rag/splitter/fixed_text_splitter.py similarity index 98% rename from api/core/splitter/fixed_text_splitter.py rename to api/core/rag/splitter/fixed_text_splitter.py index a1510259a..fe6a89ebd 100644 --- a/api/core/splitter/fixed_text_splitter.py +++ b/api/core/rag/splitter/fixed_text_splitter.py @@ -6,7 +6,7 @@ from typing import Any, Optional, cast from core.model_manager import ModelInstance from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer -from core.splitter.text_splitter import ( +from core.rag.splitter.text_splitter import ( TS, Collection, Literal, diff --git a/api/core/splitter/text_splitter.py b/api/core/rag/splitter/text_splitter.py similarity index 100% rename from api/core/splitter/text_splitter.py rename to api/core/rag/splitter/text_splitter.py diff --git a/api/core/tools/tool/dataset_retriever/dataset_multi_retriever_tool.py b/api/core/tools/tool/dataset_retriever/dataset_multi_retriever_tool.py index b67863eb4..18cf78066 100644 --- a/api/core/tools/tool/dataset_retriever/dataset_multi_retriever_tool.py +++ b/api/core/tools/tool/dataset_retriever/dataset_multi_retriever_tool.py @@ -7,7 +7,7 @@ from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCa from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from core.rag.datasource.retrieval_service import RetrievalService -from core.rerank.rerank import RerankRunner +from core.rag.rerank.rerank import RerankRunner from core.tools.tool.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool from extensions.ext_database import db from models.dataset import Dataset, Document, DocumentSegment