test: add .pdf and .doc parse tests

This commit is contained in:
Michael Yang 2024-07-03 12:37:23 +08:00
parent 0b019895e5
commit b91d5e73a1
8 changed files with 101 additions and 0 deletions

View File

@ -58,4 +58,13 @@ public class Document extends VectorData {
document.setContent(content);
return document;
}
/**
 * Renders this document as {@code Document{id=..., content='...', metadataMap=...}}.
 *
 * @return a string containing the id, the content (wrapped in single quotes)
 *         and the metadata map of this document
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("Document{");
    text.append("id=").append(id);
    text.append(", content='").append(content).append('\'');
    text.append(", metadataMap=").append(metadataMap);
    text.append('}');
    return text.toString();
}
}

View File

@ -1,3 +1,18 @@
/*
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.agentsflex.document.parser;
import com.agentsflex.core.document.Document;

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.agentsflex.document.parser.test;
import com.agentsflex.core.document.Document;
import com.agentsflex.document.parser.PdfBoxDocumentParser;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
/**
 * Smoke test for {@link PdfBoxDocumentParser}: feeds the sample PDF from the
 * {@code testresource} directory to the parser and prints the resulting
 * {@link Document}.
 */
public class PdfBoxDocumentParserTest {

    /**
     * Parses {@code ../../testresource/a.pdf}, resolved against the {@code user.dir}
     * system property, and prints the parsed document to standard output.
     *
     * <p>NOTE(review): the test makes no assertions about the parsed content; it
     * only fails if opening or parsing the file throws.
     *
     * @throws FileNotFoundException if the sample file cannot be opened
     * @throws java.io.IOException   if closing the input stream fails
     */
    @Test
    public void testParserPdf() throws java.io.IOException {
        File file = new File(System.getProperty("user.dir"), "../../testresource/a.pdf");
        // try-with-resources releases the file handle even when parsing throws.
        try (FileInputStream stream = new FileInputStream(file)) {
            PdfBoxDocumentParser parser = new PdfBoxDocumentParser();
            Document document = parser.parse(stream);
            System.out.println(document);
        }
    }
}

View File

@ -43,6 +43,11 @@
<artifactId>poi-scratchpad</artifactId>
<version>${apache.poi.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@ -1,3 +1,18 @@
/*
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.agentsflex.document.parser;
import com.agentsflex.core.document.Document;

View File

@ -0,0 +1,21 @@
package com.agentsflex.document.parser.test;
import com.agentsflex.core.document.Document;
import com.agentsflex.document.parser.PoiDocumentParser;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
/**
 * Smoke test for {@link PoiDocumentParser}: feeds the sample Word file from the
 * {@code testresource} directory to the parser and prints the resulting
 * {@link Document}.
 */
public class PoiDocumentParserTest {

    /**
     * Parses {@code ../../testresource/a.doc}, resolved against the {@code user.dir}
     * system property, and prints the parsed document to standard output.
     *
     * <p>NOTE(review): the method name says "Docx" but the fixture is a
     * {@code .doc} file; the name is kept unchanged here - confirm which format
     * is intended. The test makes no assertions about the parsed content; it
     * only fails if opening or parsing the file throws.
     *
     * @throws FileNotFoundException if the sample file cannot be opened
     * @throws java.io.IOException   if closing the input stream fails
     */
    @Test
    public void testParserDocx() throws java.io.IOException {
        File file = new File(System.getProperty("user.dir"), "../../testresource/a.doc");
        // try-with-resources releases the file handle even when parsing throws.
        try (FileInputStream stream = new FileInputStream(file)) {
            PoiDocumentParser parser = new PoiDocumentParser();
            Document document = parser.parse(stream);
            System.out.println(document);
        }
    }
}

BIN
testresource/a.doc Normal file

Binary file not shown.

BIN
testresource/a.pdf Normal file

Binary file not shown.