mirror of
https://gitee.com/agents-flex/agents-flex.git
synced 2024-11-29 18:38:17 +08:00
test: add .pdf and .doc parse test
This commit is contained in:
parent
0b019895e5
commit
b91d5e73a1
@ -58,4 +58,13 @@ public class Document extends VectorData {
|
||||
document.setContent(content);
|
||||
return document;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Document{" +
|
||||
"id=" + id +
|
||||
", content='" + content + '\'' +
|
||||
", metadataMap=" + metadataMap +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,18 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.agentsflex.document.parser;
|
||||
|
||||
import com.agentsflex.core.document.Document;
|
||||
|
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.agentsflex.document.parser.test;
|
||||
|
||||
import com.agentsflex.core.document.Document;
|
||||
import com.agentsflex.document.parser.PdfBoxDocumentParser;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
public class PdfBoxDocumentParserTest {
|
||||
|
||||
@Test
|
||||
public void testParserPdf() throws FileNotFoundException {
|
||||
File file = new File(System.getProperty("user.dir"), "../../testresource/a.pdf");
|
||||
FileInputStream stream = new FileInputStream(file);
|
||||
PdfBoxDocumentParser parser = new PdfBoxDocumentParser();
|
||||
Document document = parser.parse(stream);
|
||||
System.out.println(document);
|
||||
}
|
||||
}
|
@ -43,6 +43,11 @@
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>${apache.poi.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
@ -1,3 +1,18 @@
|
||||
/*
|
||||
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
|
||||
* <p>
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
* <p>
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
* <p>
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package com.agentsflex.document.parser;
|
||||
|
||||
import com.agentsflex.core.document.Document;
|
||||
|
@ -0,0 +1,21 @@
|
||||
package com.agentsflex.document.parser.test;
|
||||
|
||||
import com.agentsflex.core.document.Document;
|
||||
import com.agentsflex.document.parser.PoiDocumentParser;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
public class PoiDocumentParserTest {
|
||||
|
||||
@Test
|
||||
public void testParserDocx() throws FileNotFoundException {
|
||||
File file = new File(System.getProperty("user.dir"), "../../testresource/a.doc");
|
||||
FileInputStream stream = new FileInputStream(file);
|
||||
PoiDocumentParser parser = new PoiDocumentParser();
|
||||
Document document = parser.parse(stream);
|
||||
System.out.println(document);
|
||||
}
|
||||
}
|
BIN
testresource/a.doc
Normal file
BIN
testresource/a.doc
Normal file
Binary file not shown.
BIN
testresource/a.pdf
Normal file
BIN
testresource/a.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user