删除多余的文件

This commit is contained in:
KeYuan 2018-05-20 12:55:47 +08:00
parent cb5716ba23
commit 481132b5a3
6 changed files with 0 additions and 821 deletions

11
pom.xml
View File

@ -37,17 +37,6 @@
<artifactId>hamcrest-core</artifactId> <artifactId>hamcrest-core</artifactId>
<version>1.1</version> <version>1.1</version>
</dependency> </dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.4.3</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.lowagie/itext --> <!-- https://mvnrepository.com/artifact/com.lowagie/itext -->
<dependency> <dependency>
<groupId>com.lowagie</groupId> <groupId>com.lowagie</groupId>

View File

@ -1,255 +0,0 @@
package com.key.common.plugs.itextpdf;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Section;
import com.itextpdf.text.WritableDirectElement;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.draw.LineSeparator;
import com.itextpdf.tool.xml.ElementHandler;
import com.itextpdf.tool.xml.Writable;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.pipeline.WritableElement;
/**
 * Demo that downloads a blog post and renders it to PDF with iText 5 and XMLWorker.
 * <p>
 * Two variants are shown: {@link #parseURL2PDFFile(String, String)} streams the post's
 * HTML straight into the PDF, while {@link #parseURL2PDFElement(String, String)} first
 * converts the HTML into iText elements so that more content can be appended to the
 * same chapter afterwards.
 *
 * @author <a href="http://www.micmiu.com">Michael Sun</a>
 */
public class Demo4URL2PDF {

    /**
     * Runs both conversions against a sample blog post and writes the results to
     * fixed paths under {@code D:/Documents}.
     *
     * @param args unused
     * @throws Exception if the first (streaming) conversion fails
     */
    public static void main(String[] args) throws Exception {
        String blogURL = "http://www.micmiu.com/os/linux/shell-dev-null/";
        // Variant 1: stream the page content straight into a PDF file.
        String pdfFile = "D:/Documents/demo-URL.pdf";
        Demo4URL2PDF.parseURL2PDFFile(pdfFile, blogURL);
        // Variant 2: turn the page content into PDF elements first.
        String pdfFile2 = "D:/Documents/demo-URL2.pdf";
        Demo4URL2PDF.parseURL2PDFElement(pdfFile2, blogURL);
    }

    /**
     * Fetches the blog page and extracts its basic information.
     *
     * @param blogURL address of the blog post
     * @return a four-element array: {@code [title, category, date, content HTML]}
     * @throws IllegalStateException if the title, category link or date element is
     *         missing from the page
     * @throws Exception if the page cannot be fetched
     */
    public static String[] extractBlogInfo(String blogURL) throws Exception {
        org.jsoup.nodes.Document doc = Jsoup.connect(blogURL).get();
        String[] info = new String[4];

        info[0] = requireFirst(doc, "h2.title", blogURL).text();
        info[1] = requireFirst(doc, "a[rel=category tag]", blogURL)
                .attr("href").replace("http://www.micmiu.com/", "");

        // The date element reads "<label>日期<value>"; keep only the value. When the
        // label is absent or nothing follows it, fall back to the element text minus
        // the label instead of failing with an index error.
        String dateText = requireFirst(doc, "span.post-info-date", blogURL).text();
        String[] dateParts = dateText.split("日期");
        info[2] = dateParts.length > 1
                ? dateParts[1].trim()
                : dateText.replace("日期", "").trim();

        info[3] = formatContentTag(doc.select("div.entry").first());
        return info;
    }

    /**
     * Returns the first element matching {@code cssQuery}, failing with a message that
     * names the selector rather than a bare {@code NullPointerException}.
     */
    private static org.jsoup.nodes.Element requireFirst(
            org.jsoup.nodes.Document doc, String cssQuery, String blogURL) {
        org.jsoup.nodes.Element found = doc.select(cssQuery).first();
        if (found == null) {
            throw new IllegalStateException(
                    "No element matches '" + cssQuery + "' in " + blogURL);
        }
        return found;
    }

    /**
     * Cleans up the post body: removes {@code div} elements and replaces image links
     * ({@code <a href="*.jpg"><img src="*.jpg"/></a>}) with the bare {@code <img>}.
     *
     * @param entry the post body element, may be {@code null}
     * @return the cleaned inner HTML, or an empty string when {@code entry} is {@code null}
     */
    private static String formatContentTag(org.jsoup.nodes.Element entry) {
        if (entry == null) {
            return "";
        }
        entry.select("div").remove();
        for (org.jsoup.nodes.Element imageLink : entry
                .select("a[href~=(?i)\\.(png|jpe?g)]")) {
            org.jsoup.nodes.Element img = imageLink.select("img").first();
            // A link to an image file does not necessarily wrap an <img>; leave such
            // links untouched instead of discarding the whole post body.
            if (img != null) {
                imageLink.replaceWith(img);
            }
        }
        return entry.html();
    }

    /**
     * Wraps a string in an {@link InputStream} over its UTF-8 bytes.
     *
     * @param content text to expose as a stream
     * @return the stream, or {@code null} when {@code content} is {@code null}
     */
    public static InputStream parse2Stream(String content) {
        if (content == null) {
            return null;
        }
        return new ByteArrayInputStream(
                content.getBytes(java.nio.charset.Charset.forName("UTF-8")));
    }

    /**
     * Writes the blog post to a PDF file, streaming its HTML body directly into the
     * document after a generated chapter header.
     *
     * @param pdfFile path of the PDF file to create
     * @param blogURL address of the blog post
     * @throws Exception if the page cannot be fetched or the PDF cannot be written
     */
    public static void parseURL2PDFFile(String pdfFile, String blogURL)
            throws Exception {
        // Fetch first: a network or parsing failure must not leave a truncated PDF behind.
        String[] blogInfo = extractBlogInfo(blogURL);

        BaseFont bfCN = createChineseBaseFont();
        Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);
        Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0, 204, 255));
        Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

        Chapter chapter = new Chapter(new Paragraph("URL转PDF测试", chFont), 1);
        Section section = chapter.addSection(new Paragraph(blogInfo[0], secFont));
        applySectionLayout(section);
        section.add(new Chunk("分类:" + blogInfo[1] + " 日期:" + blogInfo[2],
                textFont));
        Paragraph p_line = new Paragraph(" ");
        p_line.add(createSeparator());
        section.add(p_line);
        section.add(Chunk.NEWLINE);

        FileOutputStream out = new FileOutputStream(pdfFile);
        try {
            Document document = new Document();
            PdfWriter pdfwriter = PdfWriter.getInstance(document, out);
            pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);
            document.open();
            document.add(chapter);
            // Append the post's HTML body after the chapter header.
            XMLWorkerHelper.getInstance().parseXHtml(pdfwriter, document,
                    parse2Stream(blogInfo[3]));
            document.close();
        } finally {
            // Releases the file handle on failure; harmless after a normal close.
            out.close();
        }
    }

    /**
     * Writes the blog post to a PDF file by first converting its HTML body into iText
     * elements, which makes it possible to append further sections to the chapter.
     * Failures are reported on standard error and otherwise ignored.
     *
     * @param pdfFile path of the PDF file to create
     * @param blogURL address of the blog post
     */
    public static void parseURL2PDFElement(String pdfFile, String blogURL) {
        FileOutputStream outputStream = null;
        try {
            // Build the whole chapter before touching the output file so that a
            // fetch or parse failure does not leave a truncated PDF behind.
            String[] blogInfo = extractBlogInfo(blogURL);

            BaseFont bfCN = createChineseBaseFont();
            Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);
            Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0, 204, 255));
            Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

            Chapter chapter = new Chapter(new Paragraph("URL转PDF元素便于追加其他内容",
                    chFont), 1);
            Section section = chapter.addSection(new Paragraph(blogInfo[0], secFont));
            applySectionLayout(section);
            section.add(new Chunk("分类:" + blogInfo[1] + " 发表日期:" + blogInfo[2],
                    textFont));
            Paragraph p_line = new Paragraph();
            p_line.add(createSeparator());
            section.add(p_line);
            section.add(Chunk.NEWLINE);

            // Collect every element XMLWorker produces from the post's HTML.
            final List<Element> pdfeleList = new ArrayList<Element>();
            ElementHandler elemH = new ElementHandler() {
                public void add(final Writable w) {
                    if (w instanceof WritableElement) {
                        pdfeleList.addAll(((WritableElement) w).elements());
                    }
                }
            };
            XMLWorkerHelper.getInstance().parseXHtml(elemH,
                    new InputStreamReader(parse2Stream(blogInfo[3]), "utf-8"));

            // Separators and direct-write elements are skipped when building the section.
            List<Element> list = new ArrayList<Element>();
            for (Element ele : pdfeleList) {
                if (ele instanceof LineSeparator
                        || ele instanceof WritableDirectElement) {
                    continue;
                }
                list.add(ele);
            }
            section.addAll(list);

            section = chapter.addSection(new Paragraph("继续添加章节", secFont));
            applySectionLayout(section);
            section.add(new Chunk("测试URL转为PDF元素方便追加其他内容", textFont));

            Document document = new Document(PageSize.A4);
            outputStream = new FileOutputStream(pdfFile);
            PdfWriter.getInstance(document, outputStream);
            document.open();
            document.add(chapter);
            document.close();
        } catch (Exception e) {
            // Best-effort demo: report the failure and carry on.
            e.printStackTrace();
        } finally {
            closeQuietly(outputStream);
        }
    }

    /** Creates the CJK base font (STSongStd-Light, not embedded) shared by all text. */
    private static BaseFont createChineseBaseFont() throws Exception {
        return BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", false);
    }

    /** Applies the indentation, bookmark and numbering settings used for every section. */
    private static void applySectionLayout(Section section) {
        section.setIndentation(10);
        section.setIndentationLeft(10);
        section.setBookmarkOpen(false);
        section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);
    }

    /** Creates the light-grey, full-width rule drawn under a section header. */
    private static LineSeparator createSeparator() {
        return new LineSeparator(1, 100, new BaseColor(204, 204, 204),
                Element.ALIGN_CENTER, -2);
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}

View File

@ -1,255 +0,0 @@
package com.key.common.plugs.itextpdf;
import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Section;
import com.itextpdf.text.WritableDirectElement;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.draw.LineSeparator;
import com.itextpdf.tool.xml.ElementHandler;
import com.itextpdf.tool.xml.Writable;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.pipeline.WritableElement;
/**
 * Demo that downloads a web page and renders it to PDF with iText 5 and XMLWorker.
 * <p>
 * Two variants are shown: {@link #parseURL2PDFFile(String, String)} streams the page's
 * HTML straight into the PDF, while {@link #parseURL2PDFElement(String, String)} first
 * converts the HTML into iText elements so that more content can be appended to the
 * same chapter afterwards.
 *
 * @author <a href="http://www.micmiu.com">Michael Sun</a>
 */
public class Demo5URL2PDF {

    /**
     * Runs both conversions against a locally served test page and writes the results
     * to fixed paths under {@code D:/Documents}.
     *
     * @param args unused
     * @throws Exception if the first (streaming) conversion fails
     */
    public static void main(String[] args) throws Exception {
        String blogURL = "http://localhost:8080/survey/pdfTest.jsp";
        // Variant 1: stream the page content straight into a PDF file.
        String pdfFile = "D:/Documents/demo-URL.pdf";
        Demo5URL2PDF.parseURL2PDFFile(pdfFile, blogURL);
        // Variant 2: turn the page content into PDF elements first.
        String pdfFile2 = "D:/Documents/demo-URL2.pdf";
        Demo5URL2PDF.parseURL2PDFElement(pdfFile2, blogURL);
    }

    /**
     * Fetches the page and extracts its basic information.
     *
     * @param blogURL address of the page
     * @return a four-element array: {@code [title, category, date, content HTML]}
     * @throws IllegalStateException if the title, category link or date element is
     *         missing from the page
     * @throws Exception if the page cannot be fetched
     */
    public static String[] extractBlogInfo(String blogURL) throws Exception {
        org.jsoup.nodes.Document doc = Jsoup.connect(blogURL).get();
        String[] info = new String[4];

        info[0] = requireFirst(doc, "h2.title", blogURL).text();
        info[1] = requireFirst(doc, "a[rel=category tag]", blogURL)
                .attr("href").replace("http://www.micmiu.com/", "");

        // The date element reads "<label>日期<value>"; keep only the value. When the
        // label is absent or nothing follows it, fall back to the element text minus
        // the label instead of failing with an index error.
        String dateText = requireFirst(doc, "span.post-info-date", blogURL).text();
        String[] dateParts = dateText.split("日期");
        info[2] = dateParts.length > 1
                ? dateParts[1].trim()
                : dateText.replace("日期", "").trim();

        info[3] = formatContentTag(doc.select("div.entry").first());
        return info;
    }

    /**
     * Returns the first element matching {@code cssQuery}, failing with a message that
     * names the selector rather than a bare {@code NullPointerException}.
     */
    private static org.jsoup.nodes.Element requireFirst(
            org.jsoup.nodes.Document doc, String cssQuery, String blogURL) {
        org.jsoup.nodes.Element found = doc.select(cssQuery).first();
        if (found == null) {
            throw new IllegalStateException(
                    "No element matches '" + cssQuery + "' in " + blogURL);
        }
        return found;
    }

    /**
     * Cleans up the page body: removes {@code div} elements and replaces image links
     * ({@code <a href="*.jpg"><img src="*.jpg"/></a>}) with the bare {@code <img>}.
     *
     * @param entry the body element, may be {@code null}
     * @return the cleaned inner HTML, or an empty string when {@code entry} is {@code null}
     */
    private static String formatContentTag(org.jsoup.nodes.Element entry) {
        if (entry == null) {
            return "";
        }
        entry.select("div").remove();
        for (org.jsoup.nodes.Element imageLink : entry
                .select("a[href~=(?i)\\.(png|jpe?g)]")) {
            org.jsoup.nodes.Element img = imageLink.select("img").first();
            // A link to an image file does not necessarily wrap an <img>; leave such
            // links untouched instead of discarding the whole body.
            if (img != null) {
                imageLink.replaceWith(img);
            }
        }
        return entry.html();
    }

    /**
     * Wraps a string in an {@link InputStream} over its UTF-8 bytes.
     *
     * @param content text to expose as a stream
     * @return the stream, or {@code null} when {@code content} is {@code null}
     */
    public static InputStream parse2Stream(String content) {
        if (content == null) {
            return null;
        }
        return new ByteArrayInputStream(
                content.getBytes(java.nio.charset.Charset.forName("UTF-8")));
    }

    /**
     * Writes the page to a PDF file, streaming its HTML body directly into the
     * document after a generated chapter header.
     *
     * @param pdfFile path of the PDF file to create
     * @param blogURL address of the page
     * @throws Exception if the page cannot be fetched or the PDF cannot be written
     */
    public static void parseURL2PDFFile(String pdfFile, String blogURL)
            throws Exception {
        // Fetch first: a network or parsing failure must not leave a truncated PDF behind.
        String[] blogInfo = extractBlogInfo(blogURL);

        BaseFont bfCN = createChineseBaseFont();
        Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);
        Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0, 204, 255));
        Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

        Chapter chapter = new Chapter(new Paragraph("URL转PDF测试", chFont), 1);
        Section section = chapter.addSection(new Paragraph(blogInfo[0], secFont));
        applySectionLayout(section);
        section.add(new Chunk("分类:" + blogInfo[1] + " 日期:" + blogInfo[2],
                textFont));
        Paragraph p_line = new Paragraph(" ");
        p_line.add(createSeparator());
        section.add(p_line);
        section.add(Chunk.NEWLINE);

        FileOutputStream out = new FileOutputStream(pdfFile);
        try {
            Document document = new Document();
            PdfWriter pdfwriter = PdfWriter.getInstance(document, out);
            pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);
            document.open();
            document.add(chapter);
            // Append the page's HTML body after the chapter header.
            XMLWorkerHelper.getInstance().parseXHtml(pdfwriter, document,
                    parse2Stream(blogInfo[3]));
            document.close();
        } finally {
            // Releases the file handle on failure; harmless after a normal close.
            out.close();
        }
    }

    /**
     * Writes the page to a PDF file by first converting its HTML body into iText
     * elements, which makes it possible to append further sections to the chapter.
     * Failures are reported on standard error and otherwise ignored.
     *
     * @param pdfFile path of the PDF file to create
     * @param blogURL address of the page
     */
    public static void parseURL2PDFElement(String pdfFile, String blogURL) {
        FileOutputStream outputStream = null;
        try {
            // Build the whole chapter before touching the output file so that a
            // fetch or parse failure does not leave a truncated PDF behind.
            String[] blogInfo = extractBlogInfo(blogURL);

            BaseFont bfCN = createChineseBaseFont();
            Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);
            Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0, 204, 255));
            Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

            Chapter chapter = new Chapter(new Paragraph("URL转PDF元素便于追加其他内容",
                    chFont), 1);
            Section section = chapter.addSection(new Paragraph(blogInfo[0], secFont));
            applySectionLayout(section);
            section.add(new Chunk("分类:" + blogInfo[1] + " 发表日期:" + blogInfo[2],
                    textFont));
            Paragraph p_line = new Paragraph();
            p_line.add(createSeparator());
            section.add(p_line);
            section.add(Chunk.NEWLINE);

            // Collect every element XMLWorker produces from the page's HTML.
            final List<Element> pdfeleList = new ArrayList<Element>();
            ElementHandler elemH = new ElementHandler() {
                public void add(final Writable w) {
                    if (w instanceof WritableElement) {
                        pdfeleList.addAll(((WritableElement) w).elements());
                    }
                }
            };
            XMLWorkerHelper.getInstance().parseXHtml(elemH,
                    new InputStreamReader(parse2Stream(blogInfo[3]), "utf-8"));

            // Separators and direct-write elements are skipped when building the section.
            List<Element> list = new ArrayList<Element>();
            for (Element ele : pdfeleList) {
                if (ele instanceof LineSeparator
                        || ele instanceof WritableDirectElement) {
                    continue;
                }
                list.add(ele);
            }
            section.addAll(list);

            section = chapter.addSection(new Paragraph("继续添加章节", secFont));
            applySectionLayout(section);
            section.add(new Chunk("测试URL转为PDF元素方便追加其他内容", textFont));

            Document document = new Document(PageSize.A4);
            outputStream = new FileOutputStream(pdfFile);
            PdfWriter.getInstance(document, outputStream);
            document.open();
            document.add(chapter);
            document.close();
        } catch (Exception e) {
            // Best-effort demo: report the failure and carry on.
            e.printStackTrace();
        } finally {
            closeQuietly(outputStream);
        }
    }

    /** Creates the CJK base font (STSongStd-Light, not embedded) shared by all text. */
    private static BaseFont createChineseBaseFont() throws Exception {
        return BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", false);
    }

    /** Applies the indentation, bookmark and numbering settings used for every section. */
    private static void applySectionLayout(Section section) {
        section.setIndentation(10);
        section.setIndentationLeft(10);
        section.setBookmarkOpen(false);
        section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);
    }

    /** Creates the light-grey, full-width rule drawn under a section header. */
    private static LineSeparator createSeparator() {
        return new LineSeparator(1, 100, new BaseColor(204, 204, 204),
                Element.ALIGN_CENTER, -2);
    }

    /** Closes {@code resource} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}

View File

@ -1,119 +0,0 @@
package com.key.common.plugs.itextpdf;
import java.io.FileOutputStream;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.Document;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Section;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
/**
 * Demo of basic iText 5 usage: a simple PDF containing Chinese text and images, and a
 * PDF with document metadata and a chapter/section structure. Output and image paths
 * are hard-coded Windows paths.
 */
public class ItextpdfTest {

    /** Logo image embedded twice by {@link #writeSimplePdf()}. */
    private static final String LOGO_PATH =
            "F:/keyworkspace/survey/WebRoot/images/logo/snlogo.png";

    /**
     * Generates both demo PDFs; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            ItextpdfTest.writeSimplePdf();
            ItextpdfTest.writeCharpter();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes {@code D:\Documents\ITextTest.pdf}: a few paragraphs in different fonts
     * plus the logo image in two different layouts.
     *
     * @throws Exception if a font or image cannot be loaded or the file cannot be written
     */
    public static void writeSimplePdf() throws Exception {
        FileOutputStream out = new FileOutputStream("D:\\Documents\\ITextTest.pdf");
        try {
            // 1. Create the document: page size, then left, right, top and bottom margins.
            Document document = new Document(PageSize.A4, 50, 50, 50, 50);
            // 2. Bind a writer to the document; it is what writes the PDF to the stream.
            PdfWriter.getInstance(document, out);
            // 3. Open the document.
            document.open();

            // 4. Add content. Chinese text needs a CJK font; the default font has no such glyphs.
            BaseFont bfChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H",
                    BaseFont.NOT_EMBEDDED);
            Font fontChinese = new Font(bfChinese, 22, Font.BOLD, BaseColor.BLACK);
            document.add(new Paragraph("sdfsdfsd全是中文显示了没.fsdfsfs", fontChinese));
            document.add(new Paragraph(
                    "Some more text on the first page with different color and font type.",
                    FontFactory.getFont(FontFactory.COURIER, 14, Font.BOLD,
                            new BaseColor(255, 150, 200))));
            document.add(new Paragraph("你这里有中亠好", fontChinese));

            // Small logo, right-aligned with text wrapping around it.
            // Supported image formats include GIF, JPEG, PNG and WMF.
            Image wrappedLogo = Image.getInstance(LOGO_PATH);
            wrappedLogo.setBorder(5);
            wrappedLogo.scaleAbsolute(30, 30);
            wrappedLogo.setAlignment(Image.RIGHT | Image.TEXTWRAP);
            document.add(wrappedLogo);

            document.add(new Paragraph(
                    "你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好你这里有中亠好",
                    fontChinese));

            // Same logo again at half size with a thin red box border.
            Image borderedLogo = Image.getInstance(LOGO_PATH);
            borderedLogo.setBorder(Image.BOX);
            borderedLogo.setBorderColor(BaseColor.RED);
            borderedLogo.setBorderWidth(1);
            borderedLogo.scalePercent(50);
            document.add(borderedLogo);

            // 5. Close the document, which finishes the PDF.
            document.close();
        } finally {
            // Releases the file handle on failure; harmless after a normal close.
            out.close();
        }
    }

    /**
     * Writes {@code D:\Documents\ITextTestCharpter.pdf}: a document with metadata,
     * some leading paragraphs and one chapter containing one section.
     *
     * @throws Exception if the file cannot be written
     */
    public static void writeCharpter() throws Exception {
        FileOutputStream out = new FileOutputStream(
                "D:\\Documents\\ITextTestCharpter.pdf");
        try {
            // Page size, then left, right, top and bottom margins.
            Document document = new Document(PageSize.A4, 20, 20, 20, 20);
            PdfWriter.getInstance(document, out);
            document.open();

            // Document metadata.
            document.addTitle("Hello mingri example");
            document.addAuthor("wolf");
            document.addSubject("This example explains how to add metadata.");
            document.addKeywords("iText, Hello mingri");
            document.addCreator("My program using iText");

            // Five blank lines of vertical spacing before the first text.
            for (int i = 0; i < 5; i++) {
                document.add(new Paragraph("\n"));
            }
            for (int i = 0; i < 4; i++) {
                document.add(new Paragraph("First page of the document."));
            }
            // The original passed FontFactory.defaultEncoding (an encoding name) as the
            // font name, which only worked through a failed lookup and a fallback to the
            // default font. Request Helvetica, the default, explicitly.
            document.add(new Paragraph(
                    "Some more text on the first page with different color and font type.",
                    FontFactory.getFont(FontFactory.HELVETICA, 10, Font.BOLD,
                            new BaseColor(0, 0, 0))));

            Paragraph title1 = new Paragraph("Chapter 1",
                    FontFactory.getFont(FontFactory.HELVETICA, 18, Font.BOLDITALIC,
                            new BaseColor(0, 0, 255)));
            // New chapter; a number depth of 0 suppresses the chapter number in the title.
            Chapter chapter1 = new Chapter(title1, 1);
            chapter1.setNumberDepth(0);

            Paragraph title11 = new Paragraph("This is Section 1 in Chapter 1",
                    FontFactory.getFont(FontFactory.HELVETICA, 16, Font.BOLD,
                            new BaseColor(255, 0, 0)));
            Section section1 = chapter1.addSection(title11);
            section1.add(new Paragraph(
                    "This text comes as part of section 1 of chapter 1."));
            section1.add(new Paragraph("Following is a 3 X 2 table."));
            document.add(chapter1);

            document.close();
        } finally {
            // Releases the file handle on failure; harmless after a normal close.
            out.close();
        }
    }
}

View File

@ -1,97 +0,0 @@
package com.key.common.plugs.itextpdf;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import org.springframework.context.i18n.LocaleContextHolder;
import org.w3c.tidy.Configuration;
import org.w3c.tidy.Tidy;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import com.itextpdf.text.pdf.BaseFont;
/**
 * Renders a web page to PDF: the page is fetched, converted to XHTML with JTidy and
 * laid out by Flying Saucer's {@code ITextRenderer}. The output file and the font
 * files are hard-coded Windows paths.
 */
public class PDFUtil {

    /** File the generated PDF is written to. */
    private static final String OUTPUT_FILE = "D:/Documents/test3.pdf";

    /** SimSun font collection registered with the renderer. */
    private static final String SIMSUN_FONT =
            "F:/keyworkspace/survey/src/conf/itextpdf/simsun.ttc";

    /** Arial Unicode MS font registered with the renderer. */
    private static final String ARIAL_UNICODE_FONT =
            "F:/keyworkspace/survey/src/conf/itextpdf/ARIALUNI.TTF";

    /**
     * Exports a locally served test page; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://localhost:8080/survey/pdfTest.jsp";
        try {
            PDFUtil.exportPdfFile(url);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders the page at {@code urlStr} to a PDF file.
     * (Added by huangt, 2012-06-01.)
     *
     * @param urlStr address of the page to export
     * @return the generated PDF file
     * @throws Exception if the page cannot be fetched, laid out or written
     */
    public static File exportPdfFile(String urlStr) throws Exception {
        // Fetch and lay out the page before opening the output file, so that a
        // failure does not leave an empty or truncated PDF behind.
        String xhtml = getHtmlFile(urlStr);
        ITextRenderer renderer = new ITextRenderer();
        renderer.setDocumentFromString(xhtml);

        // Register fonts that contain CJK glyphs.
        ITextFontResolver fontResolver = renderer.getFontResolver();
        fontResolver.addFont(SIMSUN_FONT, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
        fontResolver.addFont(ARIAL_UNICODE_FONT, BaseFont.IDENTITY_H,
                BaseFont.NOT_EMBEDDED);
        renderer.layout();

        OutputStream os = new FileOutputStream(OUTPUT_FILE);
        try {
            renderer.createPDF(os);
            os.flush();
        } finally {
            os.close();
        }
        return new File(OUTPUT_FILE);
    }

    /**
     * Fetches a page and returns it as XHTML text. A {@code locale} query parameter
     * carrying the current Spring locale is appended to the URL.
     * (Added by huangt, 2012-06-01.)
     *
     * @param urlStr address of the page to read
     * @return the page converted to XHTML by JTidy
     * @throws Exception if the page cannot be fetched
     */
    public static String getHtmlFile(String urlStr) throws Exception {
        String separator = urlStr.indexOf("?") != -1 ? "&" : "?";
        URL url = new URL(urlStr + separator + "locale="
                + LocaleContextHolder.getLocale().toString());

        ByteArrayOutputStream xhtml = new ByteArrayOutputStream();
        InputStream is = url.openConnection().getInputStream();
        try {
            Tidy tidy = new Tidy();
            tidy.setXHTML(true); // emit XHTML (plain XML is also possible)
            tidy.setCharEncoding(Configuration.UTF8); // needed for Chinese text to survive
            tidy.setTidyMark(false); // do not add Tidy's own <meta> generator tag
            tidy.setXmlPi(true); // prepend <?xml version="1.0"?>
            tidy.setIndentContent(true); // cosmetic only: indent the output
            tidy.parse(is, xhtml);
        } finally {
            is.close();
        }
        // Decode the buffer in one go. Re-reading it line by line and concatenating the
        // lines dropped every line break, fusing words and attributes that Tidy had
        // wrapped onto separate lines.
        return xhtml.toString("utf-8");
    }
}

View File

@ -1,84 +0,0 @@
package com.key.common.plugs.itextpdf;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Demo that wraps an HTML file in an OLE2 container so that it can be opened as a
 * {@code .doc} file.
 */
public class WordTest {

    /** Directory the document is written to by {@link #writeWordFile(String)}. */
    private static final String DEFAULT_OUTPUT_DIR = "D:/"; // adjust to the local setup

    /** Name of the generated document inside the output directory. */
    private static final String OUTPUT_NAME = "temp1.doc";

    /**
     * Converts an HTML file into {@code temp1.doc} in the default output directory.
     *
     * @param filepath path of the HTML file
     * @return {@code true} if the document was written, {@code false} otherwise
     * @throws Exception if reading the HTML file fails with a non-I/O error
     */
    public boolean writeWordFile(String filepath) throws Exception {
        return writeWordFile(filepath, DEFAULT_OUTPUT_DIR);
    }

    /**
     * Converts an HTML file into {@code temp1.doc} in the given directory. The HTML
     * text is stored as the {@code WordDocument} stream of a new OLE2 file system.
     * I/O failures are printed to standard error and reported as {@code false}.
     *
     * @param filepath  path of the HTML file
     * @param outputDir existing directory to write the document into
     * @return {@code true} if the document was written; {@code false} if
     *         {@code outputDir} is empty or not an existing directory, or if an I/O
     *         error occurred
     * @throws Exception if reading the HTML file fails with a non-I/O error
     */
    public boolean writeWordFile(String filepath, String outputDir) throws Exception {
        if (outputDir == null || "".equals(outputDir)) {
            return false;
        }
        File targetDir = new File(outputDir);
        if (!targetDir.isDirectory()) {
            return false;
        }

        ByteArrayInputStream bais = null;
        FileOutputStream fos = null;
        try {
            // NOTE(review): both the read and this encode use the platform default
            // charset, as before. Confirm that matches the encoding of the HTML files.
            byte[] htmlBytes = readFile(filepath).getBytes();
            bais = new ByteArrayInputStream(htmlBytes);

            POIFSFileSystem poifs = new POIFSFileSystem();
            DirectoryEntry directory = poifs.getRoot();
            directory.createDocument("WordDocument", bais);

            fos = new FileOutputStream(new File(targetDir, OUTPUT_NAME));
            poifs.writeFilesystem(fos);
            // The original never set its result flag, so it reported failure even
            // after writing the file successfully.
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            if (fos != null) {
                fos.close();
            }
            if (bais != null) {
                bais.close();
            }
        }
    }

    /**
     * Reads a whole text file into a string using the platform default charset.
     *
     * @param filename path of the file to read
     * @return the file content
     * @throws Exception if the file cannot be opened or read
     */
    public String readFile(String filename) throws Exception {
        StringBuilder content = new StringBuilder();
        BufferedReader br = new BufferedReader(new FileReader(filename));
        try {
            // Read until end of stream. ready() only says whether a read would block,
            // so it is not a reliable end-of-file test.
            char[] chunk = new char[4096];
            int count;
            while ((count = br.read(chunk)) != -1) {
                content.append(chunk, 0, count);
            }
        } finally {
            br.close();
        }
        return content.toString();
    }

    /**
     * Converts the sample file {@code D:/ttpp.htm}.
     *
     * @param args unused
     * @throws Exception if the conversion fails with a non-I/O error
     */
    public static void main(String[] args) throws Exception {
        new WordTest().writeWordFile("D:/ttpp.htm");
    }
}