这两天在整理 Word、Excel、CAD 在线预览的问题,刚花了点时间写了一个把 Word 文档转换成 HTML 的工具。不过这种方式的显示效果稍有问题,可能不会在项目中使用;先转换成 PDF 再预览可能会更好一些。但既然工具已经写好了,还是先记录下来,留着备用。
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;
/**
* Doc或者Docx文档转化成HTML的工具类
* Created by nemo on 16-10-27.
*/
public class Doc2HTMLUtils {
final static String _HTML_DIR="html/"; //HTML存放位置
final static String _IMG_DIR="pic/"; //图片存储位置
/**
* 处理docx文件
* @param path 文件路径
* @param file 文件名称
* @throws Exception
*/
public static void dealDocx(String path,String file) throws Exception {
File f = new File(path+file);
//读入文件
InputStream in = new FileInputStream(f);
XWPFDocument document = new XWPFDocument(in);
//图片处理,图片存储路径根据文件名称单独存储,防止覆盖
File imageFolderFile = new File(path+_HTML_DIR+_IMG_DIR+file+"/");
XHTMLOptions options = XHTMLOptions.create().URIResolver(
new FileURIResolver(imageFolderFile));
options.setExtractor(new FileImageExtractor(imageFolderFile));
//输出到HTML,路径为 当前docx文件路径+html+文件名称.html
OutputStream out = new FileOutputStream(new File(
path+_HTML_DIR+file+".html"));
XHTMLConverter.getInstance().convert(document, out, options);
}
/**
* 处理doc文件
* @param path 文件路径
* @param file 文件名称
* @throws Exception
*/
private static void dealDoc(String path,String file) throws Exception{
//读入doc文件
InputStream input = new FileInputStream(path + file);
HWPFDocument wordDocument = new HWPFDocument(input);
//图片处理,图片存储路径根据文件名称单独存储,防止覆盖
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType,
String suggestedName, float widthInches, float heightInches) {
return _IMG_DIR + file + "/" + suggestedName;
}
});
wordToHtmlConverter.processDocument(wordDocument);
List pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {
pic.writeImageContent(new FileOutputStream(path + _HTML_DIR + _IMG_DIR + file + "/"
+ pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
//输出到html
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream outStream = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(outStream);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
outStream.close();
String content = new String(outStream.toByteArray());
FileUtils.writeStringToFile(new File(path + _HTML_DIR + file + ".html"), content, "utf-8");
}
public static FileInputStream dealWord(String path,String file) throws Exception{
File f = new File(path+file);
if (!f.exists()) { //如果文件不存在
throw new Exception();
} else {
String outPutFile = path+_HTML_DIR+file+".html"; //输出html的路径
File mFile = new File(outPutFile);
if (mFile.exists()) { //优先本地文件读取,如果已经找到这个文件,直接返回
return new FileInputStream(outPutFile);
} else {
//穿件一些必要目录
mFile = new File(path + _HTML_DIR + _IMG_DIR + file + "/");
if (!mFile.exists()) {
mFile.mkdirs();
}
//开始处理
if (file != null) {
if (file.toLowerCase().endsWith("doc")) {
dealDoc(path, file);
} else if (file.toLowerCase().endsWith("docx")) {
dealDocx(path, file);
}
return new FileInputStream(outPutFile);
}
return null;
}
}
}
public static void main(String args[]) {
try {
dealWord("/home/nemo/","t.doc");
} catch (Exception e) {
e.printStackTrace();
}
}
}