服务器之家:专注于服务器技术及软件下载分享
分类导航

PHP教程|ASP.NET教程|Java教程|ASP教程|编程技术|正则表达式|C/C++|IOS|C#|Swift|Android|VB|R语言|JavaScript|易语言|vb.net|

服务器之家 - 编程语言 - Java教程 - java实现word文件转html文件

java实现word文件转html文件

2020-09-01 09:43 littleFatty Java教程

这篇文章主要为大家详细介绍了java实现word文件转html文件的方法,具有一定的参考价值,感兴趣的小伙伴们可以参考一下

最近在项目开发中,用户提出要在电脑没有安装office的情况下,在浏览器中打开word文件。最后确定的逻辑是:用户选择想要查看的文件,页面js判断文件是否为word文件;不是则直接下载,是则由后端根据word文件后缀调用对应的转换方法。对应的html文件已存在时直接返回其地址,不存在时先生成html文件再返回地址。js通过open()打开新的页签,展示word文件内容。新人一枚,如果代码中存在错误或有更好的实现,万望指正!

相关jar包

java实现word文件转html文件

代码:
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;
 
/**
 * word 转换成html 2017-2-27
 */
public class wordtohtml {
  
  
  /**
   * 将word2003转换为html文件 2017-2-27
   * @param wordpath word文件路径
   * @param wordname word文件名称无后缀
   * @param suffix  word文件后缀
   * @throws ioexception
   * @throws transformerexception
   * @throws parserconfigurationexception
   */
  public string word2003tohtml(string wordpath,string wordname,string suffix) throws ioexception, transformerexception, parserconfigurationexception {
    string htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;
    string htmlname = wordname + ".html";
    final string imagepath = htmlpath + "image" + file.separator;
    
    //判断html文件是否存在
    file htmlfile = new file(htmlpath + htmlname);
    if(htmlfile.exists()){
      return htmlfile.getabsolutepath();
    }
    
    //原word文档
    final string file = wordpath + file.separator + wordname + suffix;
    inputstream input = new fileinputstream(new file(file));
    
    hwpfdocument worddocument = new hwpfdocument(input);
    wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(documentbuilderfactory.newinstance().newdocumentbuilder().newdocument());
    //设置图片存放的位置
    wordtohtmlconverter.setpicturesmanager(new picturesmanager() {
      public string savepicture(byte[] content, picturetype picturetype, string suggestedname, float widthinches, float heightinches) {
        file imgpath = new file(imagepath);
        if(!imgpath.exists()){//图片目录不存在则创建
          imgpath.mkdirs();
        }
        file file = new file(imagepath + suggestedname);
        try {
          outputstream os = new fileoutputstream(file);
          os.write(content);
          os.close();
        } catch (filenotfoundexception e) {
          e.printstacktrace();
        } catch (ioexception e) {
          e.printstacktrace();
        }
        //图片在html文件上的路径 相对路径
        return "image/" + suggestedname;
      }
    });
    
    //解析word文档
    wordtohtmlconverter.processdocument(worddocument);
    document htmldocument = wordtohtmlconverter.getdocument();
    
    //生成html文件上级文件夹
    file folder = new file(htmlpath);
    if(!folder.exists()){
      folder.mkdirs();
    }
    
    //生成html文件地址
    outputstream outstream = new fileoutputstream(htmlfile);
 
    domsource domsource = new domsource(htmldocument);
    streamresult streamresult = new streamresult(outstream);
 
    transformerfactory factory = transformerfactory.newinstance();
    transformer serializer = factory.newtransformer();
    serializer.setoutputproperty(outputkeys.encoding, "utf-8");
    serializer.setoutputproperty(outputkeys.indent, "yes");
    serializer.setoutputproperty(outputkeys.method, "html");
    
    serializer.transform(domsource, streamresult);
 
    outstream.close();
    
    return htmlfile.getabsolutepath();
  }
  
  /**
   * 2007版本word转换成html 2017-2-27
   * @param wordpath word文件路径
   * @param wordname word文件名称无后缀
   * @param suffix  word文件后缀
   * @return
   * @throws ioexception
   */
  public string word2007tohtml(string wordpath,string wordname,string suffix) throws ioexception {
    string htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;
    string htmlname = wordname + ".html";
    string imagepath = htmlpath + "image" + file.separator;
    
    //判断html文件是否存在
    file htmlfile = new file(htmlpath + htmlname);
    if(htmlfile.exists()){
      return htmlfile.getabsolutepath();
    }
        
    //word文件
    file wordfile = new file(wordpath + file.separator + wordname + suffix);
    
    // 1) 加载word文档生成 xwpfdocument对象
    inputstream in = new fileinputstream(wordfile);
    xwpfdocument document = new xwpfdocument(in);
 
    // 2) 解析 xhtml配置 (这里设置iuriresolver来设置图片存放的目录)
    file imgfolder = new file(imagepath);
    xhtmloptions options = xhtmloptions.create();
    options.setextractor(new fileimageextractor(imgfolder));
    //html中图片的路径 相对路径
    options.uriresolver(new basicuriresolver("image"));
    options.setignorestylesifunused(false);
    options.setfragment(true);
    
    // 3) 将 xwpfdocument转换成xhtml
    //生成html文件上级文件夹
    file folder = new file(htmlpath);
    if(!folder.exists()){
      folder.mkdirs();
    }
    outputstream out = new fileoutputstream(htmlfile);
    xhtmlconverter.getinstance().convert(document, out, options);
    
    return htmlfile.getabsolutepath();
  }
}

文件目录:

java实现word文件转html文件

java实现word文件转html文件

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。

延伸 · 阅读

精彩推荐