ワードからhtmlへ #HTML

序文

最近携わったプロジェクトにWordのインポート機能がありました。当初はプレーンテキストとして取り込み、フロントエンドのページに表示していましたが、顧客から「Wordファイル内と同じ書式で表示できないか」という要望がありました。

推奨

1. POI

2. オープンオフィス

3. libreoffice

poi

pom.xml

<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi-scratchpad</artifactId>
 <version>3.14</version>
</dependency>
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi-ooxml</artifactId>
 <version>3.14</version>
</dependency>
<dependency>
 <groupId>fr.opensagres.xdocreport</groupId>
 <artifactId>xdocreport</artifactId>
 <version>1.0.6</version>
</dependency>
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>poi-ooxml-schemas</artifactId>
 <version>3.14</version>
</dependency>
<dependency>
 <groupId>org.apache.poi</groupId>
 <artifactId>ooxml-schemas</artifactId>
 <version>1.3</version>
</dependency>

Wordファイルの読み込みと変換

/**
 * Converts a Word 2003 ({@code .doc}) file to an HTML file.
 *
 * <p>If the target HTML file already exists it is not regenerated; its absolute path is
 * returned immediately. Pictures embedded in the document are written to an {@code image}
 * sub-directory of {@code htmlPath} and are referenced from the HTML through the relative
 * path {@code image/{suggestedName}}.
 *
 * @param wordPath directory of the Word file (joined to the file name with
 *                 {@link File#separator})
 * @param wordName Word file name without its suffix
 * @param suffix   suffix of the Word file; appended directly to {@code wordName}
 * @param htmlPath directory in which the HTML file is stored; it is concatenated directly
 *                 with the file name, so it has to end with a path separator
 * @return absolute path of the (existing or newly generated) HTML file
 * @throws IOException                  if the Word file cannot be read or the HTML file
 *                                      cannot be written
 * @throws TransformerException         if the HTML DOM cannot be serialized
 * @throws ParserConfigurationException if no DOM document builder can be created
 */
public static String Word2003ToHtml(String wordPath, String wordName, String suffix, String htmlPath)
        throws IOException, TransformerException, ParserConfigurationException {
    String htmlName = wordName + ".html";
    final String imagePath = htmlPath + "image" + File.separator;
    // Reuse a previously generated HTML file instead of converting again.
    File htmlFile = new File(htmlPath + htmlName);
    if (htmlFile.exists()) {
        return htmlFile.getAbsolutePath();
    }
    // Load the source Word document; the stream is closed even if parsing fails.
    final String file = wordPath + File.separator + wordName + suffix;
    HWPFDocument wordDocument;
    try (InputStream input = new FileInputStream(new File(file))) {
        wordDocument = new HWPFDocument(input);
    }
    WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
    // Tell the converter where to store embedded pictures.
    wordToHtmlConverter.setPicturesManager(new PicturesManager() {
        @Override
        public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                float widthInches, float heightInches) {
            File imgPath = new File(imagePath);
            // Create the image directory on first use.
            if (!imgPath.exists()) {
                imgPath.mkdirs();
            }
            File file = new File(imagePath + suggestedName);
            try (OutputStream os = new FileOutputStream(file)) {
                os.write(content);
            } catch (IOException e) {
                // Best effort: a picture that cannot be saved does not abort the conversion.
                // FileNotFoundException is an IOException, so one handler covers both cases.
                e.printStackTrace();
            }
            // Path of the picture as referenced from the HTML file (relative path).
            return "image/" + suggestedName;
        }
    });
    // Convert the Word document into an HTML DOM.
    wordToHtmlConverter.processDocument(wordDocument);
    Document htmlDocument = wordToHtmlConverter.getDocument();
    // Make sure the directory that will hold the HTML file exists.
    File folder = new File(htmlPath);
    if (!folder.exists()) {
        folder.mkdirs();
    }
    TransformerFactory factory = TransformerFactory.newInstance();
    Transformer serializer = factory.newTransformer();
    // NOTE(review): the output encoding is hard-coded to GB2312; consider UTF-8 if callers allow.
    serializer.setOutputProperty(OutputKeys.ENCODING, "GB2312");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    // Serialize the DOM; the output stream is closed even if the transformation fails.
    try (OutputStream outStream = new FileOutputStream(htmlFile)) {
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
    }
    return htmlFile.getAbsolutePath();
}
/**
 * Converts a Word 2007 ({@code .docx}) file to an HTML file.
 *
 * <p>If the target HTML file already exists it is not regenerated; its absolute path is
 * returned immediately. Pictures are extracted into an {@code image} sub-directory of
 * {@code htmlPath} and are resolved in the HTML relative to {@code image}.
 *
 * @param wordPath directory of the Word file (joined to the file name with
 *                 {@link File#separator})
 * @param wordName Word file name without its suffix
 * @param suffix   suffix of the Word file; appended directly to {@code wordName}
 * @param htmlPath directory in which the HTML file is stored; it is concatenated directly
 *                 with the file name, so it has to end with a path separator
 * @return absolute path of the (existing or newly generated) HTML file
 * @throws IOException if the Word file cannot be read or the HTML file cannot be written
 */
public static String word2007ToHtml(String wordPath, String wordName, String suffix, String htmlPath)
        throws IOException {
    String htmlName = wordName + ".html";
    String imagePath = htmlPath + "image" + File.separator;
    // Reuse a previously generated HTML file instead of converting again.
    File htmlFile = new File(htmlPath + htmlName);
    if (htmlFile.exists()) {
        return htmlFile.getAbsolutePath();
    }
    // Source Word file.
    File wordFile = new File(wordPath + File.separator + wordName + suffix);
    // The input stream stays open for the whole conversion and is always closed afterwards.
    try (InputStream in = new FileInputStream(wordFile)) {
        // 1) Load the Word document into an XWPFDocument.
        XWPFDocument document = new XWPFDocument(in);
        // 2) Configure the XHTML conversion.
        File imgFolder = new File(imagePath);
        XHTMLOptions options = XHTMLOptions.create();
        options.setExtractor(new FileImageExtractor(imgFolder));
        // Pictures are referenced relative to the HTML file.
        options.URIResolver(new BasicURIResolver("image"));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);
        // 3) Convert the XWPFDocument to XHTML.
        // Make sure the directory that will hold the HTML file exists.
        File folder = new File(htmlPath);
        if (!folder.exists()) {
            folder.mkdirs();
        }
        // The output stream is closed even if the conversion fails.
        try (OutputStream out = new FileOutputStream(htmlFile)) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
    return htmlFile.getAbsolutePath();
}

open office

JODConverterのダウンロード: www.artofsolving.com/opensource/... JODConverterは、OpenOfficeを呼び出してフォーマット変換を行うサードパーティ製のjarパッケージです。

linuxワンクリックインストールスクリプト

# Downloads and installs Apache OpenOffice 4.1.6, choosing the RPM or DEB bundle
# depending on whether /etc/redhat-release exists.
# NOTE(review): the download URLs below look truncated ("https://./...") - confirm the
# real URLs before running this script.
cd /tmp || exit 1

# Install from the RPM bundle (RedHat-family systems).
install_redhat() {
    # Test the download/extract/cd chain directly so a failure in any step is detected.
    if wget https://./Apache_OpenOffice_..6_Linux_x86-64_install-rpm_zh-..gz -cO openoffice_rpm.tar.gz && tar zxf /tmp/openoffice_rpm.tar.gz && cd /tmp/ja-JP/RPMS; then
        yum install -y libXext.x86_64
        yum groupinstall -y "X Window System"
        rpm -Uvih *.rpm
        echo 'install desktop service ...'
        rpm -Uvih desktop-integration/openoffice4.1.6-redhat-menus-4.1.6-9790.noarch.rpm
        echo 'install finshed...'
    else
        echo 'download package error...'
    fi
}

# Install from the DEB bundle (Debian/Ubuntu systems).
install_ubuntu() {
    wget https://./Apache_OpenOffice_..6_Linux_x86-64_install-deb_zh-..gz -cO openoffice_deb.tar.gz && tar zxf /tmp/openoffice_deb.tar.gz && cd /tmp/ja-JP/DEBS
    # Capture the status before echoing it: echo itself resets $? to 0, which would
    # make the check below always succeed.
    status=$?
    echo $status
    if [ $status -eq 0 ]; then
        apt-get install -y libxrender1
        apt-get install -y libxt6
        apt-get install -y libxext-dev
        apt-get install -y libfreetype6-dev
        dpkg -i *.deb
        echo 'install desktop service ...'
        dpkg -i desktop-integration/openoffice4.1-debian-menus_4.1.6-9790_all.deb
        echo 'install finshed...'
    else
        echo 'download package error...'
    fi
}

# Pick the installer that matches the distribution family.
if [ -f "/etc/redhat-release" ]; then
    yum install -y wget
    install_redhat
else
    apt-get install -y wget
    install_ubuntu
fi

OpenOfficeのインストールが完了したら、コマンドラインから次のコマンドを実行してOpenOfficeのサービスを起動します：

soffice -headless -accept="socket,port=8100;urp;"

pom.xmlのテスト

<dependency>
 <groupId>com.artofsolving</groupId>
 <artifactId>jodconverter</artifactId>
 <version>2.2.1</version>
</dependency>
<!-- ログ -->
<dependency>
 <groupId>org.slf4j</groupId>
 <artifactId>slf4j-api</artifactId>
 <version>1.7.25</version>
</dependency>

 
/**
 * Sample program that converts a Word file to HTML through an OpenOffice service
 * listening on port 8100, using JODConverter.
 */
public class OfficeConverter {

    /**
     * Converts {@code C:/test/yy.doc} to {@code C:/test/yy.html}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        File inputFile = new File("C:/test/yy.doc");
        File outputFile = new File("C:/test/yy.html");

        OpenOfficeConnection con = new SocketOpenOfficeConnection(8100);
        try {
            con.connect();
        } catch (ConnectException e) {
            System.err.println("ファイル変換エラーは、OpenOfficeのサービスが開始されているかどうかを確認してください");
            e.printStackTrace();
            // Without a connection the conversion cannot work, so stop here.
            return;
        }
        try {
            DocumentConverter converter = new OpenOfficeDocumentConverter(con);
            converter.convert(inputFile, outputFile);
        } finally {
            // Always release the connection, even when the conversion fails.
            con.disconnect();
        }
    }
}

libreoffice

インストール

yum install libreoffice libreoffice-headless

ワードからhtmlへ

soffice --headless --convert-to html:HTML test.docx

POI、OpenOffice、LibreOfficeの違い

POIは純粋なJavaによる変換で、変換速度は比較的遅いです。

OpenOfficeはJavaコードからの操作しかサポートしていませんが、LibreOfficeはコマンドラインとJavaコードの両方をサポートしています。

最も重要な違いは、OpenOfficeは変換のキューイングをサポートしているのに対し、LibreOfficeはサポートしていない点です。LibreOfficeで複数のファイルを同時に変換すると、sofficeが詰まって（ハングして）しまいます。

ワードからhtmlへ

序文

推奨

poi

open office

libreoffice

poiオープンオフィスとlibreofficeの違い。

Read next

Spring Cloud OAuth2（JWTベース）マイクロサービス認証認証戦闘

データ構造とアルゴリズム - 並べ替えと検索のアルゴリズム

素晴らしいVue 3チェックリスト

フロントエンド開発者に欠かせない生産性ツール

flink 1.11はzeppelinを統合し、シンプルなリアルタイム・コンピューティング・プラットフォームを実現する。

アトラシアンは、新しい DevOps 体験を生み出す 12 の新機能を携えて戻ってきた！