赞
踩
应公司需求,需实现以下功能
本文将使用 Spire.Presentation 实现对 PPT 文件中文本内容及图片的提取。
Spire.Presentation for Java 是一个专业的 PowerPoint API,使开发人员能够在 Java 应用程序中创建、读取、编写、转换和保存 PowerPoint 文档。作为一个独立的 Java 库,Spire.Presentation 不需要在系统上安装 Microsoft PowerPoint。
小编准备了以下两个文件:《ppt_demo.ppt》和《pptx_demo.pptx》,分别代表不同版本的 PPT 格式,用于提取测试,如下图所示。
<!-- Repository declared for resolving the Spire.Presentation artifact below.
     The URL uses HTTPS: Maven 3.8.1 and later block plain-HTTP repositories by default. -->
<repositories>
    <repository>
        <id>com.e-iceblue</id>
        <name>e-iceblue</name>
        <url>https://repo.e-iceblue.com/nexus/content/groups/public/</url>
    </repository>
</repositories>

<!-- Spire.Presentation for Java, the library used by the demo code to read PPT/PPTX files. -->
<dependencies>
    <dependency>
        <groupId>e-iceblue</groupId>
        <artifactId>spire.presentation</artifactId>
        <version>4.9.2</version>
    </dependency>
</dependencies>
package com.bjzaxk.utils; import com.spire.presentation.IAutoShape; import com.spire.presentation.ISlide; import com.spire.presentation.ParagraphEx; import com.spire.presentation.Presentation; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; import java.io.FileWriter; public class Demo { public static void main(String[] args) { // String filePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\ppt_demo.ppt"; String filePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\pptx_demo.pptx"; // 文本提取后存放路径及文件名 // String extractFilePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\ppt_demo.txt"; String extractFilePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\pptx_demo.txt"; // 图片提取后存放路径 String imageFilePath = "C:\\Users\\Administrator\\Desktop\\java_poi\\demo_file\\"; pptTextExtract(filePath, extractFilePath); pptImageExtract(filePath, imageFilePath); } /** * @description: 提取PPT中的文本信息 * @author: Mr.Jkx * @time: 2023/2/2 14:53 */ public static void pptTextExtract(String filePath, String extractFilePath) { try { //加载文档 Presentation ppt = new Presentation(); ppt.loadFromFile(filePath); StringBuilder buffer = new StringBuilder(); //遍历文档中的幻灯片,提取文本 for (Object slide : ppt.getSlides()) { for (Object shape : ((ISlide) slide).getShapes()) { if (shape instanceof IAutoShape) { for (Object tp : ((IAutoShape) shape).getTextFrame().getParagraphs()) { buffer.append(((ParagraphEx) tp).getText()).append("\r\n"); } } } } if (buffer.length() > 0) { //保存到文本文件 FileWriter writer = new FileWriter(extractFilePath); writer.write(buffer.toString()); writer.flush(); writer.close(); } } catch (Exception e) { e.printStackTrace(); } } /** * @description: 提取PPT中的图片 * @author: Mr.Jkx * @time: 2023/1/10 14:26 */ public static void pptImageExtract(String filePath, String imageFilePath) { try { //加载文档 Presentation ppt = new Presentation(); ppt.loadFromFile(filePath); //提取文档中的所有图片 for (int i = 0; i < 
ppt.getImages().getCount(); i++) { BufferedImage image = ppt.getImages().get(i).getImage(); ImageIO.write(image, "PNG", new File(imageFilePath + "pptImage_" + System.currentTimeMillis() + ".png")); } } catch (Exception e) { e.printStackTrace(); } } }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。