当前位置:   article > 正文

Java 使用PDFBox对pdf文件进行相关操作_java pdfbox

java pdfbox

目录

创建空的PDF文件并保存到本地

加载已有的PDF文档 

设置PDF文档属性(包括作者、创建时间、修改时间等)

 查看PDF文档属性

往PDF页面中添加单行文字

往PDF页面中添加多行文字

读取PDF文档里的文字

往PDF文档里插入图片

加密PDF文档,即打开时需要输入密码

往PDF文档里添加javaScript脚本

将一个PDF文档拆成多个

将多个PDF文档合并成一个

将PDF页面渲染为图像

添加矩形


创建空的PDF文件并保存到本地

  1. package document;
  2. import java.io.IOException;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.pdmodel.PDPage;
  5. public class Adding_Pages {
  6. public static void main(String args[]) throws IOException {
  7. //创建 PDF 文档对象
  8. PDDocument document = new PDDocument();
  9. for (int i=0; i<10; i++) {
  10. //创建空白页
  11. PDPage blankPage = new PDPage();
  12. //将空白页添加到文档
  13. document.addPage( blankPage );
  14. }
  15. //保存文档
  16. document.save("C:/PdfBox_Examples/my_doc.pdf");
  17. System.out.println("PDF created");
  18. //关闭文档
  19. document.close();
  20. }
  21. }

加载已有的PDF文档 

  1. import java.io.File;
  2. import java.io.IOException;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.pdmodel.PDPage;
  5. public class LoadingExistingDocument {
  6. public static void main(String args[]) throws IOException {
  7. //加载现有文档
  8. File file = new File("C:/PdfBox_Examples/sample.pdf");
  9. PDDocument document = PDDocument.load(file);
  10. System.out.println("PDF loaded");
  11. //在文档中添加空白页
  12. document.addPage(new PDPage());
  13. //保存文档
  14. document.save("C:/PdfBox_Examples/sample.pdf");
  15. //关闭文档
  16. document.close();
  17. }
  18. }

设置PDF文档属性(包括作者、创建时间、修改时间等)

  1. import java.io.IOException;
  2. import java.util.Calendar;
  3. import java.util.GregorianCalendar;
  4. import org.apache.pdfbox.pdmodel.PDDocument;
  5. import org.apache.pdfbox.pdmodel.PDDocumentInformation;
  6. import org.apache.pdfbox.pdmodel.PDPage;
  7. public class AddingDocumentAttributes {
  8. public static void main(String args[]) throws IOException {
  9. //创建 PDF 文档对象
  10. PDDocument document = new PDDocument();
  11. //创建空白页
  12. PDPage blankPage = new PDPage();
  13. //将空白页添加到文档
  14. document.addPage( blankPage );
  15. //创建 PDDocumentInformation 对象
  16. PDDocumentInformation pdd = document.getDocumentInformation();
  17. //设置文档的作者
  18. pdd.setAuthor("Learnfk");
  19. //设置文档的标题
  20. pdd.setTitle("Sample document");
  21. //设置文档的创建者
  22. pdd.setCreator("PDF Examples");
  23. //设置文档的主题
  24. pdd.setSubject("Example document");
  25. //设置文档的创建日期
  26. Calendar date = new GregorianCalendar();
  27. date.set(2015, 11, 5);
  28. pdd.setCreationDate(date);
  29. //设置文档的修改日期
  30. date.set(2016, 6, 5);
  31. pdd.setModificationDate(date);
  32. //为文档设置关键字
  33. pdd.setKeywords("sample, first example, my pdf");
  34. //保存文档
  35. document.save("C:/PdfBox_Examples/doc_attributes.pdf");
  36. System.out.println("Properties added successfully ");
  37. //关闭文档
  38. document.close();
  39. }
  40. }

 查看PDF文档属性

  1. import java.io.File;
  2. import java.io.IOException;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.pdmodel.PDDocumentInformation;
  5. public class RetrivingDocumentAttributes {
  6. public static void main(String args[]) throws IOException {
  7. //加载现有文档
  8. File file = new File("C:/PdfBox_Examples/doc_attributes.pdf")
  9. PDDocument document = PDDocument.load(file);
  10. //获取 PDDocumentInformation 对象
  11. PDDocumentInformation pdd = document.getDocumentInformation();
  12. //检索 PDF 文档的信息
  13. System.out.println("Author of the document is :"+ pdd.getAuthor());
  14. System.out.println("Title of the document is :"+ pdd.getTitle());
  15. System.out.println("Subject of the document is :"+ pdd.getSubject());
  16. System.out.println("Creator of the document is :"+ pdd.getCreator());
  17. System.out.println("Creation date of the document is :"+ pdd.getCreationDate());
  18. System.out.println("Modification date of the document is :"+
  19. pdd.getModificationDate());
  20. System.out.println("Keywords of the document are :"+ pdd.getKeywords());
  21. //关闭文档
  22. document.close();
  23. }
  24. }

往PDF页面中添加单行文字

  1. import java.io.File;
  2. import java.io.IOException;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.pdmodel.PDPage;
  5. import org.apache.pdfbox.pdmodel.PDPageContentStream;
  6. import org.apache.pdfbox.pdmodel.font.PDType1Font;
  7. public class AddingContent {
  8. public static void main (String args[]) throws IOException {
  9. //加载现有文档
  10. File file = new File("C:/PdfBox_Examples/my_doc.pdf");
  11. PDDocument document = PDDocument.load(file);
  12. //检索文档的页面
  13. PDPage page = document.getPage(1);
  14. PDPageContentStream contentStream = new PDPageContentStream(document, page);
  15. //开始内容流
  16. contentStream.beginText();
  17. //将字体设置为内容流
  18. contentStream.setFont(PDType1Font.TIMES_ROMAN, 12);
  19. //设置线的位置
  20. contentStream.newLineAtOffset(25, 500);
  21. String text = "This is the sample document and we are adding content to it.";
  22. //以字符串形式添加文本
  23. contentStream.showText(text);
  24. //结束内容流
  25. contentStream.endText();
  26. System.out.println("Content added");
  27. //关闭内容流
  28. contentStream.close();
  29. //保存文档
  30. document.save(new File("C:/PdfBox_Examples/new.pdf"));
  31. //关闭文档
  32. document.close();
  33. }
  34. }

往PDF页面中添加多行文字

  1. import java.io.File;
  2. import java.io.IOException;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.pdmodel.PDPage;
  5. import org.apache.pdfbox.pdmodel.PDPageContentStream;
  6. import org.apache.pdfbox.pdmodel.font.PDType1Font;
  7. public class AddMultipleLines {
  8. public static void main(String args[]) throws IOException {
  9. //加载现有文档
  10. File file = new File("C:/PdfBox_Examples/my_pdf.pdf");
  11. PDDocument doc = document.load(file);
  12. //创建PDF文档
  13. PDPage page = doc.getPage(1);
  14. PDPageContentStream contentStream = new PDPageContentStream(doc, page);
  15. //开始内容流
  16. contentStream.beginText();
  17. //将字体设置为内容流
  18. contentStream.setFont( PDType1Font.TIMES_ROMAN, 16 );
  19. //设置Lead
  20. contentStream.setLeading(14.5f);
  21. //设置Line的位置
  22. contentStream.newLineAtOffset(25, 725);
  23. String text1 = "This is an example of adding text to a page in the pdf document.
  24. we can add as many lines";
  25. String text2 = "as we want like this using the ShowText() method of the
  26. ContentStream class";
  27. //以字符串的形式添加文本
  28. contentStream. ShowText(text1);
  29. contentStream.newLine();
  30. contentStream. ShowText(text2);
  31. //结束内容流
  32. contentStream.endText();
  33. System.out.println("Content added");
  34. //关闭内容流
  35. contentStream.close();
  36. //保存文档
  37. doc.save(new File("C:/PdfBox_Examples/new.pdf"));
  38. //关闭文档
  39. doc.close();
  40. }
  41. }

读取PDF文档里的文字

  1. import java.io.File;
  2. import java.io.IOException;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.text.PDFTextStripper;
  5. public class ReadingText {
  6. public static void main(String args[]) throws IOException {
  7. //加载现有文档
  8. File file = new File("C:/PdfBox_Examples/new.pdf");
  9. PDDocument document = PDDocument.load(file);
  10. //实例化PDFTextStropper类
  11. PDFTextStripper pdfStripper = new PDFTextStripper();
  12. //从PDF文档中检索文本
  13. String text = pdfStripper.getText(document);
  14. System.out.println(text);
  15. //关闭文件
  16. document.close();
  17. }
  18. }

往PDF文档里插入图片

  1. import java.io.File;
  2. import org.apache.pdfbox.pdmodel.PDDocument;
  3. import org.apache.pdfbox.pdmodel.PDPage;
  4. import org.apache.pdfbox.pdmodel.PDPageContentStream;
  5. import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
  6. public class InsertingImage {
  7. public static void main(String args[]) throws Exception {
  8. //加载现有文档
  9. File file = new File("C:/PdfBox_Examples/sample.pdf");
  10. PDDocument doc = PDDocument.load(file);
  11. //检索页面
  12. PDPage page = doc.getPage(0);
  13. //创建PDImagexobject对象
  14. PDImageXObject pdImage = PDImageXObject.createFromFile("C:/PdfBox_Examples/logo.png",doc);
  15. //创建PDPageContentStream对象
  16. PDPageContentStream contents = new PDPageContentStream(doc, page);
  17. //在PDF文档中绘制图像
  18. contents.drawImage(pdImage, 70, 250);
  19. System.out.println("Image inserted");
  20. //关闭PDPageContentStream对象
  21. contents.close();
  22. //保存文档
  23. doc.save("C:/PdfBox_Examples/sample.pdf");
  24. //关闭文件
  25. doc.close();
  26. }
  27. }

加密PDF文档,即打开时需要输入密码

  1. import java.io.File;
  2. import org.apache.pdfbox.pdmodel.PDDocument;
  3. import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
  4. import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy;
  5. public class EncriptingPDF {
  6. public static void main(String args[]) throws Exception {
  7. //加载现有文档
  8. File file = new File("C:/PdfBox_Examples/sample.pdf");
  9. PDDocument document = PDDocument.load(file);
  10. //创建访问权限对象
  11. AccessPermission ap = new AccessPermission();
  12. //创建标准化的支持policy对象
  13. StandardProtectionPolicy spp = new StandardProtectionPolicy("1234", "1234", ap);
  14. //设置加密密钥的长度
  15. spp.setEncryptionKeyLength(128);
  16. //设置访问权限
  17. spp.setPermissions(ap);
  18. //保护文件
  19. document.protect(spp);
  20. System.out.println("Document encrypted");
  21. //保存文档
  22. document.save("C:/PdfBox_Examples/sample.pdf");
  23. //关闭文件
  24. document.close();
  25. }
  26. }

往PDF文档里添加javaScript脚本

  1. import java.io.File;
  2. import org.apache.pdfbox.pdmodel.PDDocument;
  3. import org.apache.pdfbox.pdmodel.interactive.action.PDActionJavaScript;
  4. public class AddJavaScript {
  5. public static void main(String args[]) throws Exception {
  6. //加载现有文件
  7. File file = new File("C:/PdfBox_Examples/new.pdf");
  8. PDDocument document = PDDocument.load(file);
  9. String javaScript = "app.alert( {cMsg: 'this is an example', nIcon: 3,"
  10. + " nType: 0, cTitle: 'PDFBox Javascript example'} );";
  11. //创建PDActionjavascript对象
  12. PDActionJavaScript PDAjavascript = new PDActionJavaScript(javaScript);
  13. //嵌入JavaScript
  14. document.getDocumentCatalog().setOpenAction(PDAjavascript);
  15. //保存文档
  16. document.save( new File("C:/PdfBox_Examples/new.pdf") );
  17. System.out.println("Data added to the given PDF");
  18. //关闭文件
  19. document.close();
  20. }
  21. }

将一个PDF文档拆成多个

  1. import org.apache.pdfbox.multipdf.Splitter;
  2. import org.apache.pdfbox.pdmodel.PDDocument;
  3. import java.io.File;
  4. import java.io.IOException;
  5. import java.util.List;
  6. import java.util.Iterator;
  7. public class SplitPages {
  8. public static void main(String[] args) throws IOException {
  9. //加载现有的PDF文档
  10. File file = new File("C:/PdfBox_Examples/sample.pdf");
  11. PDDocument document = PDDocument.load(file);
  12. //实例化Splitter类
  13. Splitter splitter = new Splitter();
  14. //分割PDF文档的页面
  15. List<PDDocument> Pages = splitter.split(document);
  16. //创建迭代器
  17. Iterator<PDDocument> iterator = Pages.listIterator();
  18. //将每个页面保存为单个文档
  19. int i = 1;
  20. while(iterator.hasNext()) {
  21. PDDocument pd = iterator.next();
  22. pd.save("C:/PdfBox_Examples/sample"+ i++ +".pdf");
  23. }
  24. System.out.println("Multiple PDF's created");
  25. document.close();
  26. }
  27. }

将多个PDF文档合并成一个

  1. import org.apache.pdfbox.multipdf.PDFMergerUtility;
  2. import org.apache.pdfbox.pdmodel.PDDocument;
  3. import java.io.File;
  4. import java.io.IOException;
  5. public class MergePDFs {
  6. public static void main(String[] args) throws IOException {
  7. //加载现有的PDF文档
  8. File file1 = new File("C:/PdfBox_Examples/sample1.pdf");
  9. PDDocument doc1 = PDDocument.load(file1);
  10. File file2 = new File("C:/PdfBox_Examples/sample2.pdf");
  11. PDDocument doc2 = PDDocument.load(file2);
  12. //实例化PDFmergerutility类
  13. PDFMergerUtility PDFmerger = new PDFMergerUtility();
  14. //设置目标文件
  15. PDFmerger.setDestinationFileName("C:/PdfBox_Examples/merged.pdf");
  16. //添加源文件
  17. PDFmerger.addSource(file1);
  18. PDFmerger.addSource(file2);
  19. //合并两份文件
  20. PDFmerger.mergeDocuments();
  21. System.out.println("Documents merged");
  22. //关闭文件
  23. doc1.close();
  24. doc2.close();
  25. }
  26. }

将PDF页面渲染为图像

  1. import java.awt.image.BufferedImage;
  2. import java.io.File;
  3. import javax.imageio.ImageIO;
  4. import org.apache.pdfbox.pdmodel.PDDocument;
  5. import org.apache.pdfbox.rendering.PDFRenderer;
  6. public class PdfToImage {
  7. public static void main(String args[]) throws Exception {
  8. //加载现有的PDF文档
  9. File file = new File("C:/PdfBox_Examples/sample.pdf");
  10. PDDocument document = PDDocument.load(file);
  11. //实例化PDFRenderer类
  12. PDFRenderer renderer = new PDFRenderer(document);
  13. //从PDF文档渲染图像
  14. BufferedImage image = renderer.renderImage(0);
  15. //将图像写入文件
  16. ImageIO.write(image, "JPEG", new File("C:/PdfBox_Examples/myimage.jpg"));
  17. System.out.println("Image created");
  18. //Closing the document
  19. document.close();
  20. }
  21. }

添加矩形

  1. import java.awt.Color;
  2. import java.io.File;
  3. import org.apache.pdfbox.pdmodel.PDDocument;
  4. import org.apache.pdfbox.pdmodel.PDPage;
  5. import org.apache.pdfbox.pdmodel.PDPageContentStream;
  6. public class ShowColorBoxes {
  7. public static void main(String args[]) throws Exception {
  8. //加载现有文档
  9. File file = new File("C:/PdfBox_Examples/BlankPage.pdf");
  10. PDDocument document = PDDocument.load(file);
  11. //检索PDF文件的页面
  12. PDPage page = document.getPage(0);
  13. //实例化PDPAGEContentStream类
  14. PDPageContentStream contentStream = new PDPageContentStream(document, page);
  15. //设置NonStroking颜色
  16. contentStream.setNonStrokingColor(Color.DARK_GRAY);
  17. //绘制一个矩形
  18. contentStream.addRect(200, 650, 100, 100);
  19. //绘制一个矩形
  20. contentStream.fill();
  21. System.out.println("rectangle added");
  22. //关闭contentStream对象
  23. contentStream.close();
  24. //保存文档
  25. File file1 = new File("C:/PdfBox_Examples/colorbox.pdf");
  26. document.save(file1);
  27. //关闭文件
  28. document.close();
  29. }
  30. }

本文内容由网友自发贡献,转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号