当前位置:   article > 正文

python学习的第二十五天:对PDF文件的读写操作_python pdf读写

python pdf读写

python学习的第二十五天:对PDF文件的读写操作

对Excel操作的补充

# Python操作Excel生成统计图表

from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference

wb = Workbook(write_only=True)
ws = wb.create_sheet()

rows = [
    ('类别', '销售A组', '销售B组'),
    ('手机', 40, 30),
    ('平板', 50, 60),
    ('笔记本', 80, 70),
    ('外围设备', 20, 10),
]

for row in rows:
    ws.append(row)

chart1 = BarChart()
chart1.type = 'col'
chart1.style = 10
chart1.title = '销售统计图'
chart1.y_axis.title = '销量'
chart1.x_axis.title = '商品类别'

data = Reference(ws, min_col=2, min_row=1, max_row=5, max_col=3)
cats = Reference(ws, min_col=1, min_row=2, max_row=5)
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cats)
chart1.shape = 4
ws.add_chart(chart1, 'A10')

wb.save('resources/demo.xlsx')
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34

python的实用方法

获取指定文件下的所有内容
import os

files_list = os.listdir('/Users/Hao/Desktop')
for file in files_list:
    fullpath = os.path.abspath(file)
    print(fullpath)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
shutil模块(封装了高级的文件操作函数)
import shutil

# 获取命令的路径
print(shutil.which('python'))
# 移动文件
shutil.move('resources/sales_data.csv', 'sales.csv')
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6

PDF的相关操作

操作PDF需要的工具:pypdf2
读取PDF文件并提取其中的文字
import PyPDF2

from PyPDF2.pdf import PageObject

# 打开PDF文件
reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
# 读取PDF的页数
for page_num in range(reader.numPages):
    current_page = reader.getPage(page_num)  # type: PageObject
    # 从页面中抽取文字
    # print(current_page.extractText())
    current_page.rotateClockwise(90)
    writer.addPage(current_page)
    writer.addBlankPage()
with open('resources/XGBoost-modified.pdf', 'wb') as file:
    writer.write(file)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
PDF的写操作
from reportlab.lib.pagesizes import A4
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

# 注册字体文件
pdfmetrics.registerFont(TTFont('Font1', 'resources/fonts/Vera.ttf'))
pdfmetrics.registerFont(TTFont('Font2', 'resources/fonts/青呱石头体.ttf'))

pdf_canvas = canvas.Canvas('resources/demo.pdf', pagesize=A4)
width, height = A4

# 绘图
image = canvas.ImageReader('resources/guido.jpg')
pdf_canvas.drawImage(image, 20, height - 395, 250, 375)

# 显示当前页
pdf_canvas.showPage()

# 写字
pdf_canvas.setFont('Font2', 40)
pdf_canvas.setFillColorRGB(1, 0, 0, 1)
pdf_canvas.drawString(width // 2 - 120, height // 2, '你好,世界!')
pdf_canvas.setFont('Font1', 40)
pdf_canvas.setFillColorRGB(0, 1, 0, 0.5)
pdf_canvas.rotate(18)
pdf_canvas.drawString(250, 250, 'hello, world!')

# 保存
pdf_canvas.save()
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
PDF加密
import PyPDF2

reader = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
writer = PyPDF2.PdfFileWriter()
for page_num in range(reader.numPages):
    writer.addPage(reader.getPage(page_num))
# 加密PDF文件
writer.encrypt('foobared')
with open('resources/XGBoost-encrypted.pdf', 'wb') as file:
    writer.write(file)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
PDF的解密
import PyPDF2

reader = PyPDF2.PdfFileReader('resources/XGBoost-encrypted.pdf')
if reader.decrypt('foobared') == 0:
    print('密码错误')
else:
    print(reader.numPages)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
PDF增加水印
import PyPDF2

from PyPDF2.pdf import PageObject

reader1 = PyPDF2.PdfFileReader('resources/XGBoost.pdf')
reader2 = PyPDF2.PdfFileReader('resources/watermark.pdf')
writer = PyPDF2.PdfFileWriter()

watermark_page = reader2.getPage(0)
for page_num in range(reader1.numPages):
    current_page = reader1.getPage(page_num)  # type: PageObject
    current_page.mergePage(watermark_page)
    writer.addPage(current_page)

with open('resources/XGBoost-watermarked.pdf', 'wb') as file:
    writer.write(file)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
current_page.mergePage(watermark_page)
writer.addPage(current_page)
  • 1
  • 2

with open(‘resources/XGBoost-watermarked.pdf’, ‘wb’) as file:
writer.write(file)


  • 1
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/article/detail/51621
推荐阅读
相关标签
  

闽ICP备14008679号