当前位置:   article > 正文

Python OpenCV 去除PDF(图片)中指定颜色水印_python识别pdf里指定颜色的内容

python识别pdf里指定颜色的内容

对比图(水印去除前后):

 

                                                                     

 

 代码:

  import cv2
  import numpy as np

  # Remove a red watermark from one page image by keeping only its red channel.
  #
  # OpenCV loads images in BGR order, so index 2 is the red channel. In that
  # channel a red watermark is almost as bright as the white page background,
  # while dark text stays dark, so the watermark fades out of the grayscale
  # result.
  imgs = cv2.imread("1.png")
  if imgs is None:
      # cv2.imread signals a missing or undecodable file by returning None.
      raise FileNotFoundError("1.png")

  cv2.imwrite("res.jpg", imgs[:, :, 2])

  # Alternative, mask-based idea from the original draft (disabled, the
  # thresholds have to be tuned per document):
  #   * a pixel counts as watermark when its red value is the largest channel
  #     and exceeds a threshold (the draft used half the mean red value);
  #   * the other two channels are close to each other while their distance
  #     to the red value is large (the draft used R-G >= 20 and R-B >= 20);
  #   * pixels selected by the mask are then painted white (255).
  # NOTE: the channel differences must be computed in a signed dtype, e.g.
  # imgs[:, :, 2].astype(np.int16) - imgs[:, :, 0]; plain uint8 subtraction
  # wraps around for negative results.

 

 

 

Python 获取pdf文件中的所有图片,保存到文件夹中。

  1. pip install pymupdf
  2. # 注意:安装 PyMuPDF 后即可直接 import fitz;不要再执行 pip install fitz(PyPI 上的 fitz 是另一个无关的包,会与 PyMuPDF 冲突)

 

  1. import fitz
  def func(doc):
      """Extract every embedded image of a PDF and save each one as a PNG.

      Files are written to the current working directory and named
      ``p<page>-<index>.png``; both numbers are 1-based.

      Args:
          doc: An opened PyMuPDF (``fitz``) document.
      """
      # PyMuPDF renamed getPageImageList() to get_page_images(); prefer the
      # current name and fall back to the old one on legacy releases.
      list_images = getattr(doc, "get_page_images", None) or doc.getPageImageList
      for page_no in range(len(doc)):
          for img_no, img in enumerate(list_images(page_no), start=1):
              xref = img[0]  # first item of every entry is the image xref
              pix = fitz.Pixmap(doc, xref)  # make pixmap from image
              if pix.n - pix.alpha >= 4:
                  # CMYK: must be converted to RGB before it can be saved as PNG
                  pix = fitz.Pixmap(fitz.csRGB, pix)
              # Pixmap.writePNG() was likewise renamed to Pixmap.save().
              save = getattr(pix, "save", None) or pix.writePNG
              save("p%s-%s.png" % (page_no + 1, img_no))
              pix = None  # free Pixmap resources
  if __name__ == "__main__":
      # '11.pdf' is the path of the input PDF; the context manager closes the
      # document once all images have been extracted.
      with fitz.open('11.pdf') as doc:
          func(doc=doc)
  # Reference blog: https://blog.csdn.net/qq_42951560/article/details/109609662

 

批量去除水印:

  def remove():
      """Remove the red watermark from every image found in ``./pic``.

      Each image is reduced to its red channel (index 2 in OpenCV's BGR
      layout) and written under the same file name into ``./res``. Files that
      OpenCV cannot decode are skipped.
      """
      src_dir = './pic'
      dst_dir = './res'
      # cv2.imwrite will not create a missing output directory.
      os.makedirs(dst_dir, exist_ok=True)
      list_file = os.listdir(src_dir)
      print(list_file)
      for name in list_file:
          img = cv2.imread(os.path.join(src_dir, name))
          if img is None:
              # cv2.imread returns None for anything it cannot read as an image.
              continue
          cv2.imwrite(os.path.join(dst_dir, name), img[:, :, 2])

 

完整代码:

目录结构(均位于脚本所在目录下:11.pdf 为输入文件,pic/ 存放从 PDF 中提取出的图片,res/ 存放去除水印后的图片):

  1. import fitz
  2. import os
  3. import cv2
  4. import numpy as np
  def func(doc):
      """Extract every embedded image of a PDF into ``./pic`` as PNG files.

      Files are named ``p<page>-<index>.png``; both numbers are 1-based. The
      ``./pic`` directory is created when it does not exist yet.

      Args:
          doc: An opened PyMuPDF (``fitz``) document.
      """
      os.makedirs("./pic", exist_ok=True)
      # PyMuPDF renamed getPageImageList() to get_page_images(); prefer the
      # current name and fall back to the old one on legacy releases.
      list_images = getattr(doc, "get_page_images", None) or doc.getPageImageList
      for page_no in range(len(doc)):
          for img_no, img in enumerate(list_images(page_no), start=1):
              xref = img[0]  # first item of every entry is the image xref
              pix = fitz.Pixmap(doc, xref)  # make pixmap from image
              if pix.n - pix.alpha >= 4:
                  # CMYK: must be converted to RGB before it can be saved as PNG
                  pix = fitz.Pixmap(fitz.csRGB, pix)
              # Pixmap.writePNG() was likewise renamed to Pixmap.save().
              save = getattr(pix, "save", None) or pix.writePNG
              save("./pic/p%s-%s.png" % (page_no + 1, img_no))
              pix = None  # free Pixmap resources
  def remove():
      """Remove the red watermark from every image found in ``./pic``.

      Each image is reduced to its red channel (index 2 in OpenCV's BGR
      layout) and written under the same file name into ``./res``. Files that
      OpenCV cannot decode are skipped.
      """
      src_dir = './pic'
      dst_dir = './res'
      # cv2.imwrite will not create a missing output directory.
      os.makedirs(dst_dir, exist_ok=True)
      list_file = os.listdir(src_dir)
      print(list_file)
      for name in list_file:
          img = cv2.imread(os.path.join(src_dir, name))
          if img is None:
              # cv2.imread returns None for anything it cannot read as an image.
              continue
          cv2.imwrite(os.path.join(dst_dir, name), img[:, :, 2])
  if __name__ == "__main__":
      # Step 1: write every image contained in the PDF into the ./pic folder.
      # '11.pdf' is the path of the input PDF; the context manager closes the
      # document once extraction is finished.
      with fitz.open('11.pdf') as doc:
          func(doc=doc)
      # Step 2: remove the red watermark from every image in that folder.
      remove()

 

使用金山wps工具就可以将文件夹内的图片合成pdf文档了。

 

参考博客:https://blog.csdn.net/a362682954/article/details/105628198/

参考博客 :https://blog.csdn.net/qq_42951560/article/details/109609662

 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/木道寻08/article/detail/975423
推荐阅读
相关标签
  

闽ICP备14008679号