赞
踩
发票原图:
首先根据数据库表在系统中生成一套自己的代码(entity、dao、service、controller、html 等),下面以我自己创建的为例:
Invoiceinfo.java
- /**
- * Copyright © 2012-2016 <a href="https://github.com/thinkgem/jeesite">JeeSite</a> All rights reserved.
- */
- package com.thinkgem.jeesite.modules.basis.entity;
-
- import org.hibernate.validator.constraints.Length;
-
- import java.math.BigDecimal;
- import java.util.Date;
- import java.util.List;
-
- import com.fasterxml.jackson.annotation.JsonFormat;
-
- import com.thinkgem.jeesite.common.persistence.DataEntity;
-
- /**
- * 发票信息Entity
- * @author hsg
- * @version 2024-05-07
- */
- public class Invoiceinfo extends DataEntity<Invoiceinfo> {
-
- private static final long serialVersionUID = 1L;
- private String preparedBy; // 经办人
- private String invoiceCode; // 发票代码
- private String invoiceNumber; // 发票号码
- private String content; // 项目名称
- private String company; // 开票公司
- private Date invoiceDate; // 开票日期
- private Date invoiceEndDate; // 开票结束日期
- private Date getDate; // 取得日期
- private Date getEndDate; // 取得结束日期
- private BigDecimal notaxinclusiveAmount; // 金额
- private BigDecimal taxAmount; // 税额
- private BigDecimal taxinclusiveAmount; // 价税合计
- private String inReserve1; // 上传人
- private String inReserve2; // 发票类别 专票/普票
- private String inReserve3; // 备用3
- private String inReserve4; // 备用4
- private String type; //发票种类
- private String pzh; //凭证号
- private String remakes; //备注
- private Date rzDate; //入账日期
-
- public Invoiceinfo() {
- super();
- }
-
- public Invoiceinfo(String id){
- super(id);
- }
-
- public Date getRzDate() {
- return rzDate;
- }
-
- public void setRzDate(Date rzDate) {
- this.rzDate = rzDate;
- }
-
- public String getPzh() {
- return pzh;
- }
-
- public void setPzh(String pzh) {
- this.pzh = pzh;
- }
-
- public String getRemakes() {
- return remakes;
- }
-
- public void setRemakes(String remakes) {
- this.remakes = remakes;
- }
-
- public String getType() {
- return type;
- }
-
- public void setType(String type) {
- this.type = type;
- }
-
- @Length(min=0, max=100, message="经办人长度必须介于 0 和 100 之间")
- public String getPreparedBy() {
- return preparedBy;
- }
-
- public void setPreparedBy(String preparedBy) {
- this.preparedBy = preparedBy;
- }
-
- @Length(min=0, max=100, message="发票代码长度必须介于 0 和 100 之间")
- public String getInvoiceCode() {
- return invoiceCode;
- }
-
- public void setInvoiceCode(String invoiceCode) {
- this.invoiceCode = invoiceCode;
- }
-
- @Length(min=0, max=100, message="发票号码长度必须介于 0 和 100 之间")
- public String getInvoiceNumber() {
- return invoiceNumber;
- }
-
- public void setInvoiceNumber(String invoiceNumber) {
- this.invoiceNumber = invoiceNumber;
- }
-
- @Length(min=0, max=500, message="开票内容长度必须介于 0 和 500 之间")
- public String getContent() {
- return content;
- }
-
- public void setContent(String content) {
- this.content = content;
- }
-
- @Length(min=0, max=100, message="开票公司长度必须介于 0 和 100 之间")
- public String getCompany() {
- return company;
- }
-
- public void setCompany(String company) {
- this.company = company;
- }
-
- @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
- public Date getInvoiceDate() {
- return invoiceDate;
- }
-
- public void setInvoiceDate(Date invoiceDate) {
- this.invoiceDate = invoiceDate;
- }
-
- @JsonFormat(pattern = "yyyy-MM-dd HH:mm:ss")
- public Date getGetDate() {
- return getDate;
- }
-
- public void setGetDate(Date getDate) {
- this.getDate = getDate;
- }
-
- public BigDecimal getNotaxinclusiveAmount() {
- return notaxinclusiveAmount;
- }
-
- public void setNotaxinclusiveAmount(BigDecimal notaxinclusiveAmount) {
- this.notaxinclusiveAmount = notaxinclusiveAmount;
- }
-
- public BigDecimal getTaxAmount() {
- return taxAmount;
- }
-
- public void setTaxAmount(BigDecimal taxAmount) {
- this.taxAmount = taxAmount;
- }
-
- public BigDecimal getTaxinclusiveAmount() {
- return taxinclusiveAmount;
- }
-
- public void setTaxinclusiveAmount(BigDecimal taxinclusiveAmount) {
- this.taxinclusiveAmount = taxinclusiveAmount;
- }
-
- @Length(min=0, max=100, message="备用1长度必须介于 0 和 100 之间")
- public String getInReserve1() {
- return inReserve1;
- }
-
- public void setInReserve1(String inReserve1) {
- this.inReserve1 = inReserve1;
- }
-
- @Length(min=0, max=100, message="备用2长度必须介于 0 和 100 之间")
- public String getInReserve2() {
- return inReserve2;
- }
-
- public void setInReserve2(String inReserve2) {
- this.inReserve2 = inReserve2;
- }
-
- @Length(min=0, max=100, message="备用3长度必须介于 0 和 100 之间")
- public String getInReserve3() {
- return inReserve3;
- }
-
- public void setInReserve3(String inReserve3) {
- this.inReserve3 = inReserve3;
- }
-
- @Length(min=0, max=100, message="备用4长度必须介于 0 和 100 之间")
- public String getInReserve4() {
- return inReserve4;
- }
-
- public void setInReserve4(String inReserve4) {
- this.inReserve4 = inReserve4;
- }
-
- public Date getInvoiceEndDate() {
- return invoiceEndDate;
- }
-
- public void setInvoiceEndDate(Date invoiceEndDate) {
- this.invoiceEndDate = invoiceEndDate;
- }
-
- public Date getGetEndDate() {
- return getEndDate;
- }
-
- public void setGetEndDate(Date getEndDate) {
- this.getEndDate = getEndDate;
- }
-
- }
前端调用:
controller调用service:
- private static final String UPLOAD_DIR = "D:\\Program Files\\发票票据\\";
- @RequiresPermissions("basis:invoiceinfo:edit")
- @RequestMapping(value = "invoiceInput")
- public String invoiceinfoInput(Invoiceinfo invoiceinfo, @RequestParam("ListFile") MultipartFile[] ListFile, RedirectAttributes redirectAttributes,HttpServletRequest request,Model model){
- if (ListFile.length == 0) {
- addMessage(redirectAttributes, "上传失败!失败信息:请选择要上传的文件!");
- return "redirect:"+Global.getAdminPath()+"/basis/invoiceinfo/?repage";
- }
- String userName = UserUtils.getUser().getLoginName();
- String success = invoiceinfoService.inputInvoice(userName, ListFile, UPLOAD_DIR);
- addMessage(redirectAttributes, success);
- return "redirect:"+Global.getAdminPath()+"/basis/invoiceinfo/?repage";
- }
service实现方法:
service 中调用的方法请参考图中所示的文件:
- @Transactional(readOnly = false)
- public String inputInvoice(String userName, MultipartFile[] ListFile, String UPLOAD_DIR) {
- List<Invoiceinfo> infoList = new ArrayList<Invoiceinfo>();
- String success = "导入数据成功!";
- String dateFormat = "yyyy-MM-dd";
- SimpleDateFormat sdf = new SimpleDateFormat(dateFormat);
- for (MultipartFile file : ListFile) {
- if (file.isEmpty()) {
- continue;
- }
- String fileName = file.getOriginalFilename();
- try {
- if(fileName.endsWith("pdf")) {
- Invoiceinfo invoiceinfo = new Invoiceinfo();
- File uploadDir = new File(UPLOAD_DIR + userName +"/");
- if (!uploadDir.exists()) {
- uploadDir.mkdir();
- }
- // 构造文件保存路径(文件名使用原始文件名,防止重名可以添加UUID等)
- Path targetLocation = Paths.get(UPLOAD_DIR + userName + "/" + fileName);
- // 保存文件
- Files.write(targetLocation, file.getBytes());
- NewInvoice newInvoice = NewInvoiceExtractor.newPdfProcessInvoicesInFile(targetLocation.toAbsolutePath().toString());
- if(StringUtils.isNotBlank(newInvoice.getNumber())) {//新版发票
- invoiceinfo.setInvoiceNumber(newInvoice.getNumber());
- List<Invoiceinfo> list = findList1(invoiceinfo);
- if(list.size() > 0) {
- success = "导入数据失败!系统已存在"+fileName+"编号的发票!";
- }else {
- int index = fileName.indexOf("-");
- invoiceinfo.setPreparedBy(fileName.substring(0, index));
- invoiceinfo.setContent(newInvoice.getContent());
- invoiceinfo.setCompany(newInvoice.getSellerName());
- invoiceinfo.setInvoiceDate(sdf.parse(newInvoice.getDate()));
- invoiceinfo.setNotaxinclusiveAmount(newInvoice.getAmount());
- invoiceinfo.setTaxAmount(newInvoice.getTaxAmount());
- invoiceinfo.setTaxinclusiveAmount(newInvoice.getTotalAmount());
- invoiceinfo.setGetDate(new Date());
- invoiceinfo.setInReserve1(userName);
- invoiceinfo.setInReserve2(newInvoice.getInReserve2());
- invoiceinfo.setInReserve3(targetLocation.toString());
- invoiceinfo.setType("数电发票");
- infoList.add(invoiceinfo);
- }
- }else {//老版发票
- Invoice invoice = InvoiceExtractor.newPdfProcessInvoicesInFile(targetLocation.toAbsolutePath().toString());
- invoiceinfo.setInvoiceNumber(invoice.getNumber());
- List<Invoiceinfo> list = findList1(invoiceinfo);
- if(list.size() > 0) {
- success = "导入数据失败!系统已存在"+fileName+"编号的发票!";
- }else {
- int index = fileName.indexOf("-");
- invoiceinfo.setPreparedBy(fileName.substring(0, index));
- invoiceinfo.setInvoiceCode(invoice.getCode());
- invoiceinfo.setContent(invoice.getContent());
- invoiceinfo.setCompany(invoice.getSellerName());
- invoiceinfo.setInvoiceDate(sdf.parse(invoice.getDate()));
- invoiceinfo.setNotaxinclusiveAmount(invoice.getAmount());
- invoiceinfo.setTaxAmount(invoice.getTaxAmount());
- invoiceinfo.setTaxinclusiveAmount(invoice.getTotalAmount());
- invoiceinfo.setInvoiceNumber(invoice.getNumber());
- invoiceinfo.setGetDate(new Date());
- invoiceinfo.setInReserve1(userName);
- invoiceinfo.setInReserve2(invoice.getInReserve2());
- invoiceinfo.setInReserve3(targetLocation.toString());
- invoiceinfo.setType("电子发票");
- infoList.add(invoiceinfo);
- }
- }
- }
- }catch (Exception e) {
- success = "导入数据失败!失败信息:"+e.getMessage();
- e.printStackTrace();
- }
- }
- if(success.equals("导入数据成功!")) {
- saveInfoList(infoList);
- }
- return success;
- }
-
- private void saveInfoList(List<Invoiceinfo> infoList) {
- for(Invoiceinfo invoiceinfo : infoList)
- save(invoiceinfo);
- }
:
************以下代码可识别新版和老版的专票、普票************
Detail.java
- package com.thinkgem.jeesite.modules.pdfServices.entity;
-
-
-
-
-
- import java.math.BigDecimal;
-
-
-
/**
 * One line item of an invoice.
 *
 * @author b16mt
 * @version v1.0
 */
public class Detail {

    /** Item name. */
    private String name;

    /** Item specification / model. */
    private String model;

    /** Unit of measure. */
    private String unit;

    /** Quantity. */
    private BigDecimal count;

    /** Unit price. */
    private BigDecimal price;

    /** Total amount of the item. */
    private BigDecimal amount;

    /** Tax rate of the item. */
    private BigDecimal taxRate;

    /** Tax amount of the item. */
    private BigDecimal taxAmount;

    /** @return the item name */
    public String getName() {
        return this.name;
    }

    /** @param name the item name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the item specification / model */
    public String getModel() {
        return this.model;
    }

    /** @param model the item specification / model */
    public void setModel(String model) {
        this.model = model;
    }

    /** @return the unit of measure */
    public String getUnit() {
        return this.unit;
    }

    /** @param unit the unit of measure */
    public void setUnit(String unit) {
        this.unit = unit;
    }

    /** @return the quantity */
    public BigDecimal getCount() {
        return this.count;
    }

    /** @param count the quantity */
    public void setCount(BigDecimal count) {
        this.count = count;
    }

    /** @return the unit price */
    public BigDecimal getPrice() {
        return this.price;
    }

    /** @param price the unit price */
    public void setPrice(BigDecimal price) {
        this.price = price;
    }

    /** @return the total amount of the item */
    public BigDecimal getAmount() {
        return this.amount;
    }

    /** @param amount the total amount of the item */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the tax rate of the item */
    public BigDecimal getTaxRate() {
        return this.taxRate;
    }

    /** @param taxRate the tax rate of the item */
    public void setTaxRate(BigDecimal taxRate) {
        this.taxRate = taxRate;
    }

    /** @return the tax amount of the item */
    public BigDecimal getTaxAmount() {
        return this.taxAmount;
    }

    /** @param taxAmount the tax amount of the item */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }
}
----------------------------------------------------------------
Invoice.java
- package com.thinkgem.jeesite.modules.pdfServices.entity;
-
- import java.math.BigDecimal;
- import java.util.List;
-
-
-
- public class Invoice {
-
- /**
- * 文件绝对路径
- */
- private String fileAbsolutePath;
-
- /**
- * 发票标题
- */
- private String title;
-
- /**
- * 机器编号
- */
- private String machineNumber;
-
- /**
- * 发票代码
- */
- private String code;
-
- /**
- * 发票号码
- */
- private String number;
-
- /**
- * 开票日期
- */
- private String date;
-
- /**
- * 校验码
- */
- private String checksum;
-
- /**
- * 购买方名称
- */
- private String buyerName;
-
- /**
- * 购买方纳税人识别号
- */
- private String buyerCode;
-
- /**
- * 购买方地址
- */
- private String buyerAddress;
-
- /**
- * 购买方银行账号
- */
- private String buyerAccount;
-
- /**
- * 密码区
- */
- private String password;
-
- /**
- * 合计金额
- */
- private BigDecimal amount;
-
- /**
- * 合计税额
- */
- private BigDecimal taxAmount;
-
- /**
- * 价税合计大写字符串
- */
- private String totalAmountString;
-
- /**
- * 价税合计金额
- */
- private BigDecimal totalAmount;
-
- /**
- * 销售方名称
- */
- private String sellerName;
-
- /**
- * 销售方纳税人识别号
- */
- private String sellerCode;
-
- /**
- * 销售方地址
- */
- private String sellerAddress;
-
- /**
- * 销售方银行账号
- */
- private String sellerAccount;
-
- /**
- * 收款人
- */
- private String payee;
-
- /**
- * 复核人
- */
- private String reviewer;
-
- /**
- * 开票人
- */
- private String drawer;
-
- /**
- * 发票类型
- */
- private String type;
-
- /**
- * 发票明细列表
- */
- private List<Detail> detailList;
- private String content; // 开票内容
- private String inReserve2; // 发票类别 专票/普票
-
-
- public String getInReserve2() {
- return inReserve2;
- }
-
- public void setInReserve2(String inReserve2) {
- this.inReserve2 = inReserve2;
- }
-
- public String getContent() {
- return content;
- }
-
- public void setContent(String content) {
- this.content = content;
- }
-
- /**
- * 获取文件绝对路径
- * @return 文件绝对路径
- */
- public String getFileAbsolutePath() {
- return fileAbsolutePath;
- }
-
- /**
- * 设置文件绝对路径
- * @param fileAbsolutePath 文件绝对路径
- */
- public void setFileAbsolutePath(String fileAbsolutePath) {
- this.fileAbsolutePath = fileAbsolutePath;
- }
-
- /**
- * 获取标题
- * @return 标题
- */
- public String getTitle() {
- return title;
- }
-
- /**
- * 设置标题
- * @param title 标题
- */
- public void setTitle(String title) {
- this.title = title;
- }
-
- /**
- * 获取机器编号
- * @return 机器编号
- */
- public String getMachineNumber() {
- return machineNumber;
- }
-
- /**
- * 设置机器编号
- * @param machineNumber 机器编号
- */
- public void setMachineNumber(String machineNumber) {
- this.machineNumber = machineNumber;
- }
-
- /**
- * 获取发票代码
- * @return 发票代码
- */
- public String getCode() {
- return code;
- }
-
- /**
- * 设置发票代码
- * @param code 发票代码
- */
- public void setCode(String code) {
- this.code = code;
- }
-
- /**
- * 获取发票号码
- * @return 发票号码
- */
- public String getNumber() {
- return number;
- }
-
- /**
- * 设置发票号码
- * @param number 发票号码
- */
- public void setNumber(String number) {
- this.number = number;
- }
-
- /**
- * 获取日期
- * @return 日期
- */
- public String getDate() {
- return date;
- }
-
- /**
- * 设置日期
- * @param date 日期
- */
- public void setDate(String date) {
- this.date = date;
- }
-
- /**
- * 获取校验码
- * @return 校验码
- */
- public String getChecksum() {
- return checksum;
- }
-
- /**
- * 设置校验码
- * @param checksum 校验码
- */
- public void setChecksum(String checksum) {
- this.checksum = checksum;
- }
-
- /**
- * 获取购买方名称
- * @return 购买方名称
- */
- public String getBuyerName() {
- return buyerName;
- }
-
- /**
- * 设置购买方名称
- * @param buyerName 购买方名称
- */
- public void setBuyerName(String buyerName) {
- this.buyerName = buyerName;
- }
-
- /**
- * 获取购买方发票代码
- * @return 购买方发票代码
- */
- public String getBuyerCode() {
- return buyerCode;
- }
-
- /**
- * 设置购买方发票代码
- * @param buyerCode 购买方发票代码
- */
- public void setBuyerCode(String buyerCode) {
- this.buyerCode = buyerCode;
- }
-
- /**
- * 获取购买方地址
- * @return 购买方地址
- */
- public String getBuyerAddress() {
- return buyerAddress;
- }
-
- /**
- * 设置购买方地址
- * @param buyerAddress 购买方地址
- */
- public void setBuyerAddress(String buyerAddress) {
- this.buyerAddress = buyerAddress;
- }
-
- /**
- * 获取购买方账号
- * @return 购买方账号
- */
- public String getBuyerAccount() {
- return buyerAccount;
- }
-
- /**
- * 设置购买方账号
- * @param buyerAccount 购买方账号
- */
- public void setBuyerAccount(String buyerAccount) {
- this.buyerAccount = buyerAccount;
- }
-
- /**
- * 获取密码
- * @return 密码
- */
- public String getPassword() {
- return password;
- }
-
- /**
- * 设置密码
- * @param password 密码
- */
- public void setPassword(String password) {
- this.password = password;
- }
-
- /**
- * 获取金额
- * @return 金额
- */
- public BigDecimal getAmount() {
- return amount;
- }
-
- /**
- * 设置金额
- * @param amount 金额
- */
- public void setAmount(BigDecimal amount) {
- this.amount = amount;
- }
-
- /**
- * 获取税额
- * @return 税额
- */
- public BigDecimal getTaxAmount() {
- return taxAmount;
- }
-
- /**
- * 设置税额
- * @param taxAmount 税额
- */
- public void setTaxAmount(BigDecimal taxAmount) {
- this.taxAmount = taxAmount;
- }
-
- /**
- * 获取价税合计(大写)
- * @return 价税合计(大写)
- */
- public String getTotalAmountString() {
- return totalAmountString;
- }
-
- /**
- * 设置价税合计(大写)
- * @param totalAmountString 价税合计(大写)
- */
- public void setTotalAmountString(String totalAmountString) {
- this.totalAmountString = totalAmountString;
- }
-
- /**
- * 获取价税合计
- * @return 价税合计
- */
- public BigDecimal getTotalAmount() {
- return totalAmount;
- }
-
- /**
- * 设置价税合计
- * @param totalAmount 价税合计
- */
- public void setTotalAmount(BigDecimal totalAmount) {
- this.totalAmount = totalAmount;
- }
-
- /**
- * 获取销售方名称
- * @return 销售方名称
- */
- public String getSellerName() {
- return sellerName;
- }
-
- /**
- * 设置销售方名称
- * @param sellerName 销售方名称
- */
- public void setSellerName(String sellerName) {
- this.sellerName = sellerName;
- }
-
- /**
- * 获取销售方发票代码
- * @return 销售方发票代码
- */
- public String getSellerCode() {
- return sellerCode;
- }
-
- /**
- * 设置销售方发票代码
- * @param sellerCode 销售方发票代码
- */
- public void setSellerCode(String sellerCode) {
- this.sellerCode = sellerCode;
- }
-
- /**
- * 获取卖方地址。
- * @return 卖方地址
- */
- public String getSellerAddress() {
- return sellerAddress;
- }
-
- /**
- * 设置卖方地址。
- * @param sellerAddress 要设置的卖方地址
- */
- public void setSellerAddress(String sellerAddress) {
- this.sellerAddress = sellerAddress;
- }
-
- /**
- * 获取卖方账号。
- * @return 卖方账号
- */
- public String getSellerAccount() {
- return sellerAccount;
- }
-
- /**
- * 设置卖方账号。
- * @param sellerAccount 要设置的卖方账号
- */
- public void setSellerAccount(String sellerAccount) {
- this.sellerAccount = sellerAccount;
- }
-
- /**
- * 获取收款人信息。
- * @return 收款人信息
- */
- public String getPayee() {
- return payee;
- }
-
- /**
- * 设置收款人信息。
- * @param payee 要设置的收款人信息
- */
- public void setPayee(String payee) {
- this.payee = payee;
- }
-
- /**
- * 获取复核人信息。
- * @return 复核人信息
- */
- public String getReviewer() {
- return reviewer;
- }
-
- /**
- * 设置复核人信息。
- * @param reviewer 要设置的复核人信息
- */
- public void setReviewer(String reviewer) {
- this.reviewer = reviewer;
- }
-
- /**
- * 获取开票人信息。
- * @return 开票人信息
- */
- public String getDrawer() {
- return drawer;
- }
-
- /**
- * 设置开票人信息。
- * @param drawer 要设置的开票人信息
- */
- public void setDrawer(String drawer) {
- this.drawer = drawer;
- }
-
- /**
- * 获取发票类型。
- * @return 发票类型
- */
- public String getType() {
- return type;
- }
-
- /**
- * 设置发票类型。
- * @param type 要设置的发票类型
- */
- public void setType(String type) {
- this.type = type;
- }
-
- /**
- * 获取发票明细列表。
- * @return 发票明细列表
- */
- public List<Detail> getDetailList() {
- return detailList;
- }
-
- /**
- * 设置发票明细列表。
- * @param detailList 要设置的发票明细列表
- */
- public void setDetailList(List<Detail> detailList) {
- this.detailList = detailList;
- }
-
-
- @Override
- public String toString() {
- return "Invoice [title=" + title + ", machineNumber=" + machineNumber + ", code=" + code + ", number=" + number
- + ", date=" + date + ", checksum=" + checksum + ", buyerName=" + buyerName + ", buyerCode=" + buyerCode
- + ", buyerAddress=" + buyerAddress + ", buyerAccount=" + buyerAccount + ", password=" + password + ", amount="
- + amount + ", taxAmount=" + taxAmount + ", totalAmountString=" + totalAmountString + ", totalAmount="
- + totalAmount + ", sellerName=" + sellerName + ", sellerCode=" + sellerCode + ", sellerAddress=" + sellerAddress
- + ", sellerAccount=" + sellerAccount + ", payee=" + payee + ", reviewer=" + reviewer + ", drawer=" + drawer
- + ", type=" + type + ", detailList=" + detailList + "]";
- }
- }
-------------------------------------------------------------------------
InvoiceRegexEnum.java
- package com.thinkgem.jeesite.modules.pdfServices.entity;
-
-
-
/**
 * Regular expressions used to pull fields out of the text of an invoice PDF.
 * Each constant wraps one pattern string, available through {@link #getRegex()}.
 */
public enum InvoiceRegexEnum {

    /** Machine number, invoice code, invoice number, issue date and check code. */
    REGULAR_A("机器编号:(?<machineNumber>\\d{12})|发票代码:(?<code>\\d{12})|发票号码:(?<number>\\d{8})|:(?<date>\\d{4}年\\d{2}月\\d{2}日)|校验码:(?<checksum>\\d{20}|\\S{4,})"),

    /** New-format invoices: 20-digit invoice number, issue date and seller name. */
    REGULAR_A_NEW("发票号码:(?<number>\\d{20})|:(?<date>\\d{4}年\\d{2}月\\d{2}日)|(售名称|销名称):(?<name>\\S*)"),

    /** Fallback pattern for the invoice number. */
    REGULAR_A_1("(国制|制普通发票)(?<number>\\d{8})"),

    /** Invoice number split across lines. */
    REGULAR_A_1R("发票号码:(?<number>\\d{7})[\\s\\S]*?(\\d+)"),

    /** Fallback pattern for the issue date (no year marker between the year and month digits). */
    REGULAR_A_2("开票日期:(?<date>\\d{4}\\d{2}月\\d{2}日)"),

    /** Fallback pattern for the invoice code. */
    REGULAR_A_3("发票代码(?<code>\\d{12})"),

    /** Invoice code split across lines. */
    REGULAR_A_3R("发票代码:(?<code>\\d{10})[\\s\\S]*?(\\d+)"),

    /** Amount and tax amount following the totals label. */
    REGULAR_B("合计¥?(?<amount>[^ \\f\\n\\r\\t\\v*]*)(¥?(?<taxAmount>\\S*)|\\*+)\\s"),

    /** Amount only; used to correct a wrong match. */
    REGULAR_BR("合计¥(?<amount>\\d+\\.\\d+)"),

    /** Fallback pattern for amount and tax amount. */
    REGULAR_B_1("合\\u0020*计\\u0020*¥?(?<amount>[^ ]*)\\u0020+¥?(?:(?<taxAmount>\\S*)|\\*+)\\s"),

    /** Category of the first line item (the text after a star-delimited prefix). */
    REGULAR_B_C("\\*.*?\\*([^\\s]+)\\s"),

    /** Total of amount and tax, both the capital-numeral text and the numeric value. */
    REGULAR_C("价税合计\\u0028大写\\u0029(?<amountString>\\S*)\\u0028小写\\u0029¥?(?<amount>\\S*)\\s"),

    /** Payee, reviewer and drawer, terminated by the seller label. */
    REGULAR_D("收款人:(?<payee>\\S*)复核:(?<reviewer>\\S*)开票人:(?<drawer>\\S*)销售方"),

    /** Invoice type: captures the text in front of the "ordinary" marker. */
    REGULAR_E("(?<p>\\S*)普通"),

    /** Invoice type: captures the text in front of the "special" marker. */
    REGULAR_E_1("(?<p>\\S*)专用"),

    /** Auxiliary pattern for invoice type extraction (single characters of the header text). */
    REGULAR_E_AUX("(?:国|统|一|发|票|监|制)"),

    /** Buyer information: name, taxpayer id, address/phone, bank account, e-payment id. */
    REGULAR_F("名称:(?<name>\\S*)|纳税人识别号:(?<code>\\S*)|地址、电话:(?<address>\\S*)|开户行及账号:(?<account>\\S*)|电子支付标识:(?<account2>\\S*)"),

    /** Seller name, for Shenzhen invoices. */
    REGULAR_FR("名称:(?<name>\\S*)"),

    /**
     * An optionally negative integer or decimal number, anchored to the whole input.
     * Described in the original as handling text elements other than amount and tax amount.
     */
    REGULAR_G("^(-?\\d+)(\\.\\d+)?$"),

    /** Checks whether a line-item string carries a tax-rate marker. */
    REGULAR_H("\\S+\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*"),

    /** Variant of the tax-rate check anchored at the start of the string. */
    REGULAR_H_1("^ *\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*"),

    /** Variant of the tax-rate check: a percentage directly followed by digits or a minus sign. */
    REGULAR_H_2("\\S+\\d+%[\\-\\d]+\\S*"),

    /** Same pattern text as {@link #REGULAR_H_1}. */
    REGULAR_H_3("^ *\\d*(%|免税|不征税|出口零税率|普通零税率)\\S*");

    /** Pattern source held by this constant. */
    private final String expression;

    InvoiceRegexEnum(String expression) {
        this.expression = expression;
    }

    /** @return the regular expression string of this constant */
    public String getRegex() {
        return this.expression;
    }
}
---------------------------------------------------------------------
InvoiceSubset.java
- package com.thinkgem.jeesite.modules.pdfServices.entity;
-
-
-
- import java.math.BigDecimal;
-
-
-
/**
 * Reduced set of invoice fields. A plain mutable bean; every field is {@code null} until set.
 */
public class InvoiceSubset {

    /** Invoice code. */
    private String code;

    /** Invoice number. */
    private String number;

    /** New-format invoice number. */
    private String newNumber;

    /** Issue date. */
    private String date;

    /** Check code. */
    private String checksum;

    /** Seller name. */
    private String sellerName;

    /** Sum of the amounts. */
    private BigDecimal amount;

    /** Sum of the tax amounts. */
    private BigDecimal taxAmount;

    /** Total of amount and tax. */
    private BigDecimal totalAmount;

    /** @return the invoice code */
    public String getCode() {
        return this.code;
    }

    /** @param code the invoice code */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the invoice number */
    public String getNumber() {
        return this.number;
    }

    /** @param number the invoice number */
    public void setNumber(String number) {
        this.number = number;
    }

    /** @return the new-format invoice number */
    public String getNewNumber() {
        return this.newNumber;
    }

    /** @param newNumber the new-format invoice number */
    public void setNewNumber(String newNumber) {
        this.newNumber = newNumber;
    }

    /** @return the issue date */
    public String getDate() {
        return this.date;
    }

    /** @param date the issue date */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the check code */
    public String getChecksum() {
        return this.checksum;
    }

    /** @param checksum the check code */
    public void setChecksum(String checksum) {
        this.checksum = checksum;
    }

    /** @return the seller name */
    public String getSellerName() {
        return this.sellerName;
    }

    /** @param sellerName the seller name */
    public void setSellerName(String sellerName) {
        this.sellerName = sellerName;
    }

    /** @return the sum of the amounts */
    public BigDecimal getAmount() {
        return this.amount;
    }

    /** @param amount the sum of the amounts */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the sum of the tax amounts */
    public BigDecimal getTaxAmount() {
        return this.taxAmount;
    }

    /** @param taxAmount the sum of the tax amounts */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }

    /** @return the total of amount and tax */
    public BigDecimal getTotalAmount() {
        return this.totalAmount;
    }

    /** @param totalAmount the total of amount and tax */
    public void setTotalAmount(BigDecimal totalAmount) {
        this.totalAmount = totalAmount;
    }
}
----------------------------------------------------------------
NewInvoice.java
- package com.thinkgem.jeesite.modules.pdfServices.entity;
-
-
-
- import java.math.BigDecimal;
-
-
-
/**
 * Data extracted from a new-format invoice PDF. A plain mutable bean; every field is
 * {@code null} until set.
 */
public class NewInvoice {

    /** Absolute path of the source file. */
    private String fileAbsolutePath;

    /** Invoice number. */
    private String number;

    /** Issue date. */
    private String date;

    /** Seller name. */
    private String sellerName;

    /** Sum of the amounts. */
    private BigDecimal amount;

    /** Sum of the tax amounts. */
    private BigDecimal taxAmount;

    /** Total of amount and tax. */
    private BigDecimal totalAmount;

    /** Invoice content. */
    private String content;

    /** Invoice category: special / ordinary. */
    private String inReserve2;

    /** @return the absolute path of the source file */
    public String getFileAbsolutePath() {
        return this.fileAbsolutePath;
    }

    /** @param fileAbsolutePath the absolute path of the source file */
    public void setFileAbsolutePath(String fileAbsolutePath) {
        this.fileAbsolutePath = fileAbsolutePath;
    }

    /** @return the invoice number */
    public String getNumber() {
        return this.number;
    }

    /** @param number the invoice number */
    public void setNumber(String number) {
        this.number = number;
    }

    /** @return the issue date */
    public String getDate() {
        return this.date;
    }

    /** @param date the issue date */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the seller name */
    public String getSellerName() {
        return this.sellerName;
    }

    /** @param sellerName the seller name */
    public void setSellerName(String sellerName) {
        this.sellerName = sellerName;
    }

    /** @return the sum of the amounts */
    public BigDecimal getAmount() {
        return this.amount;
    }

    /** @param amount the sum of the amounts */
    public void setAmount(BigDecimal amount) {
        this.amount = amount;
    }

    /** @return the sum of the tax amounts */
    public BigDecimal getTaxAmount() {
        return this.taxAmount;
    }

    /** @param taxAmount the sum of the tax amounts */
    public void setTaxAmount(BigDecimal taxAmount) {
        this.taxAmount = taxAmount;
    }

    /** @return the total of amount and tax */
    public BigDecimal getTotalAmount() {
        return this.totalAmount;
    }

    /** @param totalAmount the total of amount and tax */
    public void setTotalAmount(BigDecimal totalAmount) {
        this.totalAmount = totalAmount;
    }

    /** @return the invoice content */
    public String getContent() {
        return this.content;
    }

    /** @param content the invoice content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the invoice category (special / ordinary) */
    public String getInReserve2() {
        return this.inReserve2;
    }

    /** @param inReserve2 the invoice category (special / ordinary) */
    public void setInReserve2(String inReserve2) {
        this.inReserve2 = inReserve2;
    }

    /**
     * Renders number, date, the three amounts and the seller name as
     * {@code NewInvoice{name=value, ...}}; string values are wrapped in single quotes.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("NewInvoice{");
        text.append("number='").append(number).append('\'');
        text.append(", date='").append(date).append('\'');
        text.append(", amount=").append(amount);
        text.append(", taxAmount=").append(taxAmount);
        text.append(", totalAmount=").append(totalAmount);
        text.append(", sellerName='").append(sellerName).append('\'');
        return text.append('}').toString();
    }
}
--------------------------------------------------------
InvoiceExtractor.java
- package com.thinkgem.jeesite.modules.pdfServices.utils;
-
-
- import java.io.File;
- import java.math.BigDecimal;
- import java.util.ArrayList;
- import java.util.List;
-
- import com.thinkgem.jeesite.modules.pdfServices.entity.Detail;
- import com.thinkgem.jeesite.modules.pdfServices.entity.Invoice;
-
-
- public class InvoiceExtractor {
-
- // public static Logger log = Logger.getLogger(Runnable.class);
-
- private static final String PDF_EXTENSION = ".pdf";
-
-
- /**
- * 处理指定文件夹中的PDF发票文件
- * @param folderPath 文件夹路径
- * @return 包含提取信息的 Invoice 列表
- */
- public static List<Invoice> pdfProcessInvoicesInFolder(String folderPath) {
- File folder = new File(folderPath);
- File[] files = folder.listFiles();
- List<Invoice> resultList = new ArrayList<>();
-
- if (files != null) {
- for (File file : files) {
- if (isPdfFile(file)) {
- Invoice result = extractInvoice(file.getAbsolutePath());
-
- if (result != null) {
- Invoice returnResult = createProcessedInvoice(result);
- resultList.add(returnResult);
- } else {
- handleExtractionError(file);
- }
- }
- }
- }
- return resultList;
- }
-
- /**
- * 处理指定的PDF发票文件
- * @param filePath 文件路径
- * @return 包含提取信息的 NewInvoice 列表
- */
- public static Invoice newPdfProcessInvoicesInFile(String filePath) {
- File file = new File(filePath);
- Invoice returnResult = new Invoice();
- if (isPdfFile(file)) {
- Invoice result = extractInvoice(file.getAbsolutePath());
-
- if (result != null) {
- returnResult = createProcessedInvoice(result);
-
- } else {
- handleExtractionError(file);
- }
- }
-
- return returnResult;
- }
-
- /**
- * 输出发票提取内容
- * @param invoiceList 发票提取结果集合
- */
- public static void printListInvoice(List<Invoice> invoiceList, String type){
- int sum = 0;
- for (Invoice invoice : invoiceList) {
- sum++;
- if ("String".equals(type) || "string".equals(type)){
- System.out.println("\n文件绝对路径:" + invoice.getFileAbsolutePath() +
- "\n发票代码: " + invoice.getCode() +
- "\n发票号码: " + invoice.getNumber() +
- "\n开票日期: " + invoice.getDate() +
- "\n校验码后六位: " + invoice.getChecksum() +
- "\n总价: " + invoice.getTotalAmount() +
- "\n金额: " + invoice.getAmount() +
- "\n税额: " + invoice.getTaxAmount() +
- "\n销售方名称: " + invoice.getSellerName());
- } else if ("List".equals(type) || "list".equals(type)) {
- System.out.println(invoice);
- } else {
- System.out.println(invoice);
- }
- }
- System.out.println("\nTotal:" + sum);
- }
-
-
- /**
- * 检查文件是否为PDF文件
- * @param file 要检查的文件
- * @return 如果是PDF文件,返回 true,否则返回 false
- */
- private static boolean isPdfFile(File file) {
- return file.isFile() && file.getName().toLowerCase().endsWith(PDF_EXTENSION);
- }
-
- /**
- * 创建一个处理后的 Invoice 对象
- * @param result 原始的 Invoice 对象
- * @return 处理后的 Invoice 对象
- */
- private static Invoice createProcessedInvoice(Invoice result) {
- Invoice returnResult = new Invoice();
-
- String checksum = result.getChecksum();
-
- if (checksum != null) {
- checksum = (checksum.length() > 6) ? checksum.substring(checksum.length() - 6) : checksum;
- } else {
- System.out.println("未提取到校验码~");
- }
-
- BigDecimal amount = (result.getAmount().compareTo(BigDecimal.ZERO) == 0) ? getAmountFromDetailList(result) : result.getAmount();
- BigDecimal taxAmount = (result.getTaxAmount().compareTo(BigDecimal.ZERO) == 0) ? getTaxAmountFromDetailList(result) : result.getTaxAmount();
-
- returnResult.setFileAbsolutePath(result.getFileAbsolutePath());
- returnResult.setCode(result.getCode());
- returnResult.setNumber(result.getNumber());
- returnResult.setDate(result.getDate());
- returnResult.setChecksum(checksum);
- returnResult.setTotalAmount(result.getTotalAmount());
- returnResult.setAmount(amount);
- returnResult.setTaxAmount(taxAmount);
- returnResult.setSellerName(result.getSellerName());
- returnResult.setContent(result.getContent());
- returnResult.setInReserve2(result.getInReserve2());
- return returnResult;
- }
-
- /**
- * 从发票的 detailList 中获取金额
- * @param result 原始的 Invoice 对象
- * @return 从 detailList 中获取的金额,如果 detailList 为空则返回 BigDecimal.ZERO
- */
- private static BigDecimal getAmountFromDetailList(Invoice result) {
- List<Detail> detailList = result.getDetailList();
- return (!detailList.isEmpty()) ? detailList.get(0).getAmount() : BigDecimal.ZERO;
- }
-
- /**
- * 从发票的 detailList 中获取税额
- * @param result 原始的 Invoice 对象
- * @return 从 detailList 中获取的税额,如果 detailList 为空则返回 BigDecimal.ZERO
- */
- private static BigDecimal getTaxAmountFromDetailList(Invoice result) {
- List<Detail> detailList = result.getDetailList();
- return (!detailList.isEmpty()) ? detailList.get(0).getTaxAmount() : BigDecimal.ZERO;
- }
-
- /**
- * 处理提取失败的情况,输出错误信息
- * @param file 提取失败的文件
- */
- private static void handleExtractionError(File file) {
- // log.debug("文件: {}\t提取失败~~~\n"+ file.getName());
- System.out.println("文件: {}\t提取失败~~~\n"+ file.getName());
- }
-
-
- /**
- * 从本地文件或URL中提取发票信息。
- * @param filePath 本地文件路径或发票的URL。
- * @return 包含提取信息的 Invoice 对象。
- */
- private static Invoice extractInvoice(String filePath) {
- File sourceFile = new File(filePath);
-
- if (!sourceFile.exists()) {
- // log.debug("指定的源文件不存在");
- System.out.println("指定的源文件不存在");
- //throw new IllegalArgumentException("指定的源文件不存在");
- }
-
- Invoice result = null;
-
- try {
- result = PdfInvoiceExtractor.extract(sourceFile);
- result.setFileAbsolutePath(sourceFile.getAbsolutePath());
- } catch (Exception e) {
- e.printStackTrace();
- result = new Invoice();
- result.setTitle("error");
- }
- return result;
- }
- }
-----------------------------------------------------------------
NewInvoiceExtractor.java
- package com.thinkgem.jeesite.modules.pdfServices.utils;
-
- import java.io.File;
- import java.util.ArrayList;
- import java.util.List;
-
- import com.thinkgem.jeesite.modules.pdfServices.entity.NewInvoice;
-
-
- public class NewInvoiceExtractor {
-
- // public static Logger log = Logger.getLogger(Runnable.class);
-
- private static final String PDF_EXTENSION = ".pdf";
-
-
- /**
- * 处理指定文件夹中的PDF发票文件
- * @param folderPath 文件夹路径
- * @return 包含提取信息的 NewInvoice 列表
- */
- public static List<NewInvoice> newPdfProcessInvoicesInFolder(String folderPath) {
- File folder = new File(folderPath);
-
- File[] files = folder.listFiles();
-
- List<NewInvoice> resultList = new ArrayList<>();
-
- if (files != null) {
- for (File file : files) {
- if (isPdfFile(file)) {
- NewInvoice result = extractInvoice(file.getAbsolutePath());
-
- if (result != null) {
- NewInvoice returnResult = createProcessedInvoice(result);
- resultList.add(returnResult);
- } else {
- handleExtractionError(file);
- }
- }
- }
- }
-
- return resultList;
- }
-
-
- /**
- * 处理指定的PDF发票文件
- * @param filePath 文件路径
- * @return 包含提取信息的 NewInvoice 列表
- */
- public static NewInvoice newPdfProcessInvoicesInFile(String filePath) {
- File file = new File(filePath);
- NewInvoice returnResult = new NewInvoice();
- if (isPdfFile(file)) {
- NewInvoice result = extractInvoice(file.getAbsolutePath());
-
- if (result != null) {
- returnResult = createProcessedInvoice(result);
-
- } else {
- handleExtractionError(file);
- }
- }
-
- return returnResult;
- }
-
-
- /**
- * 输出发票提取内容
- * @param invoiceList 发票提取结果集合
- */
- public static void newPrintListInvoice(List<NewInvoice> invoiceList, String type){
- int sum = 0;
- for (NewInvoice invoice : invoiceList) {
- sum++;
- if ("String".equals(type) || "string".equals(type)){
- System.out.println("\n文件绝对路径: " + invoice.getFileAbsolutePath() +
- "\n发票号码: " + invoice.getNumber() +
- "\n开票日期: " + invoice.getDate() +
- "\n总价: " + invoice.getTotalAmount() +
- "\n金额: " + invoice.getAmount() +
- "\n税额: " + invoice.getTaxAmount() +
- "\n销售方名称: " + invoice.getSellerName());
- } else if ("List".equals(type) || "list".equals(type)) {
- System.out.println(invoice);
- } else {
- System.out.println(invoice);
- }
- }
- System.out.println("\nTotal:" + sum);
- }
-
-
- /**
- * 检查文件是否为PDF文件
- * @param file 要检查的文件
- * @return 如果是PDF文件,返回 true,否则返回 false
- */
- private static boolean isPdfFile(File file) {
- return file.isFile() && file.getName().toLowerCase().endsWith(PDF_EXTENSION);
- }
-
- /**
- * 创建一个处理后的 NewInvoice 对象
- * @param result 原始的 NewInvoice 对象
- * @return 处理后的 NewInvoice 对象
- */
- private static NewInvoice createProcessedInvoice(NewInvoice result) {
- NewInvoice returnResult = new NewInvoice();
- returnResult.setFileAbsolutePath(result.getFileAbsolutePath());
- returnResult.setNumber(result.getNumber());
- returnResult.setDate(result.getDate());
- returnResult.setContent(result.getContent());
- returnResult.setTotalAmount(result.getTotalAmount());
- returnResult.setAmount(result.getAmount());
- returnResult.setTaxAmount(result.getTaxAmount());
- returnResult.setSellerName(result.getSellerName());
- returnResult.setInReserve2(result.getInReserve2());
- return returnResult;
- }
-
-
- /**
- * 处理提取失败的情况,输出错误信息
- * @param file 提取失败的文件
- */
- private static void handleExtractionError(File file) {
- // log.debug("文件: {}\t提取失败~~~\n"+ file.getName());
- System.out.println("文件: {}\t提取失败~~~\n"+ file.getName());
- }
-
-
- /**
- * 从本地文件或URL中提取发票信息。
- * @param filePath 本地文件路径或发票的URL。
- * @return 包含提取信息的 NewInvoice 对象。
- */
- private static NewInvoice extractInvoice(String filePath) {
- File sourceFile = new File(filePath);
-
- if (!sourceFile.exists()) {
- // log.error("指定的源文件不存在");
- System.out.println("指定的源文件不存在");
- //throw new IllegalArgumentException("指定的源文件不存在");
- }
-
- NewInvoice result = null;
-
- try {
- result = NewPdfInvoiceExtractor.extract(sourceFile);
- result.setFileAbsolutePath(sourceFile.getAbsolutePath());
- } catch (Exception e) {
- e.printStackTrace();
- result = new NewInvoice();
- }
- return result;
- }
- }
------------------------------------------------------------------
NewPdfInvoiceExtractor.java
- package com.thinkgem.jeesite.modules.pdfServices.utils;
-
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.text.PDFTextStripper;
-
- import com.thinkgem.jeesite.common.utils.StringUtils;
- import com.thinkgem.jeesite.modules.pdfServices.entity.InvoiceRegexEnum;
- import com.thinkgem.jeesite.modules.pdfServices.entity.NewInvoice;
-
- import java.io.File;
- import java.io.IOException;
- import java.math.BigDecimal;
- import java.text.ParseException;
- import java.text.SimpleDateFormat;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
-
-
-
- /**
- * 处理电子发票识别类
- * @author b16mt
- */
- public class NewPdfInvoiceExtractor {
- public static NewInvoice extract(File file) throws IOException {
- NewInvoice invoice = new NewInvoice();
- PDDocument doc = PDDocument.load(file);
- // PDPage firstPage = doc.getPage(0);
-
- // int pageWidth = Math.round(firstPage.getCropBox().getWidth());
-
- PDFTextStripper textStripper = new PDFTextStripper();
- textStripper.setSortByPosition(true);
-
- String fullText = textStripper.getText(doc);
-
- // if (firstPage.getRotation() != 0) {
- // pageWidth = Math.round(firstPage.getCropBox().getHeight());
- // }
-
- String allText = replace(fullText).replaceAll("(", "(").replaceAll(")", ")").replaceAll("¥", "¥");
-
- {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_A_NEW.getRegex());
- Pattern patternNumber = Pattern.compile(InvoiceRegexEnum.REGULAR_A_1.getRegex());
- Pattern patternDate = Pattern.compile(InvoiceRegexEnum.REGULAR_A_2.getRegex());
- Matcher matcher = pattern.matcher(allText);
- while (matcher.find()) {
- if (matcher.group("number") != null) {
- invoice.setNumber(matcher.group("number"));
- } else if (matcher.group("date") != null) {
- String rawDate = matcher.group("date");
- try {
- SimpleDateFormat inputDateFormat = new SimpleDateFormat("yyyy年MM月dd日");
- SimpleDateFormat outputDateFormat = new SimpleDateFormat("yyyy-MM-dd");
-
- Date parsedDate = inputDateFormat.parse(rawDate);
- String formattedDate = outputDateFormat.format(parsedDate);
- invoice.setDate(formattedDate);
- } catch (ParseException e) {
- System.out.println("无法解析日期:" + rawDate);
- }
- } else if (matcher.group("name") != null){
- invoice.setSellerName(matcher.group("name"));
- }
-
- if (matcher.group("number") == null){
- Matcher matcher2 = patternNumber.matcher(allText);
- if (matcher2.find()) {
- invoice.setNumber(matcher2.group("number"));
- }
- }
- if (matcher.group("date") == null){
- Matcher matcher3 = patternDate.matcher(allText);
- if (matcher3.find()) {
- String rawDate = matcher3.group("date");
- try {
- SimpleDateFormat inputDateFormat = new SimpleDateFormat("yyyyMM月dd日");
- SimpleDateFormat outputDateFormat = new SimpleDateFormat("yyyy-MM-dd");
-
- Date parsedDate = inputDateFormat.parse(rawDate);
- String formattedDate = outputDateFormat.format(parsedDate);
-
- invoice.setDate(formattedDate);
- } catch (Exception e) {
- System.out.println("无法解析日期:" + rawDate);
- }
- }
- }
- }
- }
- //获取不到发票号码按照老版发票处理
- if(StringUtils.isBlank(invoice.getNumber())){
- return null;
- }
-
- {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_B.getRegex());
- Matcher matcher = pattern.matcher(allText);
- if (matcher.find()) {
- try {
- invoice.setAmount(new BigDecimal(matcher.group("amount")));
- } catch (Exception e) {
- // 不处理
- }
- try {
- invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
- } catch (Exception e) {
- invoice.setTaxAmount(new BigDecimal(0));
- }
- }
- }
-
- if (null == invoice.getAmount()) {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_B_1.getRegex());
- Matcher matcher = pattern.matcher(fullText);
- if (matcher.find()) {
- try {
- invoice.setAmount(new BigDecimal(matcher.group("amount")));
- } catch (Exception e) {
- invoice.setAmount(new BigDecimal(0));
- }
- try {
- invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
- } catch (Exception e) {
- invoice.setTaxAmount(new BigDecimal(0));
- }
- }
- }
- invoice.setTotalAmount(invoice.getAmount().add(invoice.getTaxAmount()));
-
- if (null == invoice.getInReserve2()) {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_E.getRegex());
- Matcher matcher = pattern.matcher(fullText);
- if (matcher.find()) {
- try {
- invoice.setInReserve2("普票");
- } catch (Exception e) {
- System.out.println("无法解析日期:" + matcher.group("p"));
- }
- }else {
- Pattern pattern1 = Pattern.compile(InvoiceRegexEnum.REGULAR_E_1.getRegex());
- Matcher matcher1 = pattern1.matcher(fullText);
- if (matcher1.find()) {
- try {
- invoice.setInReserve2("专票");
- } catch (Exception e) {
- System.out.println("无法解析日期:" + matcher1.group("p"));
- }
- }
- }
- }
-
- if (null == invoice.getContent()) {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_B_C.getRegex());
- Matcher matcher = pattern.matcher(fullText);
- if (matcher.find()) {
- try {
- invoice.setContent(matcher.group(1));
- } catch (Exception e) {
- invoice.setContent(null);
- }
- }
- }
- return invoice;
-
- }
-
- /**
- * 替换字符串中的空格、全角空格、冒号和特殊空白字符为标准字符。
- * @param str 要进行替换的字符串
- * @return 替换后的字符串
- */
- private static String replace(String str) {
- return str.replaceAll(" ", "").replaceAll(" ", "").replaceAll(":", ":").replaceAll(" ", "");
- }
- }
-------------------------------------------------------
PdfInvoiceExtractor.java
- package com.thinkgem.jeesite.modules.pdfServices.utils;
-
- import org.apache.commons.lang3.StringUtils;
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.pdmodel.PDPage;
- import org.apache.pdfbox.text.PDFTextStripper;
- import org.apache.pdfbox.text.PDFTextStripperByArea;
-
- import com.thinkgem.jeesite.modules.pdfServices.entity.Detail;
- import com.thinkgem.jeesite.modules.pdfServices.entity.Invoice;
- import com.thinkgem.jeesite.modules.pdfServices.entity.InvoiceRegexEnum;
-
- import java.awt.*;
- import java.io.File;
- import java.io.IOException;
- import java.math.BigDecimal;
- import java.text.ParseException;
- import java.text.SimpleDateFormat;
- import java.util.List;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
-
-
-
- /**
- * 处理电子发票识别类
- */
- public class PdfInvoiceExtractor {
- public static Invoice extract(File file) throws IOException {
- // 创建一个新的发票对象
- Invoice invoice = new Invoice();
-
- // 加载 PDF 文件并获取第一页
- PDDocument doc = PDDocument.load(file);
- PDPage firstPage = doc.getPage(0);
-
- // 获取页面宽度
- int pageWidth = Math.round(firstPage.getCropBox().getWidth());
-
- // 创建 PDF 文本提取工具
- PDFTextStripper textStripper = new PDFTextStripper();
- textStripper.setSortByPosition(true);
-
- // 提取整个 PDF 文本内容
- String fullText = textStripper.getText(doc);
-
- // 如果页面旋转了,重新计算页面宽度
- if (firstPage.getRotation() != 0) {
- pageWidth = Math.round(firstPage.getCropBox().getHeight());
- }
-
- // 替换文本中的全角字符为半角字符
- String allText = replace(fullText).replaceAll("(", "(").replaceAll(")", ")").replaceAll("¥", "¥");
-
- // 使用正则表达式匹配和提取发票信息
- {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_A.getRegex());
- Pattern patternNumber = Pattern.compile(InvoiceRegexEnum.REGULAR_A_1.getRegex());
- Pattern patternNumberReplace = Pattern.compile(InvoiceRegexEnum.REGULAR_A_1R.getRegex());
- Pattern patternDate = Pattern.compile(InvoiceRegexEnum.REGULAR_A_2.getRegex());
- Pattern patternCode = Pattern.compile(InvoiceRegexEnum.REGULAR_A_3.getRegex());
- Pattern patternCodeReplace = Pattern.compile(InvoiceRegexEnum.REGULAR_A_3R.getRegex());
-
- Matcher matcher = pattern.matcher(allText);
- while (matcher.find()) {
- if (matcher.group("machineNumber") != null) {
- invoice.setMachineNumber(matcher.group("machineNumber"));
- } else if (matcher.group("code") != null) {
- invoice.setCode(matcher.group("code"));
- } else if (matcher.group("number") != null) {
- invoice.setNumber(matcher.group("number"));
- } else if (matcher.group("date") != null) {
- String rawDate = matcher.group("date");
- try {
- SimpleDateFormat inputDateFormat = new SimpleDateFormat("yyyy年MM月dd日");
- SimpleDateFormat outputDateFormat = new SimpleDateFormat("yyyy-MM-dd");
-
- Date parsedDate = inputDateFormat.parse(rawDate);
- String formattedDate = outputDateFormat.format(parsedDate);
- invoice.setDate(formattedDate);
- } catch (ParseException e) {
- System.out.println("无法解析日期:" + rawDate);
- }
- } else if (matcher.group("checksum") != null) {
- invoice.setChecksum(matcher.group("checksum"));
- }
-
- if (invoice.getNumber() == null){
- Matcher matcher2 = patternNumber.matcher(allText);
- if (matcher2.find()) {
- invoice.setNumber(matcher2.group("number"));
- }
- if (invoice.getNumber() == null){
- Matcher matcher2Replace = patternNumberReplace.matcher(allText);
- while (matcher2Replace.find()) {
- String firstNumber = matcher2Replace.group("number");
- String secondNumber = matcher2Replace.group(2);
- invoice.setNumber(firstNumber + secondNumber);
- }
- }
- }
-
- if (invoice.getDate() == null){
- Matcher matcher3 = patternDate.matcher(allText);
- if (matcher3.find()) {
- String rawDate = matcher3.group("date");
-
- try {
- SimpleDateFormat inputDateFormat = new SimpleDateFormat("yyyyMM月dd日");
- SimpleDateFormat outputDateFormat = new SimpleDateFormat("yyyy-MM-dd");
-
- Date parsedDate = inputDateFormat.parse(rawDate);
- String formattedDate = outputDateFormat.format(parsedDate);
-
- invoice.setDate(formattedDate);
- } catch (Exception e) {
- System.out.println("无法解析日期:" + rawDate);
- }
- }
- }
-
- if (invoice.getCode() == null){
- Matcher matcher4 = patternCode.matcher(allText);
- if (matcher4.find()) {
- invoice.setCode(matcher4.group("code"));
- }
- if (invoice.getCode() == null){
- Matcher matcher4Replace = patternCodeReplace.matcher(allText);
- while (matcher4Replace.find()) {
- String firstCode = matcher4Replace.group("code");
- String secondCode = matcher4Replace.group(2);
- invoice.setCode(secondCode + firstCode);
- }
- }
- }
- }
- }
-
- {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_B.getRegex());
-
- Matcher matcher = pattern.matcher(allText);
-
- if (matcher.find()) {
- try {
- invoice.setAmount(new BigDecimal(matcher.group("amount")));
- } catch (Exception e) {
- // 不处理
- }
- try {
- invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
- } catch (Exception e) {
- invoice.setTaxAmount(new BigDecimal(0));
- }
- }
- }
-
- if (null == invoice.getAmount()) {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_B_1.getRegex());
-
- Matcher matcher = pattern.matcher(fullText);
-
- if (matcher.find()) {
- try {
- invoice.setAmount(new BigDecimal(matcher.group("amount")));
- } catch (Exception e) {
- invoice.setAmount(new BigDecimal(0));
- }
- try {
- invoice.setTaxAmount(new BigDecimal(matcher.group("taxAmount")));
- } catch (Exception e) {
- invoice.setTaxAmount(new BigDecimal(0));
- }
- }
- }
-
- if (null == invoice.getContent()) {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_B_C.getRegex());
- Matcher matcher = pattern.matcher(fullText);
- while (matcher.find()) {
- try {
- invoice.setContent(matcher.group(1));
- } catch (Exception e) {
- invoice.setContent(null);
- }
- }
- }
-
- {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_C.getRegex());
-
- Matcher matcher = pattern.matcher(allText);
-
- if (matcher.find()) {
- invoice.setTotalAmountString(matcher.group("amountString"));
-
- try {
- invoice.setTotalAmount(new BigDecimal(matcher.group("amount")));
- } catch (Exception e) {
- invoice.setTotalAmount(new BigDecimal(0));
- }
- }
- }
-
- {
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_D.getRegex());
- Matcher matcher = pattern.matcher(allText);
-
- if (matcher.find()) {
- invoice.setPayee(matcher.group("payee"));
- invoice.setReviewer(matcher.group("reviewer"));
- invoice.setDrawer(matcher.group("drawer"));
- }
-
- if (allText.indexOf("通行费") > 0 && allText.indexOf("车牌号") > 0) {
- invoice.setType("通行费");
- }
- }
-
- {
- Pattern type00Pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_E.getRegex());
- Matcher m00 = type00Pattern.matcher(allText);
- if (m00.find()) {
- // invoice.setTitle(m00.group("p").replaceAll(InvoiceRegexEnum.REGULAR_E_AUX.getRegex(), "") + "通发票");
- if (null == invoice.getInReserve2()) {
- invoice.setInReserve2("普票");
- }
- } else {
- Pattern type01Pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_E_1.getRegex());
- Matcher m01 = type01Pattern.matcher(allText);
- if (m01.find()) {
- // invoice.setTitle(m01.group("p").replaceAll(InvoiceRegexEnum.REGULAR_E_AUX.getRegex(), "") + "用发票");
- if (null == invoice.getInReserve2()) {
- invoice.setInReserve2("专票");
- }
- }
- }
- }
-
- PDFKeyWordPosition kwp = new PDFKeyWordPosition();
-
- Map<String, List<Position>> positionListMap = kwp.getCoordinate(Arrays.asList("机器编号", "税率", "价税合计", "合计", "开票日期", "规格型号", "车牌号", "开户行及账号", "密", "码", "区"), doc);
-
- PDFTextStripperByArea stripper = new PDFTextStripperByArea();
- stripper.setSortByPosition(true);
-
- PDFTextStripperByArea detailStripper = new PDFTextStripperByArea();
- detailStripper.setSortByPosition(true);
- {
- Position machineNumber;
- if (positionListMap.get("机器编号").size() > 0) {
- machineNumber = positionListMap.get("机器编号").get(0);
- } else {
- machineNumber = positionListMap.get("开票日期").get(0);
- machineNumber.setY(machineNumber.getY() + 30);
- }
- Position taxRate = positionListMap.get("税率").get(0);
- Position totalAmount = positionListMap.get("价税合计").get(0);
- Position amount = positionListMap.get("合计").get(0);
-
- Position model = null;
-
- if (!positionListMap.get("规格型号").isEmpty()) {
- model = positionListMap.get("规格型号").get(0);
- } else {
- model = positionListMap.get("车牌号").get(0);
- model.setX(model.getX() - 15);
- }
-
- List<Position> account = positionListMap.get("开户行及账号");
-
- Position buyer;
- Position seller;
-
- if (account.size() < 2) {
- buyer = new Position(51, 122);
- seller = new Position(51, 341);
- } else {
- buyer = account.get(0);
- seller = account.get(1);
- }
-
- int maqX = 370;
-
- List<Position> mi = positionListMap.get("密");
- List<Position> ma = positionListMap.get("码");
- List<Position> qu = positionListMap.get("区");
-
- for (Position position : mi) {
- float x1 = position.getX();
- for (Position value : ma) {
- float x2 = value.getX();
- if (Math.abs(x1 - x2) < 5) {
- for (Position item : qu) {
- float x3 = item.getX();
- if (Math.abs(x2 - x3) < 5) {
- maqX = Math.round((x1 + x2 + x3) / 3);
- }
- }
- }
- }
- }
-
- {
- int x = Math.round(model.getX()) - 13;
- int y = Math.round(taxRate.getY()) + 5;
- int h = Math.round(amount.getY()) - Math.round(taxRate.getY()) - 25;
-
- detailStripper.addRegion("detail", new Rectangle(0, y, pageWidth, h));
- stripper.addRegion("detailName", new Rectangle(0, y, x, h));
- stripper.addRegion("detailPrice", new Rectangle(x, y, pageWidth, h));
- }
-
- {
- int x = maqX + 10;
- int y = Math.round(machineNumber.getY()) + 10;
- int w = pageWidth - maqX - 10;
- int h = Math.round(taxRate.getY() - 5) - y;
-
- stripper.addRegion("password", new Rectangle(x, y, w, h));
- }
-
- {
- int x = Math.round(buyer.getX()) - 15;
- int y = Math.round(machineNumber.getY()) + 10;
- int w = maqX - x - 5;
- int h = Math.round(buyer.getY()) - y + 20;
-
- stripper.addRegion("buyer", new Rectangle(x, y, w, h));
- }
-
- {
- int x = Math.round(seller.getX()) - 15;
- int y = Math.round(totalAmount.getY()) + 10;
- int w = maqX - x - 5;
- int h = Math.round(seller.getY()) - y + 20;
-
- stripper.addRegion("seller", new Rectangle(x, y, w, h));
- }
- }
-
- stripper.extractRegions(firstPage);
- detailStripper.extractRegions(firstPage);
-
- doc.close();
-
- invoice.setPassword(StringUtils.trim(stripper.getTextForRegion("password")));
-
- {
- String buyer = replace(stripper.getTextForRegion("buyer"));
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_F.getRegex());
-
- Matcher matcher = pattern.matcher(buyer);
-
- while (matcher.find()) {
- if (matcher.group("name") != null) {
- invoice.setBuyerName(matcher.group("name"));
- }
- else if (matcher.group("code") != null) {
- invoice.setBuyerCode(matcher.group("code"));
- }
- else if (matcher.group("address") != null) {
- invoice.setBuyerAddress(matcher.group("address"));
- }
- else if (matcher.group("account") != null) {
- invoice.setBuyerAccount(matcher.group("account"));
- }
- else if (matcher.group("account2") != null) {
- invoice.setBuyerAccount(matcher.group("account2"));
- }
- }
- }
-
- {
- String seller = replace(stripper.getTextForRegion("seller"));
-
- Pattern pattern = Pattern.compile(InvoiceRegexEnum.REGULAR_F.getRegex());
-
- Matcher matcher = pattern.matcher(seller);
-
- while (matcher.find()) {
- if (matcher.group("name") != null) {
- if (matcher.group("name").contains("出有限公司")){
- String replace = matcher.group("name").replace("出有限公司", "出版社有限公司");
- invoice.setSellerName(replace);
- } else if (matcher.group("name").contains("贸有公司")) {
- String replace = matcher.group("name").replace("贸有公司", "贸易有限公司");
- invoice.setSellerName(replace);
- } else {
- invoice.setSellerName(matcher.group("name"));
- }
- }
- else if (matcher.group("code") != null) {
- invoice.setSellerCode(matcher.group("code"));
- }
- else if (matcher.group("address") != null) {
- invoice.setSellerAddress(matcher.group("address"));
- }
- else if (matcher.group("account") != null) {
- invoice.setSellerAccount(matcher.group("account"));
- }
- }
- }
- if (invoice.getSellerName() == null){
- Pattern patternReplace = Pattern.compile(InvoiceRegexEnum.REGULAR_FR.getRegex());
-
- Matcher matcherReplace = patternReplace.matcher(allText);
-
- while (matcherReplace.find()) {
- if (matcherReplace.group("name") != null) {
- if (!matcherReplace.group("name").contains("学院") || !matcherReplace.group("name").contains("大学")){
- if (matcherReplace.group("name").contains("出有限公司")){
- String replace = matcherReplace.group("name").replace("出有限公司", "出版社有限公司");
- invoice.setSellerName(replace);
- } else if (matcherReplace.group("name").contains("贸有公司")) {
- String replace = matcherReplace.group("name").replace("贸有公司", "贸易有限公司");
- invoice.setSellerName(replace);
- } else {
- invoice.setSellerName(matcherReplace.group("name"));
- }
- }
- }
- }
- }
-
- {
- List<String> skipList = new ArrayList<>();
-
- List<Detail> detailList = new ArrayList<>();
-
- String[] detailPriceStringArray = stripper.getTextForRegion("detailPrice")
- .replaceAll(" ", " ").replaceAll(" ", " ").replaceAll("\r", "").split("\\n");
-
- for (String detailString : detailPriceStringArray) {
- Detail detail = new Detail();
-
- detail.setName("");
-
- String[] itemArray = StringUtils.split(detailString, " ");
-
- if (2 == itemArray.length) {
- detail.setAmount(new BigDecimal(itemArray[0]));
- detail.setTaxAmount(new BigDecimal(itemArray[1]));
-
- detailList.add(detail);
- }
- else if (2 < itemArray.length) {
- detail.setAmount(new BigDecimal(itemArray[itemArray.length - 3]));
-
- String taxRate = itemArray[itemArray.length - 2];
-
- if (taxRate.indexOf("免税") > 0 || taxRate.indexOf("不征税") > 0 || taxRate.indexOf("出口零税率") > 0
- || taxRate.indexOf("普通零税率") > 0 || !taxRate.contains("%")) {
- detail.setTaxRate(new BigDecimal(0));
- detail.setTaxAmount(new BigDecimal(0));
- } else {
- BigDecimal rate = new BigDecimal(Integer.parseInt(taxRate.replaceAll("%", "")));
- detail.setTaxRate(rate.divide(new BigDecimal(100)));
- detail.setTaxAmount(new BigDecimal(itemArray[itemArray.length - 1]));
- }
-
- for (int j = 0; j < itemArray.length - 3; j++) {
- if (itemArray[j].matches(InvoiceRegexEnum.REGULAR_G.getRegex())) {
- if (null == detail.getCount()) {
- detail.setCount(new BigDecimal(itemArray[j]));
- } else {
- detail.setPrice(new BigDecimal(itemArray[j]));
- }
- } else {
- if (itemArray.length >= j + 1 && !itemArray[j + 1].matches(InvoiceRegexEnum.REGULAR_G.getRegex())) {
- detail.setUnit(itemArray[j + 1]);
- detail.setModel(itemArray[j]);
- j++;
- } else if (itemArray[j].length() > 2) {
- detail.setModel(itemArray[j]);
- } else {
- detail.setUnit(itemArray[j]);
- }
- }
- }
- detailList.add(detail);
- } else {
- skipList.add(detailString);
- }
- }
-
- String[] detailNameStringArray = stripper.getTextForRegion("detailName").replaceAll(" ", " ").replaceAll(" ", " ")
- .replaceAll("\r", "").split("\\n");
- String[] detailStringArray = replace(detailStripper.getTextForRegion("detail")).replaceAll("\r", "").split("\\n");
- int i = 0, j = 0, h = 0, m = 0;
- Detail lastDetail = null;
-
- for (String detailString : detailStringArray) {
- if (m < detailNameStringArray.length) {
- if (detailString.matches(InvoiceRegexEnum.REGULAR_H.getRegex())
- && !detailString.matches(InvoiceRegexEnum.REGULAR_H_1.getRegex())
- && detailString.matches(InvoiceRegexEnum.REGULAR_H_2.getRegex())
- || detailStringArray.length > i + 1
- && detailStringArray[i + 1].matches(InvoiceRegexEnum.REGULAR_H_3.getRegex())) {
- if (j < detailList.size()) {
- lastDetail = detailList.get(j);
- lastDetail.setName(detailNameStringArray[m]);
- }
- j++;
- } else if (null != lastDetail && StringUtils.isNotBlank(detailNameStringArray[m])) {
- if (skipList.size() > h) {
- String skip = skipList.get(h);
- if (detailString.endsWith(skip)) {
- if (detailString.equals(skip)) {
- m--;
- } else {
- lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
- }
- lastDetail.setModel(lastDetail.getModel() + skip);
- h++;
- } else {
- lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
- }
- } else {
- lastDetail.setName(lastDetail.getName() + detailNameStringArray[m]);
- }
- }
- }
- i++;
- m++;
- }
-
- invoice.setDetailList(detailList);
-
- if (invoice.getAmount().add(invoice.getTaxAmount()).compareTo(invoice.getTotalAmount()) != 0){
- Pattern patternReplace = Pattern.compile(InvoiceRegexEnum.REGULAR_BR.getRegex());
-
- Matcher matcherReplace = patternReplace.matcher(allText);
-
- if (matcherReplace.find()) {
- try {
- invoice.setAmount(new BigDecimal(matcherReplace.group("amount")));
- invoice.setTaxAmount(invoice.getTotalAmount().subtract(invoice.getAmount()));
- } catch (Exception e) {
- // 不处理
- }
- }
- }
-
-
- if (invoice.getTotalAmount().compareTo(BigDecimal.ZERO) == 0){
- if (invoice.getAmount().compareTo(BigDecimal.ZERO) == 0){
- invoice.setAmount((!detailList.isEmpty()) ? detailList.get(0).getAmount() : BigDecimal.ZERO);
- invoice.setTaxAmount((!detailList.isEmpty()) ? detailList.get(0).getTaxAmount() : BigDecimal.ZERO);
- }
- invoice.setTotalAmount(invoice.getAmount().add(invoice.getTaxAmount()));
- }
-
- }
- return invoice;
-
- }
-
- /**
- * 替换字符串中的空格、全角空格、冒号和特殊空白字符为标准字符。
- * @param str 要进行替换的字符串
- * @return 替换后的字符串
- */
- private static String replace(String str) {
- return str.replaceAll(" ", "").replaceAll(" ", "").replaceAll(":", ":").replaceAll(" ", "");
- }
- }
------------------------------------------------------------
PDFKeyWordPosition.java
- package com.thinkgem.jeesite.modules.pdfServices.utils;
-
- import org.apache.pdfbox.pdmodel.PDDocument;
- import org.apache.pdfbox.text.PDFTextStripper;
- import org.apache.pdfbox.text.TextPosition;
-
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.io.OutputStreamWriter;
- import java.io.Writer;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.List;
- import java.util.Map;
-
- public class PDFKeyWordPosition extends PDFTextStripper {
-
- private List<String> keywordList;
- private Map<String, List<Position>> positionListMap;
-
- public PDFKeyWordPosition() throws IOException {
- super();
- }
-
- /**
- * 获取坐标信息
- *
- * @param keywordList 要搜索的关键字列表
- * @param document PDF 文档
- * @return 关键字的位置信息映射
- * @throws
- */
- public Map<String, List<Position>> getCoordinate(List<String> keywordList, PDDocument document) throws IOException {
- super.setSortByPosition(true);
- this.keywordList = keywordList;
- this.positionListMap = new HashMap<>();
- super.setStartPage(1);
- super.setEndPage(1);
- Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
- super.writeText(document, dummy);
- return positionListMap;
- }
-
- @Override
- protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
- for (String keyword : keywordList) {
- int foundIndex = 0;
- List<Position> positionList = positionListMap.computeIfAbsent(keyword, k -> new ArrayList<>());
-
- for (int i = 0; i < textPositions.size(); i++) {
- TextPosition textPosition = textPositions.get(i);
- String str = textPosition.getUnicode();
-
- if (0 < str.length() && str.charAt(0) == keyword.charAt(foundIndex)) {
- foundIndex++;
- int count = foundIndex;
-
- for (int j = foundIndex; j < keyword.length(); j++) {
- if (i + j >= textPositions.size()) {
- break;
- } else {
- String s = textPositions.get(i + j).getUnicode();
-
- if (0 < s.length() && s.charAt(0) == keyword.charAt(j)) {
- count++;
- }
- }
- }
-
- if (count == keyword.length()) {
- foundIndex = 0;
- Position position = new Position();
- position.setX(textPosition.getX());
- position.setY(textPosition.getY());
- positionList.add(position);
- positionListMap.put(keyword, positionList);
- }
- }
- }
- }
- }
- }
-
-
/**
 * A mutable point in page coordinates, used to remember where a keyword was found.
 */
class Position {

    float x;
    float y;

    /** Creates a position at the origin (both coordinates are 0). */
    public Position() {
    }

    /**
     * Creates a position at the given coordinates.
     *
     * @param x horizontal coordinate
     * @param y vertical coordinate
     */
    public Position(float x, float y) {
        this.x = x;
        this.y = y;
    }

    public float getX() {
        return this.x;
    }

    public float getY() {
        return this.y;
    }

    public void setX(float x) {
        this.x = x;
    }

    public void setY(float y) {
        this.y = y;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Position [x=");
        text.append(x);
        text.append(", y=");
        text.append(y);
        text.append("]");
        return text.toString();
    }
}
*********** 代码至此结束,以下是需要在 pom.xml 中引入的 Maven 依赖 ***********
- <!-- pdf识别-->
-
- <dependency>
-
- <groupId>org.apache.pdfbox</groupId>
-
- <artifactId>pdfbox</artifactId>
-
- <version>2.0.21</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.pdfbox</groupId>
-
- <artifactId>fontbox</artifactId>
-
- <version>2.0.21</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.pdfbox</groupId>
-
- <artifactId>jempbox</artifactId>
-
- <version>1.8.13</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.pdfbox</groupId>
-
- <artifactId>xmpbox</artifactId>
-
- <version>2.0.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.pdfbox</groupId>
-
- <artifactId>preflight</artifactId>
-
- <version>2.0.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.pdfbox</groupId>
-
- <artifactId>pdfbox-tools</artifactId>
-
- <version>2.0.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.poi</groupId>
-
- <artifactId>poi</artifactId>
-
- <version>5.0.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.poi</groupId>
-
- <artifactId>poi-ooxml</artifactId>
-
- <version>5.0.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>com.google.zxing</groupId>
-
- <artifactId>core</artifactId>
-
- <version>3.1.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>com.google.zxing</groupId>
-
- <artifactId>javase</artifactId>
-
- <version>3.1.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>org.apache.commons</groupId>
-
- <artifactId>commons-lang3</artifactId>
-
- <version>3.12.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>commons-io</groupId>
-
- <artifactId>commons-io</artifactId>
-
- <version>2.11.0</version>
-
- </dependency>
-
- <dependency>
-
- <groupId>commons-codec</groupId>
-
- <artifactId>commons-codec</artifactId>
-
- </dependency>
-
- <!-- pdf识别end-->
==========================================================
导出excel效果如图:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。