赞
踩
主要为开发者提供一个思路,这里并不是完整的商业项目,只是一时兴起写的一份demo,希望对大家有帮助。
开源仓库地址:GPT-PDF
from flask import Flask, request, Response
import PyPDF2

app = Flask(__name__)


@app.route('/upload', methods=['POST'])
def upload_file():
    """Extract the text of an uploaded PDF and return it as UTF-8 plain text.

    Expects a multipart POST whose file field is named ``pdf``.

    Returns:
        * 400 with a short message when the ``pdf`` field is missing or no
          file was selected;
        * 500 with the error message when the PDF cannot be parsed;
        * otherwise a ``text/plain; charset=utf-8`` response holding the text
          of every page, concatenated in page order.
    """
    if 'pdf' not in request.files:
        return "No file part", 400

    file = request.files['pdf']
    # A single guard covers both an empty and a missing filename, so the view
    # can never fall off the end and return None.
    if not file.filename:
        return "No selected file", 400

    try:
        reader = PyPDF2.PdfReader(file)
        # Join once instead of growing a string page by page; ``or ''`` keeps
        # the join safe should a page yield no text.
        text = ''.join(page.extract_text() or '' for page in reader.pages)
    except Exception as e:
        # Top-level boundary of the request: log the traceback, then report
        # the failure to the client as before.
        app.logger.exception("Failed to extract text from the uploaded PDF")
        return str(e), 500

    # content_type already carries the MIME type and the charset, so a
    # separate mimetype argument is unnecessary.
    return Response(text, content_type="text/plain; charset=utf-8")


if __name__ == '__main__':
    # NOTE: debug=True enables Flask's debugger and reloader; keep it for
    # local development only.
    app.run(debug=True)
# coding=gbk
import requests

# Endpoint of the locally running PDF text-extraction service.
url = 'http://localhost:5000/upload'

# Open the PDF inside a context manager so the file handle is closed as soon
# as the upload has finished (the bare open() call leaked it).
with open('2.pdf', 'rb') as pdf_file:
    files = {'pdf': pdf_file}
    response = requests.post(url, files=files)

# Print the decoded text directly rather than the encoded bytes.
print(response.text)
# coding=gbk
# pip install pypdf2 --upgrade
import PyPDF2

# Open the PDF file in binary mode; the context manager closes it afterwards.
with open('2.pdf', 'rb') as file:
    reader = PyPDF2.PdfReader(file)
    # Walk the pages in order and print the text extracted from each one.
    for page_obj in reader.pages:
        print(page_obj.extract_text())
# Summarize a PDF with a GPT chat-completions endpoint.
#
# The script uploads a PDF to the local text-extraction service, splits the
# returned text into fixed-size segments, asks the chat API to summarize each
# segment and prints every answer.
import http.client
import json
import os

# Local PDF text-extraction service and the document to analyse.
PDF_SERVICE_URL = 'http://localhost:5000/upload'
PDF_PATH = '3.pdf'

# GPT API configuration.
API_HOST = "api.zhangsan.cloud"
API_PATH = "/v1/chat/completions"
MODEL = "gpt-3.5-turbo-16k-0613"
# Maximum number of characters of PDF text sent in one request.
SEGMENT_SIZE = 8000
# NOTE(security): do not commit a real key. The key is read from the
# GPT_API_KEY environment variable; the placeholder is only a fallback.
API_KEY = os.environ.get("GPT_API_KEY", "sk-zkyXXXXXXXXXXXXXXXaA47c77")

# Instruction given to the model for every segment.
SYSTEM_PROMPT = "请总结本篇论文,并详细告诉我论文中是基于什么背景.例如:用到了什么方法/算法,是怎么解决的,得到了什么结果,一步步详细告诉我,reply in chinese."


def split_text(text, max_size):
    """Yield consecutive slices of *text*, each at most *max_size* characters.

    Args:
        text: The string to split.
        max_size: Maximum length of each slice; must be positive.

    Yields:
        The slices in order; nothing is yielded for an empty *text*.

    Raises:
        ValueError: If *max_size* is not positive (raised on first iteration).
    """
    if max_size <= 0:
        raise ValueError("max_size must be a positive integer")
    for start in range(0, len(text), max_size):
        yield text[start:start + max_size]


def build_messages(segment, system_prompt=SYSTEM_PROMPT):
    """Return the chat messages for one segment.

    Every request is sent on its own, so the system prompt is included with
    each segment; otherwise segments after the first would reach the model
    without any instruction.
    """
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": segment},
    ]


def extract_content(response):
    """Return the assistant text from a decoded chat-completions response.

    Raises:
        RuntimeError: If the response lacks the expected ``choices`` layout
            (for example when the API returned an error object).
    """
    try:
        return response["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            "Unexpected GPT API response: {!r}".format(response)
        ) from err


def fetch_pdf_text(pdf_path, url=PDF_SERVICE_URL):
    """Upload the PDF at *pdf_path* to the service and return its text.

    Raises:
        requests.HTTPError: If the service answers with an error status, so
            an error message is never passed on as if it were the paper text.
    """
    # Imported here so the pure helpers above can be imported without the
    # third-party dependency being installed.
    import requests

    # The context manager closes the file handle once the upload is done.
    with open(pdf_path, 'rb') as pdf_file:
        response = requests.post(url, files={'pdf': pdf_file})
    response.raise_for_status()
    return response.text


def summarize_segments(segments):
    """Send each segment to the chat API and return the answers in order.

    Raises:
        RuntimeError: If a request does not return HTTP 200 or the response
            body does not have the expected layout.
    """
    headers = {
        'Accept': 'application/json',
        'Authorization': 'Bearer ' + API_KEY,
        'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
        'Content-Type': 'application/json'
    }
    conn = http.client.HTTPSConnection(API_HOST)
    answers = []
    try:
        for segment in segments:
            # json.dumps escapes non-ASCII characters by default, which keeps
            # the str body safe for http.client to encode.
            payload = json.dumps({
                "model": MODEL,
                "messages": build_messages(segment)
            })
            conn.request("POST", API_PATH, payload, headers)
            res = conn.getresponse()
            body = res.read().decode("utf-8")
            if res.status != 200:
                raise RuntimeError(
                    "GPT API request failed with HTTP {}: {}".format(
                        res.status, body
                    )
                )
            answers.append(extract_content(json.loads(body)))
    finally:
        conn.close()
    return answers


def main():
    """Fetch the PDF text, summarize it segment by segment, print the answers."""
    long_text = fetch_pdf_text(PDF_PATH)
    for content in summarize_segments(split_text(long_text, SEGMENT_SIZE)):
        print(content)


if __name__ == '__main__':
    main()
先运行接口,再运行分析。
效果如下:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。