import xlwt,pymysql,requests,json,datetime,PyPDF2,urllib.request,io,ssl
# Build an SSL context that skips certificate verification; it is passed to
# urlopen below to work around SSL errors raised while fetching the PDF.
# WARNING(review): ssl._create_unverified_context() is a private API and
# disables certificate/hostname checks, so the download is not protected
# against man-in-the-middle tampering. Prefer ssl.create_default_context()
# with a proper CA bundle once the server's certificate chain is fixed.
context = ssl._create_unverified_context()

# Download the PDF from its URL and keep the raw bytes in memory.
# `contract_download_url` is not defined in this section; it must already be
# bound before this point.
# The `with` block closes the HTTP response as soon as the body has been read.
with urllib.request.urlopen(contract_download_url, context=context) as req:
    remote_file = req.read()

# Wrap the bytes in a seekable in-memory file so PyPDF2 can parse them.
memory_file = io.BytesIO(remote_file)
read_pdf = PyPDF2.PdfReader(memory_file)

# Number of pages in the PDF.
number_of_pages = len(read_pdf.pages)

for i, pageObj in enumerate(read_pdf.pages):
    # Extract the text content of the current page.
    # NOTE: `page` is rebound on every iteration, so it only ever holds the
    # text of the page currently being processed.
    page = pageObj.extract_text()