# -*- coding: utf-8 -*-
"""Render every page of a PDF to a JPEG file using PyMuPDF (fitz) and Pillow."""
import base64
import io
import os
from glob import glob

import fitz
from PIL import Image


def _call_first_available(obj, names, *args, **kwargs):
    """Call the first method listed in *names* that *obj* provides.

    PyMuPDF renamed its camelCase methods to snake_case and later dropped the
    old spellings; trying both keeps this module usable on either side of
    that rename.

    Raises:
        AttributeError: if *obj* has none of the listed methods.
    """
    for name in names:
        method = getattr(obj, name, None)
        if method is not None:
            return method(*args, **kwargs)
    raise AttributeError(
        "%s provides none of: %s" % (type(obj).__name__, ", ".join(names))
    )


class CovertPdfToJpg:
    """Convert the pages of one PDF file into numbered JPEG images."""

    def __init__(self, file_path, save_root):
        """Remember the source PDF path and the output directory.

        Args:
            file_path: path of the PDF to convert.
            save_root: directory the JPEG files are written to.
        """
        self.file_path = file_path
        self.save_root = save_root

    @staticmethod
    def open_pdf(file):
        """Open *file* with ``fitz.open`` and return the document."""
        return fitz.open(file)

    @staticmethod
    def get_trans(doc, page, min_side=0, max_side=0, rotate=0.0):
        """Create the scale/rotation matrix used to render one page.

        When ``max_side > min_side > 0`` the page is scaled so that its
        shorter edge equals ``min_side``; if the longer edge would then
        exceed ``max_side`` the scale is reduced so the longer edge equals
        ``max_side`` instead. Otherwise the scale stays at 1.

        Args:
            doc: opened document, indexable by page number.
            page: zero-based page index.
            min_side: target length of the shorter page edge.
            max_side: upper bound for the longer page edge.
            rotate: rotation passed to the matrix pre-rotation.

        Returns:
            A ``fitz.Matrix`` combining the scale and the rotation.
        """
        region = doc[page].rect
        scale = 1
        if max_side > min_side > 0:
            scale = min_side / min(region.width, region.height)
            if max(region.width, region.height) * scale > max_side:
                scale = max_side / max(region.width, region.height)
        matrix = fitz.Matrix(scale, scale)
        # The rotation method returns the matrix, as the original chaining
        # relied on; try the current name first, then the legacy one.
        return _call_first_available(
            matrix, ("prerotate", "preRotate"), rotate
        )

    def page2pix(self, doc, page, trans):
        """Render page *page* of *doc* to a pixmap without an alpha channel.

        Args:
            doc: opened document, indexable by page number.
            page: zero-based page index.
            trans: transformation matrix, e.g. from ``get_trans``.

        Returns:
            The pixmap produced by the page's render method.
        """
        return _call_first_available(
            doc[page], ("get_pixmap", "getPixmap"), matrix=trans, alpha=False
        )

    def pdf_to_jpg(self, width=1024, height=1400):
        """Write every page of the PDF as ``<name>_<page>.jpg`` in save_root.

        ``width`` and ``height`` are forwarded to ``get_trans`` as
        ``min_side`` and ``max_side`` respectively. Pages that fail to
        render are reported and skipped; the remaining pages are still
        converted. Page numbers in file names start at 1.
        """
        save_dir = os.path.join(self.save_root)
        os.makedirs(save_dir, exist_ok=True)

        # Strip only a trailing ".pdf"; str.replace would also delete any
        # ".pdf" occurring in the middle of the file name.
        stem = os.path.basename(self.file_path)
        if stem.endswith('.pdf'):
            stem = stem[:-len('.pdf')]

        doc = self.open_pdf(self.file_path)
        try:
            # len(doc) is the page count; printed twice to keep the original
            # output shape without touching the removed ``pageCount`` name.
            page_total = len(doc)
            print("document", page_total, page_total)
            for i in range(page_total):
                trans = self.get_trans(doc, i, width, height, rotate=0)
                try:
                    pix = self.page2pix(doc, i, trans)
                except Exception as exc:
                    # Best effort: report the broken page and keep going.
                    print("failed to render page", i + 1, "of",
                          self.file_path, ":", exc)
                    continue
                png_bytes = _call_first_available(
                    pix, ("tobytes", "getPNGData")
                )
                image = Image.open(io.BytesIO(png_bytes))
                out_path = os.path.join(
                    save_dir, stem + '_' + str(i + 1) + '.jpg'
                )
                print(out_path)
                image.save(out_path)
        finally:
            # Release the underlying file handle even when a page fails.
            doc.close()
        return
# last updated: 2024-12-11 10:02 UTC - commit: f18a89e157ed002602c57458c4d33037c98e13a9