frompaddleocrimportPaddleOCR,draw_ocr# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`ocr=PaddleOCR(use_angle_cls=True,lang="ch")# need to run only once to download and load model into memoryimg_path='./imgs/11.jpg'result=ocr.ocr(img_path,cls=True)foridxinrange(len(result)):res=result[idx]forlineinres:print(line)# 显示结果fromPILimportImageresult=result[0]image=Image.open(img_path).convert('RGB')boxes=[line[0]forlineinresult]txts=[line[1][0]forlineinresult]scores=[line[1][1]forlineinresult]im_show=draw_ocr(image,boxes,txts,scores,font_path='./fonts/simfang.ttf')im_show=Image.fromarray(im_show)im_show.save('result.jpg')
frompaddleocrimportPaddleOCR,draw_ocr# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`PAGE_NUM=10# 将识别页码前置作为全局,防止后续打开pdf的参数和前文识别参数不一致 / Set the recognition page numberpdf_path='default.pdf'ocr=PaddleOCR(use_angle_cls=True,lang="ch",page_num=PAGE_NUM)# need to run only once to download and load model into memory# ocr = PaddleOCR(use_angle_cls=True, lang="ch", page_num=PAGE_NUM,use_gpu=0) # 如果需要使用GPU,请取消此行的注释 并注释上一行 / To Use GPU,uncomment this line and comment the above one.result=ocr.ocr(pdf_path,cls=True)foridxinrange(len(result)):res=result[idx]ifres==None:# 识别到空页就跳过,防止程序报错 / Skip when empty result detected to avoid TypeError:NoneTypeprint(f"[DEBUG] Empty page {idx+1} detected, skip it.")continueforlineinres:print(line)# 显示结果importfitzfromPILimportImageimportcv2importnumpyasnpimgs=[]withfitz.open(pdf_path)aspdf:forpginrange(0,PAGE_NUM):page=pdf[pg]mat=fitz.Matrix(2,2)pm=page.get_pixmap(matrix=mat,alpha=False)# if width or height > 2000 pixels, don't enlarge the imageifpm.width>2000orpm.height>2000:pm=page.get_pixmap(matrix=fitz.Matrix(1,1),alpha=False)img=Image.frombytes("RGB",[pm.width,pm.height],pm.samples)img=cv2.cvtColor(np.array(img),cv2.COLOR_RGB2BGR)imgs.append(img)foridxinrange(len(result)):res=result[idx]ifres==None:continueimage=imgs[idx]boxes=[line[0]forlineinres]txts=[line[1][0]forlineinres]scores=[line[1][1]forlineinres]im_show=draw_ocr(image,boxes,txts,scores,font_path='doc/fonts/simfang.ttf')im_show=Image.fromarray(im_show)im_show.save('result_page_{}.jpg'.format(idx))