-
Notifications
You must be signed in to change notification settings - Fork 7
/
ocr_align.py
58 lines (51 loc) · 2.41 KB
/
ocr_align.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from interval import Interval
from PIL import Image
det_model_dir = "./inference/ch_ppocr_server_v2.0_det_infer/"
rec_model_dir = "./inference/ch_ppocr_server_v2.0_rec_infer/"
cls_model_dir = "./inference/ch_ppocr_mobile_v2.0_cls_infer/"
class OCR(PaddleOCR):
def __init__(self, **kwargs):
super(OCR, self).__init__(**kwargs)
def ocr_new(self, img, det=True, rec=True, cls=True):
res = self.ocr(img, det=det, rec=rec, cls=cls)
if res!=None:
return res
else:
img_v = Image.open(img).convert("RGB")
img_v = np.asanyarray(img_v)[:,:,[2,1,0]]
dt_boxes, rec_res = self.__call__(img_v)
return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
def align_text(res, threshold=0):
res.sort(key=lambda i: (i[0][0][0])) # 按照x排
already_IN, line_list = [], []
for i in range(len(res)): # i当前
if res[i][0][0] in already_IN:
continue
line_txt = res[i][1][0]
already_IN.append(res[i][0][0])
y_i_points = [res[i][0][0][1], res[i][0][1][1], res[i][0][3][1], res[i][0][2][1]]
min_I_y, max_I_y = min(y_i_points), max(y_i_points)
curr = Interval(min_I_y + (max_I_y - min_I_y) // 3, max_I_y)
curr_mid = min_I_y + (max_I_y - min_I_y) // 2
for j in range(i + 1, len(res)): # j下一个
if res[j][0][0] in already_IN:
continue
y_j_points = [res[j][0][0][1], res[j][0][1][1], res[j][0][3][1], res[j][0][2][1]]
min_J_y, max_J_y = min(y_j_points), max(y_j_points)
next_j = Interval(min_J_y, max_J_y - (max_J_y - min_J_y) // 3)
if next_j.overlaps(curr) and curr_mid in Interval(min_J_y, max_J_y):
line_txt += (res[j][1][0] + " ")
already_IN.append(res[j][0][0])
curr = Interval(min_J_y + (max_J_y - min_J_y) // 3, max_J_y)
curr_mid = min_J_y + (max_J_y - min_J_y) // 2
line_list.append((res[i][0][0][1], line_txt))
line_list.sort(key=lambda x: x[0])
txt = '\n'.join([i[1] for i in line_list])
return txt
paddle_ocr_engin = OCR(det_model_dir= det_model_dir,rec_model_dir= rec_model_dir,cls_model_dir= cls_model_dir,use_angle_cls=True)
res = paddle_ocr_engin.ocr_new('./test.jpg')
print(res)
print(align_text(res))