-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_recognition_image.py
More file actions
128 lines (109 loc) · 4.98 KB
/
process_recognition_image.py
File metadata and controls
128 lines (109 loc) · 4.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from PIL import Image, ImageEnhance, ImageDraw
from auxiliary_funcs import get_current_time
from operation_with_settings import get_data_settings
import numpy as np
import easyocr
class RecognitionProcessingImage:
def __init__(self, image_path):
self.errors = [] # List of errors (time, str)
self.image_path = image_path
self.before_image = None # To draw recognized text
self.__load_image()
# Open and read the JSON (settings) file
data = get_data_settings()
self.values_setting = data["image_settings"]
save_path_dir = data["save_path"]["for_image"]
self.save_path = save_path_dir + self.image_path.split("/")[-1].split(".")[0]
def __load_image(self):
try:
self.image = Image.open(self.image_path)
self.before_image = self.image.copy()
except FileNotFoundError:
self.errors.append((get_current_time(), "This file does not exist!"))
self.image = None
except Exception:
self.errors.append((get_current_time(), "An error occurred in the loading of the image"))
def change_size(self, size=1.0):
width, height = self.image.width, self.image.height
# Change the image size (if necessary)
self.image = self.image.resize((int(width * size), int(height * size)), Image.LANCZOS)
self.before_image = self.image.copy() # Save the original scale image
def convert_to_gray(self):
"""Convert the image to grayscale"""
self.image = self.image.convert("L")
def change_contrast(self, value=1.0):
"""Change the contrast"""
enhancer = ImageEnhance.Contrast(self.image)
self.image = enhancer.enhance(value) # Increase contrast
def change_threshold(self):
"""Change the threshold"""
self.image = self.image.point(lambda p: 255 if p > 128 else 0)
def invert_color(self):
"""Invert the color"""
self.image = Image.eval(self.image, lambda p: 255 - p)
@property
def preprocess_image(self):
"""Preparatory image processing"""
self.change_size(self.values_setting["SIZE_TRANSFORM_IMG"])
if self.values_setting["CONVERT_IMG"] == "bitmap":
self.convert_to_gray()
self.change_contrast(self.values_setting["CONTRAST_RATIO"])
self.change_threshold()
elif self.values_setting["CONVERT_IMG"] == "gray":
self.convert_to_gray()
self.change_contrast(self.values_setting["CONTRAST_RATIO"])
else:
self.change_contrast(self.values_setting["CONTRAST_RATIO"])
if self.values_setting["INVERT_IMG"]:
self.invert_color()
return self.image
def get_text(self):
"""Recognize text in the image"""
processed_img = self.preprocess_image # Convert image
# Convert image to NumPy array for EasyOCR
img_np = np.array(processed_img)
# Initialize EasyOCR
reader = easyocr.Reader(self.values_setting["DEFAULT_LANG"], gpu=True)
# Recognize text, get results
results = reader.readtext(img_np)
res_text = ""
# Get text from results
for detection in results:
res_text += detection[1] + "\n"
bbox = detection[0] # The borders of the recognized block
try:
self.draw_borders_on_image(bbox)
except Exception:
self.errors.append((get_current_time(), "An error occurred in the drawing of the borders"))
return res_text
def convert_to_TXT(self, text):
"""Converts the image to .txt format."""
with open(self.save_path + ".txt", "w", encoding="utf-8") as file:
file.write(text)
@property
def recognize_text(self):
"""Main function to recognize"""
res_text = self.get_text()
try:
if self.values_setting["SAVE_TXT_FILE"]:
self.convert_to_TXT(res_text)
except FileNotFoundError:
self.errors.append((get_current_time(), "The directory not found!")) # Directory not found
except PermissionError:
self.errors.append((get_current_time(), "No permission to create a file")) # No permission to create a file
except OSError as e:
self.errors.append((get_current_time(), "An error occurred in the OS")) # An error occurred
return res_text, self.errors
def draw_borders_on_image(self, border):
"""Draw the borders of the recognized text"""
draw = ImageDraw.Draw(self.before_image)
left_top = list(map(int, border[0]))
right_bottom = list(map(int, border[2]))
ltx, lty, rbx, rby = left_top[0], left_top[1], right_bottom[0], right_bottom[1]
# Swap the coordinates if necessary
if ltx > rbx:
ltx, rbx = rbx, ltx
if lty > rby:
lty, rby = rby, lty
draw.rectangle([ltx, lty, rbx, rby], fill=None,
outline=(255, 0, 0), width=3) # Draw the borders on the image