-
Notifications
You must be signed in to change notification settings - Fork 7
/
main.py
88 lines (70 loc) · 3.26 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import sys
# comment out below line to enable tensorflow outputs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
tf.config.experimental.set_memory_growth(physical_devices[0], True)
from absl import app, flags, logging
from absl.flags import FLAGS
sys.path.insert(0, './yolov4_card_detection')
from model_config import text_detection_config
from yolov4_card_detection.card_detection import detect_card
from card_alignment.card_alignment import CardAlignment
from text_detection.text_detection import TextDetection
from text_recognition.text_recognition import TextRecognition
from text_detection.utils.image_utils import sort_text
import cv2
import numpy as np
import json
import codecs
# Flag for phone camera
flags.DEFINE_string('camera_ip', None, 'camera ip')
# Flag for card detection
flags.DEFINE_string('weights', '/checkpoints/yolov4-416', 'path to weights file')
flags.DEFINE_integer('size', 416, 'resize images to')
flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
flags.DEFINE_string('video', '0', 'path to input video or set to 0 for webcam')
flags.DEFINE_float('iou', 0.5, 'iou threshold')
flags.DEFINE_float('score', 0.70, 'score threshold')
# Flag for output
flags.DEFINE_boolean('output', False, 'to save aligned image to output folder')
# Flag for card alignment
flags.DEFINE_boolean('interactive', False, 'interactive mode in card alignment')
flags.DEFINE_boolean('alignment_process', False, 'show alignment process')
def main(_argv):
text_detection_model = TextDetection(path_model=text_detection_config['path_to_model'],
path_labels=text_detection_config['path_to_labels'],
thres_nms=text_detection_config['nms_ths'],
thres_score=text_detection_config['score_ths'])
aligned_model = CardAlignment()
text_recognition_model = TextRecognition()
# detect card type
frame = detect_card(FLAGS)
# align card
aligned = aligned_model.scan_card(frame, FLAGS)
# detect text
detected = np.copy(aligned)
detection_boxes, detection_classes, category_index = text_detection_model.predict(detected)
id_boxes, name_boxes, birth_boxes, home_boxes, add_boxes = sort_text(detection_boxes, detection_classes)
detected = text_detection_model.draw(detected)
# recognize text
field_dict = text_recognition_model.recog_text(aligned, id_boxes, name_boxes, birth_boxes, home_boxes, add_boxes)
# save extracted information
output_dir = os.getcwd().replace(os.sep, '/') + "/output"
with codecs.open(output_dir + "/information.txt", "w", "utf-8") as file:
file.write(json.dumps(field_dict,ensure_ascii=False)) # use `json.loads` to do the reverse
# save aligned image
if FLAGS.output:
cv2.imwrite(output_dir + "/aligned.jpg", aligned)
cv2.imwrite(output_dir + "/detected.jpg", detected)
cv2.imshow("Alignment", aligned)
cv2.imshow("Detected", detected)
cv2.waitKey(0)
if __name__ == '__main__':
try:
app.run(main)
except SystemExit:
pass