Skip to content

Commit c46a354

Browse files
committed
Optimized annotation processing
1 parent 48e1e63 commit c46a354

File tree

1 file changed

+49
-5
lines changed

1 file changed

+49
-5
lines changed

examples/official/annotation_tool/benchmark/gui_benchmark.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,16 +179,39 @@ def load_and_convert(file_path: str) -> Dict[str, List[str]]:
179179
class AnnotationValidator:
180180
"""Validate detected barcodes against expected values."""
181181

182+
_CONTROL_TOKEN_MAP = {
183+
'<NUL>': '\x00',
184+
'␀': '\x00',
185+
'<EOT>': '\x04',
186+
'␄': '\x04',
187+
'<GS>': '\x1d',
188+
'[GS]': '\x1d',
189+
'␝': '\x1d',
190+
'<RS>': '\x1e',
191+
'␞': '\x1e',
192+
}
193+
194+
@staticmethod
def _normalize_text(value: str) -> str:
    """Normalize line endings and control-character placeholders.

    Args:
        value: Barcode text to normalize.

    Returns:
        The text with CRLF/CR line endings converted to LF, every placeholder
        listed in ``_CONTROL_TOKEN_MAP`` replaced by its control character,
        and surrounding whitespace removed.
    """
    normalized = value.replace('\r\n', '\n').replace('\r', '\n')
    for token, replacement in AnnotationValidator._CONTROL_TOKEN_MAP.items():
        normalized = normalized.replace(token, replacement)
    # Strip only after the placeholders are expanded: str.strip() treats the
    # separators \x1d and \x1e as whitespace, so stripping first would drop a
    # raw GS/RS at the edge of the text but keep one spelled "<GS>"/"<RS>",
    # and two spellings of the same payload would normalize differently.
    return normalized.strip()
201+
182202
@staticmethod
def _normalize_gs1_text(value: str) -> str:
    """Reduce GS1 text to a form where AI formatting no longer matters.

    The value is first passed through ``_normalize_text``; parentheses and
    the ``\\x1d``, ``\\x1e`` and ``\\x04`` control characters are then
    deleted, so human-friendly and raw renderings of the same content
    compare equal.
    """
    # Mapping a code point to None makes str.translate delete it.
    deletions = {ord(ch): None for ch in ('(', ')', '\x1d', '\x1e', '\x04')}
    return AnnotationValidator._normalize_text(value).translate(deletions)
186209

187210
@staticmethod
188211
def _match_texts(detected: str, expected: str) -> bool:
189212
"""Flexible barcode text matching with UPC/EAN equivalence."""
190-
detected = detected.strip()
191-
expected = expected.strip()
213+
detected = AnnotationValidator._normalize_text(detected)
214+
expected = AnnotationValidator._normalize_text(expected)
192215

193216
if detected == expected:
194217
return True
@@ -202,6 +225,11 @@ def _match_texts(detected: str, expected: str) -> bool:
202225
detected = normalized_detected
203226
expected = normalized_expected
204227

228+
# EAN/UPC add-on extensions may be present in GT but omitted by decoders.
229+
if detected.isdigit() and expected.isdigit() and abs(len(detected) - len(expected)) in (2, 5):
230+
if detected.startswith(expected) or expected.startswith(detected):
231+
return True
232+
205233
# UPC-A (12 digits) <-> EAN-13 (13 digits with leading 0)
206234
if len(detected) == 12 and len(expected) == 13 and expected == '0' + detected:
207235
return True
@@ -513,6 +541,21 @@ def _esc(s: str) -> str:
513541
from html import escape
514542
return escape(str(s))
515543

544+
@staticmethod
def _display_text(s: str) -> str:
    """Render barcode text as HTML with control characters made visible.

    The input is stringified and normalized with
    ``AnnotationValidator._normalize_text``. NUL, EOT, GS and RS are then
    spelled out as ``<NUL>``, ``<EOT>``, ``<GS>`` and ``<RS>``, the result
    is HTML-escaped, and each newline becomes a ``<br>`` tag.
    """
    from html import escape

    # One translation pass replaces all four control characters; the markers
    # are inserted before escaping, so their angle brackets are escaped too.
    markers = {
        ord('\x00'): '<NUL>',
        ord('\x04'): '<EOT>',
        ord('\x1d'): '<GS>',
        ord('\x1e'): '<RS>',
    }
    text = AnnotationValidator._normalize_text(str(s))
    escaped = escape(text.translate(markers))
    return escaped.replace('\n', '<br>')
558+
516559
@staticmethod
517560
def _gt_cls(rate_pct: float) -> str:
518561
return 'gt-good' if rate_pct >= 90 else ('gt-ok' if rate_pct >= 70 else 'gt-poor')
@@ -522,6 +565,7 @@ def export(results: Dict[str, List[Dict]], output_path: str, has_annotation: boo
522565
"""Generate HTML report identical in layout to the web-app benchmark export."""
523566
esc = HTMLReportExporter._esc
524567
gt_cls = HTMLReportExporter._gt_cls
568+
display_text = HTMLReportExporter._display_text
525569

526570
# ── Reorganise results: file_path → lib_name → result dict ─────────
527571
file_results: Dict[str, Dict[str, dict]] = {}
@@ -655,7 +699,7 @@ def render_image_section(fp: str, show_expected_values: bool = False) -> str:
655699
)
656700
section_html = f'<h4 class="benchmark-image-title">{esc(fname)}{gt_badge}</h4>\n'
657701
if show_expected_values and expected_values:
658-
expected_html = ', '.join(esc(value) for value in expected_values)
702+
expected_html = '<br>'.join(display_text(value) for value in expected_values)
659703
section_html += (
660704
'<div class="expected-values">'
661705
f'<strong>Expected:</strong> {expected_html}'
@@ -681,7 +725,7 @@ def render_image_section(fp: str, show_expected_values: bool = False) -> str:
681725
barcodes = r.get('detected_data', [])
682726
if barcodes:
683727
detail = '<ul class="barcodes-list">' + ''.join(
684-
f'<li>[{esc(b.get("type",""))}] {esc(b.get("data",""))}</li>'
728+
f'<li>[{esc(b.get("type",""))}] {display_text(b.get("data",""))}</li>'
685729
for b in barcodes
686730
) + '</ul>'
687731
else:

0 commit comments

Comments
 (0)