@@ -179,16 +179,39 @@ def load_and_convert(file_path: str) -> Dict[str, List[str]]:
class AnnotationValidator:
    """Validate detected barcodes against expected values."""

    # Printable placeholders (ASCII tags and Unicode "control picture"
    # symbols) mapped to the control character each one stands for.
    _CONTROL_TOKEN_MAP = {
        '<NUL>': '\x00',
        '␀': '\x00',
        '<EOT>': '\x04',
        '␄': '\x04',
        '<GS>': '\x1d',
        '[GS]': '\x1d',
        '␝': '\x1d',
        '<RS>': '\x1e',
        '␞': '\x1e',
    }

    @staticmethod
    def _normalize_text(value: str) -> str:
        """Normalize control-character placeholders and line endings.

        Placeholders listed in ``_CONTROL_TOKEN_MAP`` are replaced by the
        control character they denote, ``\\r\\n`` and bare ``\\r`` become
        ``\\n``, and surrounding whitespace is stripped.

        Args:
            value: Raw text (detected or expected barcode content).

        Returns:
            The normalized text.
        """
        normalized = value
        # Expand placeholders BEFORE stripping: str.strip() treats \x1d and
        # \x1e as whitespace, so stripping first would drop a raw trailing
        # GS/RS but keep one written as '<GS>'/'<RS>', making the two
        # spellings of the same content normalize differently.
        for token, replacement in AnnotationValidator._CONTROL_TOKEN_MAP.items():
            normalized = normalized.replace(token, replacement)
        return normalized.replace('\r\n', '\n').replace('\r', '\n').strip()

    @staticmethod
    def _normalize_gs1_text(value: str) -> str:
        """Normalize GS1 text so human-friendly AI formatting still matches raw content.

        Applies ``_normalize_text`` and then removes parentheses and the
        GS (0x1D), RS (0x1E) and EOT (0x04) characters.

        Args:
            value: Raw GS1 text, with or without parenthesised AIs.

        Returns:
            The text with parentheses and GS/RS/EOT characters removed.
        """
        normalized = AnnotationValidator._normalize_text(value)
        # One translate pass deletes every listed character.
        return normalized.translate(str.maketrans('', '', '()\x1d\x1e\x04'))
186209
187210 @staticmethod
188211 def _match_texts (detected : str , expected : str ) -> bool :
189212 """Flexible barcode text matching with UPC/EAN equivalence."""
190- detected = detected . strip ( )
191- expected = expected . strip ( )
213+ detected = AnnotationValidator . _normalize_text ( detected )
214+ expected = AnnotationValidator . _normalize_text ( expected )
192215
193216 if detected == expected :
194217 return True
@@ -202,6 +225,11 @@ def _match_texts(detected: str, expected: str) -> bool:
202225 detected = normalized_detected
203226 expected = normalized_expected
204227
228+ # EAN/UPC add-on extensions may be present in GT but omitted by decoders.
229+ if detected .isdigit () and expected .isdigit () and abs (len (detected ) - len (expected )) in (2 , 5 ):
230+ if detected .startswith (expected ) or expected .startswith (detected ):
231+ return True
232+
205233 # UPC-A (12 digits) <-> EAN-13 (13 digits with leading 0)
206234 if len (detected ) == 12 and len (expected ) == 13 and expected == '0' + detected :
207235 return True
@@ -513,6 +541,21 @@ def _esc(s: str) -> str:
513541 from html import escape
514542 return escape (str (s ))
515543
@staticmethod
def _display_text(s: str) -> str:
    """Render text for the HTML report with control characters made visible.

    The value is converted with ``str()``, passed through
    ``AnnotationValidator._normalize_text``, and then the NUL, EOT, GS and
    RS characters are replaced by the markers ``<NUL>``, ``<EOT>``,
    ``<GS>`` and ``<RS>``. The result is HTML-escaped (so the markers show
    up literally) and each newline becomes a ``<br>`` tag.

    Args:
        s: Value to display; anything accepted by ``str()``.

    Returns:
        An HTML-safe string.
    """
    from html import escape

    # Code point -> visible marker; applied in a single translate pass.
    markers = {
        0x00: '<NUL>',
        0x04: '<EOT>',
        0x1D: '<GS>',
        0x1E: '<RS>',
    }
    text = AnnotationValidator._normalize_text(str(s))
    escaped = escape(text.translate(markers))
    # Line breaks are inserted after escaping so the <br> tags stay live.
    return escaped.replace('\n', '<br>')
558+
516559 @staticmethod
517560 def _gt_cls (rate_pct : float ) -> str :
518561 return 'gt-good' if rate_pct >= 90 else ('gt-ok' if rate_pct >= 70 else 'gt-poor' )
@@ -522,6 +565,7 @@ def export(results: Dict[str, List[Dict]], output_path: str, has_annotation: boo
522565 """Generate HTML report identical in layout to the web-app benchmark export."""
523566 esc = HTMLReportExporter ._esc
524567 gt_cls = HTMLReportExporter ._gt_cls
568+ display_text = HTMLReportExporter ._display_text
525569
526570 # ── Reorganise results: file_path → lib_name → result dict ─────────
527571 file_results : Dict [str , Dict [str , dict ]] = {}
@@ -655,7 +699,7 @@ def render_image_section(fp: str, show_expected_values: bool = False) -> str:
655699 )
656700 section_html = f'<h4 class="benchmark-image-title">{ esc (fname )} { gt_badge } </h4>\n '
657701 if show_expected_values and expected_values :
658- expected_html = ', ' .join (esc (value ) for value in expected_values )
702+ expected_html = '<br> ' .join (display_text (value ) for value in expected_values )
659703 section_html += (
660704 '<div class="expected-values">'
661705 f'<strong>Expected:</strong> { expected_html } '
@@ -681,7 +725,7 @@ def render_image_section(fp: str, show_expected_values: bool = False) -> str:
681725 barcodes = r .get ('detected_data' , [])
682726 if barcodes :
683727 detail = '<ul class="barcodes-list">' + '' .join (
684- f'<li>[{ esc (b .get ("type" ,"" ))} ] { esc (b .get ("data" ,"" ))} </li>'
728+ f'<li>[{ esc (b .get ("type" ,"" ))} ] { display_text (b .get ("data" ,"" ))} </li>'
685729 for b in barcodes
686730 ) + '</ul>'
687731 else :
0 commit comments