99from urllib .parse import quote
1010from getpass import getpass
1111
12+ from .output_schema import DisambiguationResult
13+
1214# =============== Gemini client bootstrap ===============
1315try :
1416 from google import genai
@@ -280,13 +282,6 @@ def _safe_idx(lst: List[Any], value: Any) -> Optional[int]:
280282 i = 0
281283 return max (0 , min (i , len (lst ) - 1 ))
282284
283- @staticmethod
284- def _get_json (text : Optional [str ]) -> Dict [str , Any ]:
285- try :
286- return json .loads ((text or "" ).strip () or "{}" )
287- except Exception :
288- return {}
289-
290285 # --------------------- main entrypoint ---------------------
291286
292287 def disambiguate_triple (
@@ -332,46 +327,52 @@ def disambiguate_triple(
332327 subj_list_text = self ._fmt_indexed (subject_candidates )
333328 obj_list_text = self ._fmt_indexed (object_candidates )
334329
335- prompt = f"""Pick the best RDF triple using ONLY these options.
330+ prompt = f"""
331+ Analyze the context and disambiguate the triple by selecting the correct Subject, Predicate, and Object.
336332
337- Allowed predicate URIs:
338- { pred_list_text }
333+ Context: { context }
339334
340- Subject candidates (choose by INDEX ):
341- { subj_list_text }
335+ Allowed Predicates (Select one URI ):
336+ { pred_list_text }
342337
343- Object candidates (choose by INDEX ):
344- { obj_list_text }
338+ Subject Candidates (Select by Index ):
339+ { subj_list_text }
345340
346- Context (helps decide, but does NOT add new options):
347- { context }
341+ Object Candidates (Select by Index):
342+ { obj_list_text }
343+ """
348344
349- Return ONLY strict JSON on one line (no prose):
350- {{"subject_index": 0, "predicate": "URI", "object_index": 0}}
351- Rules:
352- - "predicate" MUST be exactly one URI from Allowed predicate URIs.
353- - "subject_index" MUST be an integer index from Subject candidates.
354- - "object_index" MUST be an integer index from Object candidates.
355- - Do not invent or modify URIs. Do not swap roles.
356- """
345+ generation_config = {
346+ "response_mime_type" : "application/json" ,
347+ "response_schema" : DisambiguationResult # Pydantic model the JSON response is validated against below
348+ }
357349
358350 # Call the model (Gemini client style)
359351 resp = self .client .models .generate_content (
360352 model = self .model_name ,
361353 contents = prompt ,
362- config = { "response_mime_type" : "application/json" },
354+ config = generation_config
363355 )
364356
365- data = self ._get_json (getattr (resp , "text" , None ))
357+ try :
358+ data = DisambiguationResult .model_validate_json (resp .text )
359+ pred_uri = data .predicate_uri
360+ s_idx = data .subject_index
361+ o_idx = data .object_index
366362
367- # Validate predicate
368- pred_uri = data .get ("predicate" , "" )
369- if pred_uri not in allowed :
370- pred_uri = allowed [0 ]
363+ # Validate predicate
364+ if pred_uri not in allowed :
365+ pred_uri = None # predicate is not in the allowed list: reject it instead of substituting a default
366+
367+ except Exception as e :
368+ print (f"validation error{ e } " )
369+ pred_uri = None
370+ s_idx = 0
371+ o_idx = 0
371372
372373 # Clamp indices and map to URIs
373- si = self ._safe_idx (subject_candidates , data . get ( "subject_index" , 0 ) )
374- oi = self ._safe_idx (object_candidates , data . get ( "object_index" , 0 ) )
374+ si = self ._safe_idx (subject_candidates , s_idx )
375+ oi = self ._safe_idx (object_candidates , o_idx )
375376
376377 s_uri = subject_candidates [si ][0 ] if (subject_candidates and si is not None ) else ""
377378 o_uri = object_candidates [oi ][0 ] if (object_candidates and oi is not None ) else ""
@@ -392,7 +393,7 @@ def disambiguate_triple(
392393 # Build meta (compatible with your previous code)
393394 chosen_sim , rank0 = sim_map .get (pred_uri , (None , None ))
394395 meta = {
395- "label" : "candidate" ,
396+ "label" : "candidate" if pred_uri else "hallucination_rejected" ,
396397 "chosen_similarity" : float (chosen_sim ) if chosen_sim is not None else None ,
397398 "rank_in_topk" : (rank0 + 1 ) if rank0 is not None else None ,
398399 "topk" : total_k ,
0 commit comments