Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 16 additions & 52 deletions GSoC25_H/IndIE/llm_extractor.py
Original file line number Diff line number Diff line change
@@ -1,64 +1,28 @@
import json
import ollama
import time
import re
from typing import List, Dict, Any, Tuple
from dataclasses import dataclass

@dataclass
class ModelConfig:
"""Configuration for the LLM model."""
name: str = "gemma3:12b-it-qat"
temperature: float = 0.1
top_p: float = 0.9
num_predict: int = 2000

class LLMInterface:
"""Interface for interacting with the language model via Ollama."""
def __init__(self, model_config: ModelConfig, max_retries: int = 2, timeout: int = 60):
self.model_config = model_config
self.max_retries = max_retries
self.client = ollama.Client(timeout=timeout)

def generate_response(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Generates a response from the LLM, with retries for handling errors.
"""
retries = 0
while retries < self.max_retries:
try:
response = self.client.chat(
model=self.model_config.name,
messages=messages,
options={
"temperature": self.model_config.temperature,
"top_p": self.model_config.top_p,
"num_predict": self.model_config.num_predict
}
)
return response
import sys
import os
from typing import List, Dict

except Exception as e:
retries += 1
print(f"Error calling model '{self.model_config.name}': {e}. Retrying ({retries}/{self.max_retries})...")
time.sleep(2 ** retries)

print(f"Failed to get a valid response from model '{self.model_config.name}' after {self.max_retries} retries.")
return None
# Add parent directory to path to allow importing from src
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.llm_core import LLMService, ModelConfig

class LLMExtractor:
def __init__(self, model_name="gemma3:12b-it-qat", temperature=0.05, max_retries=3, timeout=120):
# USES NEW SHARED CONFIG FROM SRC
self.model_config = ModelConfig(
name=model_name,
temperature=temperature, # Lower temperature for more focused extractions
top_p=0.8, # Slightly more focused sampling
num_predict=1500 # Reduced to encourage concise outputs
)
self.llm_interface = LLMInterface(
model_config=self.model_config,
max_retries=max_retries,
timeout=timeout
temperature=temperature,
top_p=0.8,
num_predict=1500,
timeout=timeout,
max_retries=max_retries
)
# USES NEW SHARED SERVICE FROM SRC
self.llm_interface = LLMService(model_config=self.model_config)

# Quality patterns for filtering false positives
self.low_quality_patterns = [
Expand Down Expand Up @@ -941,4 +905,4 @@ def quick_test():
return result

if __name__ == "__main__":
test_llm_extractor()
test_llm_extractor()
112 changes: 39 additions & 73 deletions GSoC25_H/llm_IE/llm_interface.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import sys
import os
import requests
import time
import re
from typing import Dict, List, Any, Optional, Tuple
from typing import Dict, List, Optional
from dataclasses import dataclass, field

from config import ModelConfig
# Add parent directory to path to allow importing from src
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from src.llm_core import LLMService, ModelConfig as SharedConfig
from output_parser import OutputParser
from config import ModelConfig

@dataclass
class ExtractionResult:
Expand All @@ -18,97 +21,60 @@ class ExtractionResult:
error: Optional[str] = None

class OllamaInterface:
"""Unified interface for interacting with Ollama models"""
"""
Unified interface for interacting with Ollama models.
Refactored to use the shared src.llm_core.LLMService instead of raw requests.
"""

def __init__(self, model_config: ModelConfig, base_url: str = "http://localhost:11434"):
self.model_config = model_config
self.base_url = base_url.rstrip('/')
self.api_endpoint = f"{self.base_url}/api"
self.output_parser = OutputParser()

if not self._is_available():
print(f"Warning: Ollama model '{self.model_config.name}' not found locally. Trying to pull it...")
if not self._pull_model():
raise ConnectionError(f"Failed to pull or connect to Ollama model {self.model_config.name}")

def _is_available(self) -> bool:
"""Check if the Ollama model is available locally"""
try:
response = requests.get(f"{self.api_endpoint}/tags")
response.raise_for_status()
models = response.json().get("models", [])
return any(m['name'] == self.model_config.name for m in models)
except requests.exceptions.RequestException:
return False

def _generate_text(self, prompt: str) -> str:
"""Generic text generation using the configured Ollama model."""
start_time = time.time()

payload = {
"model": self.model_config.name,
"prompt": prompt,
"stream": False,
"options": {
"temperature": self.model_config.temperature,
"top_p": self.model_config.top_p,
"top_k": self.model_config.top_k,
"num_predict": self.model_config.max_tokens,
}
}
# ADAPTER: Convert local llm_IE config to the Shared Config
# We map 'max_tokens' (from llm_IE) to 'num_predict' (shared core)
shared_config = SharedConfig(
name=model_config.name,
host=base_url, # <--- Fix: Pass the base_url here!
temperature=model_config.temperature,
top_p=model_config.top_p,
num_predict=getattr(model_config, 'max_tokens', 2000),
timeout=getattr(model_config, 'timeout', 60),
max_retries=getattr(model_config, 'max_retries', 3)
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.

try:
response = requests.post(
f"{self.api_endpoint}/generate",
json=payload,
timeout=self.model_config.timeout,
headers={"Content-Type": "application/json"}
)
response.raise_for_status()

result = response.json()
return result.get("response", "").strip()

except requests.exceptions.RequestException as e:
print(f"Error during Ollama API request: {e}")
return ""
# Initialize the shared service
self.service = LLMService(shared_config)
Comment thread
mallasiddharthreddy marked this conversation as resolved.

def extract_relations(self, sentence: str, prompt: str) -> ExtractionResult:
"""Extracts relations from a sentence using a given prompt."""
"""Extracts relations from a sentence using the shared LLM service."""
# Fix: Explicitly mark 'sentence' as unused to satisfy linter
_ = sentence

start_time = time.time()

raw_output = self._generate_text(prompt)
# Prepare standard message format for the shared service
messages = [{"role": "user", "content": prompt}]

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Use shared service (Handles retries and connection automatically)
response = self.service.generate_response(messages)
processing_time = time.time() - start_time

if not raw_output:
if not response:
return ExtractionResult(
success=False,
raw_output="",
processing_time=processing_time,
error="Failed to generate text from model."
)

# Extract text content from the Ollama response dictionary
raw_output = response.get("message", {}).get("content", "").strip()

# Parse output using existing parser
parsed_triplets, _ = self.output_parser.parse_and_format(raw_output)

return ExtractionResult(
success=len(parsed_triplets) > 0,
raw_output=raw_output,
parsed_triplets=parsed_triplets,
processing_time=processing_time
)

def _pull_model(self) -> bool:
"""Pull the model from the Ollama registry."""
print(f"Pulling model: {self.model_config.name}. This may take a while...")
try:
response = requests.post(
f"{self.api_endpoint}/pull",
json={"name": self.model_config.name, "stream": False},
timeout=300 # 5-minute timeout for pulling
)
response.raise_for_status()
print(f"Model '{self.model_config.name}' pulled successfully.")
return True
except requests.exceptions.RequestException as e:
print(f"Failed to pull model '{self.model_config.name}': {e}")
return False
)
120 changes: 120 additions & 0 deletions GSoC25_H/src/llm_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import time
import logging
import ollama
from dataclasses import dataclass
from typing import List, Dict, Any, Optional

@dataclass
class ModelConfig:
    """Unified Configuration for the LLM model."""
    # Ollama model tag to load (e.g. "gemma3:12b-it-qat").
    name: str = "gemma3:12b-it-qat"
    # Base URL of the Ollama server; passed to ollama.Client(host=...).
    host: str = "http://localhost:11434"
    # Sampling temperature forwarded as the chat `temperature` option.
    temperature: float = 0.1
    # Nucleus-sampling cutoff forwarded as the chat `top_p` option.
    top_p: float = 0.9
    # Max tokens to generate per response (Ollama `num_predict` option).
    num_predict: int = 2000
    # Per-request client timeout in seconds (passed to ollama.Client).
    timeout: int = 60
    # Total chat attempts LLMService.generate_response makes before giving up.
    max_retries: int = 3

class LLMService:
    """
    Centralized service for LLM interactions.
    Replaces duplicative logic in IndIE/llm_extractor.py and llm_IE/llm_interface.py
    """

    def __init__(self, model_config: ModelConfig):
        """Build the Ollama client from *model_config* and verify the model is usable."""
        self.config = model_config
        self.client = ollama.Client(host=self.config.host, timeout=self.config.timeout)
        self.logger = logging.getLogger(self.__class__.__name__)
        self._ensure_model_available()

    @staticmethod
    def _extract_names(raw_models) -> List[str]:
        """Best-effort extraction of model names from a heterogeneous list() payload.

        Entries may be dicts, objects with a ``name`` attribute, or bare strings,
        depending on the ollama client version; anything else is ignored.
        """
        names: List[str] = []
        for entry in raw_models:
            if isinstance(entry, dict):
                names.append(entry.get('name', ''))
            elif hasattr(entry, 'name'):
                names.append(entry.name)
            elif isinstance(entry, str):
                names.append(entry)
        return names

    def _probe_via_chat(self) -> bool:
        """Fallback availability check: fire a one-token chat and report success."""
        try:
            self.client.chat(
                model=self.config.name,
                messages=[{'role': 'user', 'content': 'test'}],
                options={'num_predict': 1}
            )
        except Exception as probe_error:
            # Record why the direct check failed so it can be debugged later.
            self.logger.debug(f"Fallback model check failed: {probe_error}")
            return False
        self.logger.debug(f"Model '{self.config.name}' is available (verified via chat)")
        return True

    def _ensure_model_available(self):
        """Checks if model exists, attempts to pull if missing (logic from llm_IE)."""
        target = self.config.name
        try:
            listing = self.client.list()

            # The ollama client has returned several shapes across versions:
            # an object exposing .models, a plain dict, or a bare list.
            if hasattr(listing, 'models'):
                raw_models = listing.models
            elif isinstance(listing, dict):
                raw_models = listing.get('models', [])
            elif isinstance(listing, list):
                raw_models = listing
            else:
                self.logger.warning("Could not parse model list response. Attempting direct model check...")
                if self._probe_via_chat():
                    return
                raw_models = []

            known_names = self._extract_names(raw_models)

            # Treat tag variants as the same model (e.g. 'gemma3' vs 'gemma3:latest').
            available = any(
                target == candidate
                or candidate.startswith(target + ':')
                or target.startswith(candidate + ':')
                for candidate in known_names
            )

            if available:
                self.logger.debug(f"Model '{target}' is already available")
            else:
                self.logger.info(f"Model '{target}' not found locally. Attempting to pull...")
                self.client.pull(target)
                self.logger.info(f"Successfully pulled model '{target}'")

        except Exception as e:
            # Construction never hard-fails on a listing/pull problem; a later
            # chat call will surface (and retry) any real connectivity issue.
            self.logger.error(f"Model availability check/pull failed for '{target}': {e}", exc_info=True)

    def generate_response(self, messages: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
        """
        Generates a response with standardized retry logic (logic from IndIE).

        Makes up to ``config.max_retries`` chat attempts with exponential
        backoff (2**attempt seconds) between them; returns the raw response
        dict on success, or None once all attempts are exhausted.
        """
        for attempt in range(1, self.config.max_retries + 1):
            try:
                return self.client.chat(
                    model=self.config.name,
                    messages=messages,
                    options={
                        "temperature": self.config.temperature,
                        "top_p": self.config.top_p,
                        "num_predict": self.config.num_predict
                    }
                )
            except Exception as e:
                if attempt == self.config.max_retries:
                    self.logger.error(
                        f"Error calling model '{self.config.name}': {e}. "
                        f"Exhausted {self.config.max_retries} retries.",
                        exc_info=True,
                    )
                    break
                wait_time = 2 ** attempt
                self.logger.warning(
                    f"Error calling model '{self.config.name}': {e}. "
                    f"Retrying ({attempt}/{self.config.max_retries}) in {wait_time}s..."
                )
                time.sleep(wait_time)

        return None