From fa6efc4b9417dc76aaefcf83fccf3d0bb0dc95d9 Mon Sep 17 00:00:00 2001 From: Ashpreet Date: Fri, 10 Apr 2026 12:44:34 +0100 Subject: [PATCH 01/23] feat: add LLMsTxtReader and LLMsTxtTools for llms.txt support Add a reader and toolkit for the llms.txt standard (https://llmstxt.org), enabling agents to discover and consume documentation indexes. LLMsTxtReader: fetches an llms.txt URL, parses the standardized markdown format to extract all linked doc URLs, fetches page content (handling HTML, markdown, plain text), and returns Documents with section/title metadata. Async variant fetches all pages concurrently. LLMsTxtTools provides two modes: - Agentic: get_llms_txt_index returns the index so the agent picks which pages to read, then read_llms_txt_url fetches individual pages. - Knowledge: read_llms_txt_and_load_knowledge bulk-fetches all linked pages and inserts them into a Knowledge base. Includes 32 unit tests and 2 cookbook examples. Co-Authored-By: Claude Opus 4.6 (1M context) --- cookbook/91_tools/llms_txt_tools.py | 42 ++ cookbook/91_tools/llms_txt_tools_knowledge.py | 56 +++ .../agno/knowledge/reader/llms_txt_reader.py | 361 ++++++++++++++++ .../agno/knowledge/reader/reader_factory.py | 17 + libs/agno/agno/tools/llms_txt.py | 161 +++++++ libs/agno/tests/unit/tools/test_llms_txt.py | 398 ++++++++++++++++++ 6 files changed, 1035 insertions(+) create mode 100644 cookbook/91_tools/llms_txt_tools.py create mode 100644 cookbook/91_tools/llms_txt_tools_knowledge.py create mode 100644 libs/agno/agno/knowledge/reader/llms_txt_reader.py create mode 100644 libs/agno/agno/tools/llms_txt.py create mode 100644 libs/agno/tests/unit/tools/test_llms_txt.py diff --git a/cookbook/91_tools/llms_txt_tools.py b/cookbook/91_tools/llms_txt_tools.py new file mode 100644 index 0000000000..cb2379c2ec --- /dev/null +++ b/cookbook/91_tools/llms_txt_tools.py @@ -0,0 +1,42 @@ +""" +LLMs.txt Tools - Agentic Documentation Discovery +============================= + +Demonstrates how to use 
LLMsTxtTools in agentic mode where the agent: +1. Reads the llms.txt index to discover available documentation pages +2. Decides which pages are relevant to the user's question +3. Fetches only the specific pages it needs + +The llms.txt format (https://llmstxt.org) is a standardized way for websites +to provide LLM-friendly documentation indexes. +""" + +from agno.agent import Agent +from agno.models.openai import OpenAIResponses +from agno.tools.llms_txt import LLMsTxtTools + +# --------------------------------------------------------------------------- +# Create Agent +# --------------------------------------------------------------------------- + +agent = Agent( + model=OpenAIResponses(id="gpt-5.4"), + tools=[LLMsTxtTools()], + instructions=[ + "You can read llms.txt files to discover documentation for any project.", + "First use get_llms_txt_index to see what pages are available.", + "Then use read_llms_txt_url to fetch only the pages relevant to the user's question.", + ], + markdown=True, +) + +# --------------------------------------------------------------------------- +# Run Agent +# --------------------------------------------------------------------------- +if __name__ == "__main__": + agent.print_response( + "Using the llms.txt at https://docs.agno.com/llms.txt, " + "find and read the documentation about how to create an agent with tools", + markdown=True, + stream=True, + ) diff --git a/cookbook/91_tools/llms_txt_tools_knowledge.py b/cookbook/91_tools/llms_txt_tools_knowledge.py new file mode 100644 index 0000000000..ce1c131f99 --- /dev/null +++ b/cookbook/91_tools/llms_txt_tools_knowledge.py @@ -0,0 +1,56 @@ +""" +LLMs.txt Tools with Knowledge Base +============================= + +Demonstrates loading all documentation from an llms.txt file into a knowledge base +for retrieval-augmented generation (RAG). + +The agent reads the llms.txt index, fetches all linked documentation pages, +and stores them in a PgVector knowledge base for semantic search. 
+""" + +from agno.agent import Agent +from agno.knowledge.knowledge import Knowledge +from agno.models.openai import OpenAIResponses +from agno.tools.llms_txt import LLMsTxtTools +from agno.vectordb.pgvector import PgVector + +# --------------------------------------------------------------------------- +# Setup Knowledge Base +# --------------------------------------------------------------------------- + +db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai" + +knowledge = Knowledge( + vector_db=PgVector( + table_name="llms_txt_docs", + db_url=db_url, + ), +) + +# --------------------------------------------------------------------------- +# Create Agent +# --------------------------------------------------------------------------- + +agent = Agent( + model=OpenAIResponses(id="gpt-5.4"), + knowledge=knowledge, + search_knowledge=True, + tools=[LLMsTxtTools(knowledge=knowledge, max_urls=20)], + instructions=[ + "You can load documentation from llms.txt files into your knowledge base.", + "When asked about a project, first load its llms.txt into the knowledge base, then answer questions.", + ], + markdown=True, +) + +# --------------------------------------------------------------------------- +# Run Agent +# --------------------------------------------------------------------------- +if __name__ == "__main__": + agent.print_response( + "Load the documentation from https://docs.agno.com/llms.txt into the knowledge base, " + "then tell me how to create an agent with Agno", + markdown=True, + stream=True, + ) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py new file mode 100644 index 0000000000..66e2256336 --- /dev/null +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -0,0 +1,361 @@ +import asyncio +import re +import uuid +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple +from urllib.parse import urljoin + +import httpx + +from 
import asyncio
import re
import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import urljoin

import httpx

from agno.knowledge.chunking.fixed import FixedSizeChunking
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
from agno.knowledge.document.base import Document
from agno.knowledge.reader.base import Reader
from agno.knowledge.types import ContentType
from agno.utils.log import log_debug, log_error, log_warning

try:
    from bs4 import BeautifulSoup
except ImportError:
    raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")


# Pattern to match markdown links: - [Title](url) or - [Title](url): description
_LINK_PATTERN = re.compile(r"-\s+\[([^\]]+)\]\(([^)]+)\)(?::\s*(.+))?")
# Pattern to match H2 section headers
_SECTION_PATTERN = re.compile(r"^##\s+(.+)$", re.MULTILINE)
# Content types whose bodies are already LLM-friendly and are returned verbatim
_PLAIN_TEXT_TYPES = ("text/plain", "text/markdown")
# Lowercased payload prefixes that identify HTML when the content-type header
# is missing or wrong.  NOTE(review): original sniff literal was unreadable in
# this view — confirm these prefixes match the intended set.
_HTML_PREFIXES = ("<!doctype", "<html")


@dataclass
class LLMsTxtEntry:
    """A single entry parsed from an llms.txt file."""

    # Link text, e.g. "Introduction"
    title: str
    # Absolute URL; relative links are resolved against the llms.txt URL
    url: str
    # Optional text after "):" on the link line ("" when absent)
    description: str
    # Name of the enclosing H2 section ("" for links before the first H2)
    section: str


class LLMsTxtReader(Reader):
    """Reader for llms.txt files.

    Reads an llms.txt file (see https://llmstxt.org), parses all linked documentation URLs,
    fetches the content of each linked page, and returns them as Documents.

    The llms.txt format is a standardized markdown file with:
    - An H1 heading (project name)
    - An optional blockquote summary
    - H2-delimited sections containing markdown links to documentation pages

    Example:
        reader = LLMsTxtReader(max_urls=50)
        documents = reader.read("https://docs.example.com/llms.txt")
    """

    def __init__(
        self,
        chunking_strategy: Optional[ChunkingStrategy] = None,
        max_urls: int = 100,
        timeout: int = 30,
        proxy: Optional[str] = None,
        include_llms_txt_content: bool = True,
        skip_optional: bool = False,
        **kwargs,
    ):
        """Initialize the LLMsTxtReader.

        Args:
            chunking_strategy: Strategy for chunking documents. Defaults to
                FixedSizeChunking, honouring a `chunk_size` kwarg when given.
            max_urls: Maximum number of linked URLs to fetch. Defaults to 100.
            timeout: HTTP request timeout in seconds. Defaults to 30.
            proxy: Optional HTTP proxy URL.
            include_llms_txt_content: Whether to include the llms.txt overview itself as a document.
            skip_optional: Whether to skip URLs in the "Optional" section.
        """
        if chunking_strategy is None:
            # chunk_size is deliberately read (not popped) from kwargs so the
            # base Reader also sees it.
            chunking_strategy = FixedSizeChunking(chunk_size=kwargs.get("chunk_size", 5000))
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.max_urls = max_urls
        self.timeout = timeout
        self.proxy = proxy
        self.include_llms_txt_content = include_llms_txt_content
        self.skip_optional = skip_optional

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Chunking strategies this reader can be configured with."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """This reader only consumes URLs (the llms.txt location)."""
        return [ContentType.URL]

    def _parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxtEntry]]:
        """Parse an llms.txt file and extract all linked URLs.

        Everything before the first H2 header is treated as the overview
        (H1 title, blockquote summary, free text).  After the first H2, each
        "- [Title](url): description" line becomes an LLMsTxtEntry tagged
        with its enclosing section name.

        Args:
            content: The raw text content of the llms.txt file.
            base_url: The base URL for resolving relative links.

        Returns:
            A tuple of (overview text, list of LLMsTxtEntry).
        """
        entries: List[LLMsTxtEntry] = []
        current_section = ""
        in_optional = False

        overview_lines: List[str] = []
        past_first_section = False

        for line in content.split("\n"):
            section_match = _SECTION_PATTERN.match(line)
            if section_match:
                current_section = section_match.group(1).strip()
                past_first_section = True
                # "Optional" is the one section the llms.txt spec marks as skippable.
                in_optional = current_section.lower() == "optional"
                continue

            if not past_first_section:
                overview_lines.append(line)
                continue

            if self.skip_optional and in_optional:
                continue

            link_match = _LINK_PATTERN.match(line.strip())
            if not link_match:
                continue

            page_url = link_match.group(2).strip()
            # Resolve relative links against the llms.txt location.
            if not page_url.startswith(("http://", "https://")):
                page_url = urljoin(base_url, page_url)

            entries.append(
                LLMsTxtEntry(
                    title=link_match.group(1).strip(),
                    url=page_url,
                    description=(link_match.group(3) or "").strip(),
                    section=current_section,
                )
            )

        return "\n".join(overview_lines).strip(), entries

    def _extract_content(self, html: str) -> str:
        """Extract readable text content from HTML.

        Strips chrome (scripts, styles, navigation, header/footer/aside),
        then prefers the <main>/<article>/role="main" region, falling back to
        <body>, then the whole document.
        """
        soup = BeautifulSoup(html, "html.parser")

        # Remove elements that never carry documentation content.
        for tag in soup.find_all(["script", "style", "nav", "header", "footer", "aside"]):
            tag.decompose()

        main = soup.find("main") or soup.find("article") or soup.find(attrs={"role": "main"})
        if main:
            return main.get_text(strip=True, separator=" ")

        body = soup.find("body")
        if body:
            return body.get_text(strip=True, separator=" ")

        return soup.get_text(strip=True, separator=" ")

    def _response_to_text(self, content_type: str, text: str) -> str:
        """Convert a fetched response body to readable text.

        Plain text and markdown are returned verbatim; HTML (per the
        content-type header, or sniffed from the payload) is reduced to its
        readable text.  Anything else is returned as-is — assumed to already
        be consumable text (TODO confirm against the upstream fallback).
        """
        if any(t in content_type for t in _PLAIN_TEXT_TYPES):
            return text
        if "text/html" in content_type or text.lstrip().lower().startswith(_HTML_PREFIXES):
            return self._extract_content(text)
        return text

    def _fetch_url(self, url: str) -> Optional[str]:
        """Fetch content from a URL.

        Returns the readable text of the page, or None on any HTTP/transport
        error (errors are logged, never raised, so one bad link does not abort
        a bulk read).
        """
        try:
            log_debug(f"Fetching: {url}")
            request_kwargs: Dict[str, Any] = {"timeout": self.timeout, "follow_redirects": True}
            if self.proxy:
                request_kwargs["proxy"] = self.proxy
            response = httpx.get(url, **request_kwargs)
            response.raise_for_status()
            return self._response_to_text(response.headers.get("content-type", ""), response.text)
        except httpx.HTTPError as e:
            # Covers both HTTPStatusError (4xx/5xx) and RequestError (transport).
            log_warning(f"Failed to fetch {url}: {e}")
            return None

    async def _async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]:
        """Asynchronously fetch content from a URL using a shared client.

        Same contract as _fetch_url: readable text, or None on error.
        Proxy configuration is expected to live on the client.
        """
        try:
            log_debug(f"Fetching asynchronously: {url}")
            response = await client.get(url, timeout=self.timeout, follow_redirects=True)
            response.raise_for_status()
            return self._response_to_text(response.headers.get("content-type", ""), response.text)
        except httpx.HTTPError as e:
            log_warning(f"Failed to fetch {url}: {e}")
            return None

    def _as_documents(self, doc: Document) -> List[Document]:
        """Return *doc* chunked per the reader's strategy, or as-is when chunking is off."""
        return self.chunk_document(doc) if self.chunk else [doc]

    def _build_documents(
        self,
        overview: str,
        entries: List[LLMsTxtEntry],
        fetched: Dict[str, str],
        llms_txt_url: str,
        name: Optional[str],
    ) -> List[Document]:
        """Build the Document list from the parsed overview and fetched pages.

        Entries whose URL is absent from *fetched* (fetch failed) are skipped.
        """
        documents: List[Document] = []

        # Optionally include the llms.txt overview itself as a document.
        if self.include_llms_txt_content and overview:
            documents.extend(
                self._as_documents(
                    Document(
                        name=name or llms_txt_url,
                        id=str(uuid.uuid4()),
                        meta_data={
                            "url": llms_txt_url,
                            "type": "llms_txt_overview",
                        },
                        content=overview,
                    )
                )
            )

        # One document per successfully fetched linked page.
        for entry in entries:
            content = fetched.get(entry.url)
            if not content:
                continue
            documents.extend(
                self._as_documents(
                    Document(
                        name=entry.title,
                        id=str(uuid.uuid4()),
                        meta_data={
                            "url": entry.url,
                            "section": entry.section,
                            "description": entry.description,
                            "type": "llms_txt_linked_doc",
                        },
                        content=content,
                    )
                )
            )

        return documents

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Read an llms.txt file and all its linked documentation.

        Args:
            url: The URL of the llms.txt file.
            name: Optional name for the documents.

        Returns:
            A list of documents from the llms.txt and all linked pages;
            empty when the llms.txt itself cannot be fetched.
        """
        log_debug(f"Reading llms.txt: {url}")

        llms_txt_content = self._fetch_url(url)
        if not llms_txt_content:
            log_error(f"Failed to fetch llms.txt from {url}")
            return []

        overview, entries = self._parse_llms_txt(llms_txt_content, url)
        log_debug(f"Found {len(entries)} linked URLs in llms.txt")

        # Cap the number of linked pages fetched.
        entries_to_fetch = entries[: self.max_urls]
        if len(entries) > self.max_urls:
            log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})")

        # Fetch all linked pages sequentially (use async_read for concurrency).
        fetched: Dict[str, str] = {}
        for entry in entries_to_fetch:
            content = self._fetch_url(entry.url)
            if content:
                fetched[entry.url] = content

        log_debug(f"Successfully fetched {len(fetched)}/{len(entries_to_fetch)} linked pages")
        return self._build_documents(overview, entries_to_fetch, fetched, url, name)

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Asynchronously read an llms.txt file and all its linked documentation.

        Linked pages are fetched concurrently over a single shared client.

        Args:
            url: The URL of the llms.txt file.
            name: Optional name for the documents.

        Returns:
            A list of documents from the llms.txt and all linked pages;
            empty when the llms.txt itself cannot be fetched.
        """
        log_debug(f"Reading llms.txt asynchronously: {url}")

        client_args = {"proxy": self.proxy} if self.proxy else {}
        async with httpx.AsyncClient(**client_args) as client:  # type: ignore
            llms_txt_content = await self._async_fetch_url(client, url)
            if not llms_txt_content:
                log_error(f"Failed to fetch llms.txt from {url}")
                return []

            overview, entries = self._parse_llms_txt(llms_txt_content, url)
            log_debug(f"Found {len(entries)} linked URLs in llms.txt")

            entries_to_fetch = entries[: self.max_urls]
            if len(entries) > self.max_urls:
                log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})")

            async def _fetch_entry(entry: LLMsTxtEntry) -> Tuple[str, Optional[str]]:
                # Pair each result with its URL so gather order doesn't matter.
                return entry.url, await self._async_fetch_url(client, entry.url)

            results = await asyncio.gather(*(_fetch_entry(e) for e in entries_to_fetch))
            fetched: Dict[str, str] = {page_url: page_text for page_url, page_text in results if page_text}

            log_debug(f"Successfully fetched {len(fetched)}/{len(entries_to_fetch)} linked pages")
            return self._build_documents(overview, entries_to_fetch, fetched, url, name)
import json
from typing import Any, List, Optional

from agno.knowledge.document import Document
from agno.knowledge.knowledge import Knowledge
from agno.tools import Toolkit
from agno.utils.log import log_debug, log_info


class LLMsTxtTools(Toolkit):
    """Tools for reading llms.txt files and loading their linked documentation into a knowledge base.

    The llms.txt format (see https://llmstxt.org) is a standardized way for websites to provide
    LLM-friendly documentation indexes.

    This toolkit provides two usage modes:

    **Agentic mode (without knowledge):** The agent gets two tools:
    - `get_llms_txt_index` - reads the llms.txt and returns the index of available docs
    - `read_llms_txt_url` - fetches a specific URL from the index
    The agent reads the index, decides which pages are relevant, and fetches only those.

    **Knowledge mode (with knowledge):** The agent gets one tool:
    - `read_llms_txt_and_load_knowledge` - reads the llms.txt, fetches all linked pages,
      and loads them into the knowledge base.

    Args:
        knowledge: Optional Knowledge instance. When provided, enables knowledge loading mode.
        max_urls: Maximum number of linked URLs to fetch when loading into knowledge. Defaults to 100.
        timeout: HTTP request timeout in seconds. Defaults to 30.
        skip_optional: Whether to skip URLs listed in the "Optional" section. Defaults to False.

    Example:
        # Agentic mode - agent reads index and picks which docs to fetch
        tools = LLMsTxtTools()
        agent = Agent(tools=[tools])

        # Knowledge mode - bulk load all docs into KB
        knowledge = Knowledge(vector_db=my_vector_db)
        tools = LLMsTxtTools(knowledge=knowledge)
        agent = Agent(tools=[tools], knowledge=knowledge)
    """

    def __init__(
        self,
        knowledge: Optional[Knowledge] = None,
        max_urls: int = 100,
        timeout: int = 30,
        skip_optional: bool = False,
        **kwargs,
    ):
        self.knowledge: Optional[Knowledge] = knowledge
        self.max_urls = max_urls
        self.timeout = timeout
        self.skip_optional = skip_optional

        # The two modes are mutually exclusive: bulk-load tool when a
        # knowledge base is supplied, agentic index/fetch tools otherwise.
        tools: List[Any] = []
        if self.knowledge is not None:
            tools.append(self.read_llms_txt_and_load_knowledge)
        else:
            tools.append(self.get_llms_txt_index)
            tools.append(self.read_llms_txt_url)

        super().__init__(name="llms_txt_tools", tools=tools, **kwargs)

    def _build_reader(self, *, limit_urls: bool = False):
        """Construct an LLMsTxtReader configured from this toolkit's settings.

        The import is deferred so the reader's optional dependency (bs4) is
        only required when a tool actually runs.

        Args:
            limit_urls: Whether to cap bulk fetching at self.max_urls. The
                index tool intentionally lists ALL pages regardless of the cap.
        """
        from agno.knowledge.reader.llms_txt_reader import LLMsTxtReader

        if limit_urls:
            return LLMsTxtReader(max_urls=self.max_urls, timeout=self.timeout, skip_optional=self.skip_optional)
        return LLMsTxtReader(timeout=self.timeout, skip_optional=self.skip_optional)

    def get_llms_txt_index(self, url: str) -> str:
        """Reads an llms.txt file and returns the index of all available documentation pages.

        An llms.txt file is a standardized index of documentation for a project.
        This function reads the index and returns all available pages with their titles,
        URLs, descriptions, and sections. Use this to discover what documentation is
        available, then use read_llms_txt_url to fetch specific pages.

        :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt).
        :return: JSON with the overview and list of available documentation pages.
        """
        reader = self._build_reader()

        log_info(f"Reading llms.txt index from {url}")
        llms_txt_content = reader._fetch_url(url)
        if not llms_txt_content:
            return f"Failed to fetch llms.txt from {url}"

        overview, entries = reader._parse_llms_txt(llms_txt_content, url)

        index = {
            "overview": overview,
            "pages": [
                {
                    "title": entry.title,
                    "url": entry.url,
                    "description": entry.description,
                    "section": entry.section,
                }
                for entry in entries
            ],
            "total_pages": len(entries),
        }
        return json.dumps(index)

    def read_llms_txt_url(self, url: str) -> str:
        """Fetches and returns the content of a specific documentation page URL.

        Use this after calling get_llms_txt_index to fetch the content of specific pages
        you want to read. You can call this multiple times for different URLs.

        :param url: The URL of the documentation page to read.
        :return: The text content of the page.
        """
        reader = self._build_reader()

        log_debug(f"Fetching URL: {url}")
        content = reader._fetch_url(url)
        if not content:
            return f"Failed to fetch content from {url}"

        return content

    def read_llms_txt_and_load_knowledge(self, url: str) -> str:
        """Reads an llms.txt file, fetches all linked documentation pages, and loads them into the knowledge base.

        An llms.txt file is a standardized index of documentation for a project.
        This function reads the index, fetches every linked page, and stores the content
        in the knowledge base for future retrieval.

        :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt).
        :return: Summary of what was loaded into the knowledge base.
        """
        if self.knowledge is None:
            return "Knowledge base not provided"

        reader = self._build_reader(limit_urls=True)

        log_info(f"Reading llms.txt from {url}")
        documents: List[Document] = reader.read(url=url)

        if not documents:
            return f"No documents found in llms.txt at {url}"

        log_debug(f"Loading {len(documents)} documents into knowledge base")
        for doc in documents:
            self.knowledge.insert(
                text_content=doc.content,
                name=doc.name,
                metadata=doc.meta_data,
            )

        return f"Successfully loaded {len(documents)} documents from llms.txt into the knowledge base"
+ +Acme makes it easy to build production-ready AI agents. + +## Getting Started + +- [Introduction](https://docs.acme.com/introduction): Overview of Acme +- [Installation](https://docs.acme.com/installation): How to install Acme +- [Quickstart](https://docs.acme.com/quickstart): Build your first agent + +## API Reference + +- [Agent API](https://docs.acme.com/api/agent): Agent class reference +- [Tools API](https://docs.acme.com/api/tools): Tools class reference + +## Optional + +- [Changelog](https://docs.acme.com/changelog): Release notes +- [Contributing](https://docs.acme.com/contributing): How to contribute +""" + +SAMPLE_LLMS_TXT_RELATIVE = """# My Project + +> A project with relative links. + +## Docs + +- [Guide](/docs/guide): The guide +- [API](api/reference): API docs +""" + + +# --------------------------------------------------------------------------- +# LLMsTxtReader tests +# --------------------------------------------------------------------------- + + +class TestLLMsTxtReaderInit: + def test_defaults(self): + reader = LLMsTxtReader() + assert reader.max_urls == 100 + assert reader.timeout == 30 + assert reader.proxy is None + assert reader.include_llms_txt_content is True + assert reader.skip_optional is False + + def test_custom_params(self): + reader = LLMsTxtReader(max_urls=50, timeout=10, skip_optional=True) + assert reader.max_urls == 50 + assert reader.timeout == 10 + assert reader.skip_optional is True + + +class TestParseLLMsTxt: + def test_parses_entries(self): + reader = LLMsTxtReader() + overview, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + + assert len(entries) == 7 + assert entries[0].title == "Introduction" + assert entries[0].url == "https://docs.acme.com/introduction" + assert entries[0].description == "Overview of Acme" + assert entries[0].section == "Getting Started" + + def test_parses_overview(self): + reader = LLMsTxtReader() + overview, entries = 
reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + + assert "# Acme Project" in overview + assert "Acme makes it easy" in overview + + def test_sections_assigned(self): + reader = LLMsTxtReader() + _, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + + sections = {e.section for e in entries} + assert sections == {"Getting Started", "API Reference", "Optional"} + + def test_skip_optional(self): + reader = LLMsTxtReader(skip_optional=True) + _, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + + assert len(entries) == 5 + assert all(e.section != "Optional" for e in entries) + + def test_relative_urls_resolved(self): + reader = LLMsTxtReader() + _, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT_RELATIVE, "https://example.com/llms.txt") + + assert entries[0].url == "https://example.com/docs/guide" + assert entries[1].url == "https://example.com/api/reference" + + def test_empty_content(self): + reader = LLMsTxtReader() + overview, entries = reader._parse_llms_txt("", "https://example.com/llms.txt") + + assert overview == "" + assert entries == [] + + def test_no_links(self): + content = "# Title\n\nSome overview text.\n\n## Section\n\nNo links here." + reader = LLMsTxtReader() + overview, entries = reader._parse_llms_txt(content, "https://example.com/llms.txt") + + assert "# Title" in overview + assert entries == [] + + +class TestExtractContent: + def test_extracts_from_main_tag(self): + reader = LLMsTxtReader() + html = "
Main content here
Foot
" + result = reader._extract_content(html) + assert "Main content here" in result + assert "Nav" not in result + + def test_extracts_from_body_fallback(self): + reader = LLMsTxtReader() + html = "
Body content
" + result = reader._extract_content(html) + assert "Body content" in result + + def test_strips_script_and_style(self): + reader = LLMsTxtReader() + html = "

Text

" + result = reader._extract_content(html) + assert "var x" not in result + assert "Text" in result + + +class TestFetchUrl: + def test_returns_text_for_plain_content(self): + reader = LLMsTxtReader() + mock_response = MagicMock() + mock_response.headers = {"content-type": "text/plain"} + mock_response.text = "Plain text content" + mock_response.raise_for_status = MagicMock() + + with patch("httpx.get", return_value=mock_response): + result = reader._fetch_url("https://example.com/file.txt") + + assert result == "Plain text content" + + def test_extracts_html_content(self): + reader = LLMsTxtReader() + mock_response = MagicMock() + mock_response.headers = {"content-type": "text/html"} + mock_response.text = "
Extracted
" + mock_response.raise_for_status = MagicMock() + + with patch("httpx.get", return_value=mock_response): + result = reader._fetch_url("https://example.com/page") + + assert "Extracted" in result + + def test_returns_none_on_http_error(self): + reader = LLMsTxtReader() + + with patch( + "httpx.get", + side_effect=httpx.HTTPStatusError("error", request=MagicMock(), response=MagicMock(status_code=404)), + ): + result = reader._fetch_url("https://example.com/missing") + + assert result is None + + def test_returns_none_on_request_error(self): + reader = LLMsTxtReader() + + with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): + result = reader._fetch_url("https://example.com/down") + + assert result is None + + +class TestBuildDocuments: + def test_builds_overview_and_linked_docs(self): + reader = LLMsTxtReader(chunk=False) + entries = [ + LLMsTxtEntry(title="Intro", url="https://example.com/intro", description="Intro page", section="Docs"), + ] + fetched = {"https://example.com/intro": "Introduction content here"} + + docs = reader._build_documents("Overview text", entries, fetched, "https://example.com/llms.txt", None) + + assert len(docs) == 2 + assert docs[0].meta_data["type"] == "llms_txt_overview" + assert docs[0].content == "Overview text" + assert docs[1].meta_data["type"] == "llms_txt_linked_doc" + assert docs[1].name == "Intro" + assert docs[1].content == "Introduction content here" + + def test_skips_unfetched_entries(self): + reader = LLMsTxtReader(chunk=False) + entries = [ + LLMsTxtEntry(title="Missing", url="https://example.com/missing", description="", section="Docs"), + ] + fetched = {} + + docs = reader._build_documents("Overview", entries, fetched, "https://example.com/llms.txt", None) + + # Only the overview doc + assert len(docs) == 1 + + def test_excludes_overview_when_disabled(self): + reader = LLMsTxtReader(chunk=False, include_llms_txt_content=False) + entries = [ + LLMsTxtEntry(title="Page", 
url="https://example.com/page", description="", section="Docs"), + ] + fetched = {"https://example.com/page": "Page content"} + + docs = reader._build_documents("Overview", entries, fetched, "https://example.com/llms.txt", None) + + assert len(docs) == 1 + assert docs[0].meta_data["type"] == "llms_txt_linked_doc" + + +class TestRead: + def test_read_fetches_and_builds_docs(self): + reader = LLMsTxtReader(max_urls=5, chunk=False) + + def mock_fetch(url): + if url == "https://example.com/llms.txt": + return SAMPLE_LLMS_TXT + return f"Content of {url}" + + with patch.object(reader, "_fetch_url", side_effect=mock_fetch): + docs = reader.read("https://example.com/llms.txt") + + # 1 overview + 5 linked docs (max_urls=5) + assert len(docs) == 6 + assert docs[0].meta_data["type"] == "llms_txt_overview" + + def test_read_returns_empty_on_fetch_failure(self): + reader = LLMsTxtReader() + + with patch.object(reader, "_fetch_url", return_value=None): + docs = reader.read("https://example.com/llms.txt") + + assert docs == [] + + def test_max_urls_limits_fetched_pages(self): + reader = LLMsTxtReader(max_urls=2, chunk=False) + + def mock_fetch(url): + if url == "https://example.com/llms.txt": + return SAMPLE_LLMS_TXT + return f"Content of {url}" + + with patch.object(reader, "_fetch_url", side_effect=mock_fetch): + docs = reader.read("https://example.com/llms.txt") + + # 1 overview + 2 linked docs (max_urls=2) + assert len(docs) == 3 + + +# --------------------------------------------------------------------------- +# LLMsTxtTools tests +# --------------------------------------------------------------------------- + + +class TestLLMsTxtToolsInit: + def test_without_knowledge_registers_agentic_tools(self): + tools = LLMsTxtTools() + func_names = [func.name for func in tools.functions.values()] + assert "get_llms_txt_index" in func_names + assert "read_llms_txt_url" in func_names + assert "read_llms_txt_and_load_knowledge" not in func_names + + def 
test_with_knowledge_registers_load(self): + mock_knowledge = MagicMock() + tools = LLMsTxtTools(knowledge=mock_knowledge) + func_names = [func.name for func in tools.functions.values()] + assert "read_llms_txt_and_load_knowledge" in func_names + assert "get_llms_txt_index" not in func_names + + def test_custom_params(self): + tools = LLMsTxtTools(max_urls=50, timeout=10, skip_optional=True) + assert tools.max_urls == 50 + assert tools.timeout == 10 + assert tools.skip_optional is True + + +class TestGetLLMsTxtIndex: + def test_returns_index_json(self): + tools = LLMsTxtTools() + + mock_response = MagicMock() + mock_response.headers = {"content-type": "text/plain"} + mock_response.text = SAMPLE_LLMS_TXT + mock_response.raise_for_status = MagicMock() + + with patch("httpx.get", return_value=mock_response): + result = tools.get_llms_txt_index("https://docs.acme.com/llms.txt") + + data = json.loads(result) + assert data["total_pages"] == 7 + assert data["pages"][0]["title"] == "Introduction" + assert data["pages"][0]["url"] == "https://docs.acme.com/introduction" + assert "overview" in data + + def test_returns_error_on_fetch_failure(self): + tools = LLMsTxtTools() + + with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): + result = tools.get_llms_txt_index("https://example.com/llms.txt") + + assert "Failed to fetch" in result + + +class TestReadLLMsTxtUrl: + def test_returns_page_content(self): + tools = LLMsTxtTools() + + mock_response = MagicMock() + mock_response.headers = {"content-type": "text/plain"} + mock_response.text = "Page content here" + mock_response.raise_for_status = MagicMock() + + with patch("httpx.get", return_value=mock_response): + result = tools.read_llms_txt_url("https://docs.acme.com/introduction") + + assert result == "Page content here" + + def test_returns_error_on_fetch_failure(self): + tools = LLMsTxtTools() + + with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): + result = 
tools.read_llms_txt_url("https://example.com/missing") + + assert "Failed to fetch" in result + + +class TestLoadKnowledge: + def test_inserts_into_knowledge(self): + mock_knowledge = MagicMock() + tools = LLMsTxtTools(knowledge=mock_knowledge) + + mock_response = MagicMock() + mock_response.headers = {"content-type": "text/plain"} + mock_response.text = "Page content" + mock_response.raise_for_status = MagicMock() + + # Simple llms.txt with one link + llms_content = "# Test\n\n## Docs\n\n- [Page](https://example.com/page): A page\n" + call_count = 0 + + def mock_get(url, **kwargs): + nonlocal call_count + call_count += 1 + resp = MagicMock() + resp.headers = {"content-type": "text/plain"} + resp.raise_for_status = MagicMock() + if call_count == 1: + resp.text = llms_content + else: + resp.text = "Page content" + return resp + + with patch("httpx.get", side_effect=mock_get): + result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") + + assert mock_knowledge.insert.called + assert "Successfully loaded" in result + + def test_returns_message_when_no_knowledge(self): + tools = LLMsTxtTools() + # Force-call the knowledge method even though it wouldn't be registered + result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") + assert result == "Knowledge base not provided" + + def test_returns_message_when_no_docs(self): + mock_knowledge = MagicMock() + tools = LLMsTxtTools(knowledge=mock_knowledge) + + with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): + result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") + + assert "No documents found" in result From f2a91010081128c6f7b36d0a87cd368656d13c00 Mon Sep 17 00:00:00 2001 From: Yash Pratap Solanky <101447028+ysolanky@users.noreply.github.com> Date: Fri, 10 Apr 2026 10:48:36 -0400 Subject: [PATCH 02/23] fix: address review issues in LLMsTxtReader and LLMsTxtTools (#7465) ## Summary Addresses code review feedback on #7458. 
Fixes several issues in the LLMsTxtReader and LLMsTxtTools implementation. **Changes:** - **Lazy BeautifulSoup import** - Deferred to `_extract_content()` instead of hard-failing at module import time - **Variable shadowing fix** - Renamed `url` to `entry_url` in `async_read()` dict comprehension to avoid shadowing the method parameter - **Concurrency limiting** - Added `asyncio.Semaphore(10)` to prevent overwhelming target servers when fetching 100+ URLs concurrently - **Better text extraction** - Changed `_extract_content()` separator from `" "` to `"\n"` to preserve document structure - **Public API methods** - Renamed `_fetch_url` / `_parse_llms_txt` to `fetch_url` / `parse_llms_txt` since they are called by the toolkit - **Reader reuse** - LLMsTxtTools now creates a single `LLMsTxtReader` instance in `__init__` instead of per tool call - **Async tool variants** - Added `aget_llms_txt_index`, `aread_llms_txt_url`, `aread_llms_txt_and_load_knowledge` registered via `async_tools` following the codebase convention (e.g. BrandfetchTools) - **New tests** - Added tests for async tool registration, reader reuse, and newline preservation in HTML extraction ## Type of change - [x] Improvement --- ## Checklist - [x] Code complies with style guidelines - [x] Ran format/validation scripts (`./scripts/format.sh` and `./scripts/validate.sh`) - [x] Self-review completed - [x] Documentation updated (comments, docstrings) - [x] Tests added/updated (if applicable) ### Duplicate and AI-Generated PR Check - [x] I have searched existing [open pull requests](../../pulls) and confirmed that no other PR already addresses this issue - [x] Check if this PR was entirely AI-generated (by Copilot, Claude Code, Cursor, etc.) --- ## Additional Notes All 36 tests pass (up from 32 - added 4 new tests for async registration, reader reuse, and HTML newline preservation). 
--- .../agno/knowledge/reader/llms_txt_reader.py | 48 ++++---- libs/agno/agno/tools/llms_txt.py | 104 ++++++++++++++---- libs/agno/tests/unit/tools/test_llms_txt.py | 54 ++++++--- 3 files changed, 152 insertions(+), 54 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 66e2256336..5952e3679a 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -14,17 +14,15 @@ from agno.knowledge.types import ContentType from agno.utils.log import log_debug, log_error, log_warning -try: - from bs4 import BeautifulSoup -except ImportError: - raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.") - - # Pattern to match markdown links: - [Title](url) or - [Title](url): description +# Note: titles with nested brackets (e.g. [Agent [Beta]](url)) are not supported. _LINK_PATTERN = re.compile(r"-\s+\[([^\]]+)\]\(([^)]+)\)(?::\s*(.+))?") # Pattern to match H2 section headers _SECTION_PATTERN = re.compile(r"^##\s+(.+)$", re.MULTILINE) +# Maximum number of concurrent HTTP requests when fetching linked pages +_MAX_CONCURRENT_FETCHES = 10 + @dataclass class LLMsTxtEntry: @@ -96,7 +94,7 @@ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]: def get_supported_content_types(cls) -> List[ContentType]: return [ContentType.URL] - def _parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxtEntry]]: + def parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxtEntry]]: """Parse an llms.txt file and extract all linked URLs. 
Args: @@ -155,6 +153,11 @@ def _parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTx def _extract_content(self, html: str) -> str: """Extract readable text content from HTML.""" + try: + from bs4 import BeautifulSoup + except ImportError: + raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.") + soup = BeautifulSoup(html, "html.parser") # Remove unwanted elements @@ -164,15 +167,15 @@ def _extract_content(self, html: str) -> str: # Try to find main content main = soup.find("main") or soup.find("article") or soup.find(attrs={"role": "main"}) if main: - return main.get_text(strip=True, separator=" ") + return main.get_text(separator="\n", strip=True) body = soup.find("body") if body: - return body.get_text(strip=True, separator=" ") + return body.get_text(separator="\n", strip=True) - return soup.get_text(strip=True, separator=" ") + return soup.get_text(separator="\n", strip=True) - def _fetch_url(self, url: str) -> Optional[str]: + def fetch_url(self, url: str) -> Optional[str]: """Fetch content from a URL, returning text for text-like content or extracted text from HTML.""" try: log_debug(f"Fetching: {url}") @@ -205,7 +208,7 @@ def _fetch_url(self, url: str) -> Optional[str]: log_error(f"Failed to fetch {url}: {str(e)}") return None - async def _async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]: + async def async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]: """Asynchronously fetch content from a URL.""" try: log_debug(f"Fetching asynchronously: {url}") @@ -296,13 +299,13 @@ def read(self, url: str, name: Optional[str] = None) -> List[Document]: log_debug(f"Reading llms.txt: {url}") # Fetch the llms.txt file - llms_txt_content = self._fetch_url(url) + llms_txt_content = self.fetch_url(url) if not llms_txt_content: log_error(f"Failed to fetch llms.txt from {url}") return [] # Parse the llms.txt content - overview, entries = 
self._parse_llms_txt(llms_txt_content, url) + overview, entries = self.parse_llms_txt(llms_txt_content, url) log_debug(f"Found {len(entries)} linked URLs in llms.txt") # Limit the number of URLs to fetch @@ -313,7 +316,7 @@ def read(self, url: str, name: Optional[str] = None) -> List[Document]: # Fetch all linked pages fetched: Dict[str, str] = {} for entry in entries_to_fetch: - content = self._fetch_url(entry.url) + content = self.fetch_url(entry.url) if content: fetched[entry.url] = content @@ -335,13 +338,13 @@ async def async_read(self, url: str, name: Optional[str] = None) -> List[Documen client_args = {"proxy": self.proxy} if self.proxy else {} async with httpx.AsyncClient(**client_args) as client: # type: ignore # Fetch the llms.txt file - llms_txt_content = await self._async_fetch_url(client, url) + llms_txt_content = await self.async_fetch_url(client, url) if not llms_txt_content: log_error(f"Failed to fetch llms.txt from {url}") return [] # Parse the llms.txt content - overview, entries = self._parse_llms_txt(llms_txt_content, url) + overview, entries = self.parse_llms_txt(llms_txt_content, url) log_debug(f"Found {len(entries)} linked URLs in llms.txt") # Limit the number of URLs to fetch @@ -349,13 +352,16 @@ async def async_read(self, url: str, name: Optional[str] = None) -> List[Documen if len(entries) > self.max_urls: log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})") - # Fetch all linked pages concurrently + # Fetch all linked pages concurrently with a semaphore to limit parallelism + semaphore = asyncio.Semaphore(_MAX_CONCURRENT_FETCHES) + async def _fetch_entry(entry: LLMsTxtEntry) -> Tuple[str, Optional[str]]: - content = await self._async_fetch_url(client, entry.url) - return entry.url, content + async with semaphore: + content = await self.async_fetch_url(client, entry.url) + return entry.url, content results = await asyncio.gather(*[_fetch_entry(e) for e in entries_to_fetch]) - fetched: Dict[str, str] = {url: content for 
url, content in results if content} + fetched: Dict[str, str] = {entry_url: content for entry_url, content in results if content} log_debug(f"Successfully fetched {len(fetched)}/{len(entries_to_fetch)} linked pages") return self._build_documents(overview, entries_to_fetch, fetched, url, name) diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 88945cec75..a7a050b32d 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -49,19 +49,30 @@ def __init__( skip_optional: bool = False, **kwargs, ): + from agno.knowledge.reader.llms_txt_reader import LLMsTxtReader + self.knowledge: Optional[Knowledge] = knowledge self.max_urls = max_urls self.timeout = timeout self.skip_optional = skip_optional + self.reader = LLMsTxtReader( + max_urls=max_urls, + timeout=timeout, + skip_optional=skip_optional, + ) tools: List[Any] = [] + async_tools_list: List[tuple] = [] if self.knowledge is not None: tools.append(self.read_llms_txt_and_load_knowledge) + async_tools_list.append((self.aread_llms_txt_and_load_knowledge, "read_llms_txt_and_load_knowledge")) else: tools.append(self.get_llms_txt_index) tools.append(self.read_llms_txt_url) + async_tools_list.append((self.aget_llms_txt_index, "get_llms_txt_index")) + async_tools_list.append((self.aread_llms_txt_url, "read_llms_txt_url")) - super().__init__(name="llms_txt_tools", tools=tools, **kwargs) + super().__init__(name="llms_txt_tools", tools=tools, async_tools=async_tools_list, **kwargs) def get_llms_txt_index(self, url: str) -> str: """Reads an llms.txt file and returns the index of all available documentation pages. @@ -74,19 +85,44 @@ def get_llms_txt_index(self, url: str) -> str: :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). :return: JSON with the overview and list of available documentation pages. 
""" - from agno.knowledge.reader.llms_txt_reader import LLMsTxtReader + log_info(f"Reading llms.txt index from {url}") + llms_txt_content = self.reader.fetch_url(url) + if not llms_txt_content: + return f"Failed to fetch llms.txt from {url}" - reader = LLMsTxtReader( - timeout=self.timeout, - skip_optional=self.skip_optional, - ) + overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) + + index = { + "overview": overview, + "pages": [ + { + "title": entry.title, + "url": entry.url, + "description": entry.description, + "section": entry.section, + } + for entry in entries + ], + "total_pages": len(entries), + } + return json.dumps(index) + + async def aget_llms_txt_index(self, url: str) -> str: + """Async variant of get_llms_txt_index. + + :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). + :return: JSON with the overview and list of available documentation pages. + """ + import httpx log_info(f"Reading llms.txt index from {url}") - llms_txt_content = reader._fetch_url(url) + async with httpx.AsyncClient() as client: + llms_txt_content = await self.reader.async_fetch_url(client, url) + if not llms_txt_content: return f"Failed to fetch llms.txt from {url}" - overview, entries = reader._parse_llms_txt(llms_txt_content, url) + overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) index = { "overview": overview, @@ -112,12 +148,25 @@ def read_llms_txt_url(self, url: str) -> str: :param url: The URL of the documentation page to read. :return: The text content of the page. """ - from agno.knowledge.reader.llms_txt_reader import LLMsTxtReader + log_debug(f"Fetching URL: {url}") + content = self.reader.fetch_url(url) + if not content: + return f"Failed to fetch content from {url}" + + return content + + async def aread_llms_txt_url(self, url: str) -> str: + """Async variant of read_llms_txt_url. - reader = LLMsTxtReader(timeout=self.timeout) + :param url: The URL of the documentation page to read. 
+ :return: The text content of the page. + """ + import httpx log_debug(f"Fetching URL: {url}") - content = reader._fetch_url(url) + async with httpx.AsyncClient() as client: + content = await self.reader.async_fetch_url(client, url) + if not content: return f"Failed to fetch content from {url}" @@ -136,23 +185,40 @@ def read_llms_txt_and_load_knowledge(self, url: str) -> str: if self.knowledge is None: return "Knowledge base not provided" - from agno.knowledge.reader.llms_txt_reader import LLMsTxtReader + log_info(f"Reading llms.txt from {url}") + documents: List[Document] = self.reader.read(url=url) - reader = LLMsTxtReader( - max_urls=self.max_urls, - timeout=self.timeout, - skip_optional=self.skip_optional, - ) + if not documents: + return f"No documents found in llms.txt at {url}" + + log_debug(f"Loading {len(documents)} documents into knowledge base") + for doc in documents: + self.knowledge.insert( + text_content=doc.content, + name=doc.name, + metadata=doc.meta_data, + ) + + return f"Successfully loaded {len(documents)} documents from llms.txt into the knowledge base" + + async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: + """Async variant of read_llms_txt_and_load_knowledge. + + :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). + :return: Summary of what was loaded into the knowledge base. 
+ """ + if self.knowledge is None: + return "Knowledge base not provided" log_info(f"Reading llms.txt from {url}") - documents: List[Document] = reader.read(url=url) + documents: List[Document] = await self.reader.async_read(url=url) if not documents: return f"No documents found in llms.txt at {url}" log_debug(f"Loading {len(documents)} documents into knowledge base") for doc in documents: - self.knowledge.insert( + await self.knowledge.ainsert( text_content=doc.content, name=doc.name, metadata=doc.meta_data, diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index ec20971273..b3cce25d04 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -73,7 +73,7 @@ def test_custom_params(self): class TestParseLLMsTxt: def test_parses_entries(self): reader = LLMsTxtReader() - overview, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + overview, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") assert len(entries) == 7 assert entries[0].title == "Introduction" @@ -83,35 +83,35 @@ def test_parses_entries(self): def test_parses_overview(self): reader = LLMsTxtReader() - overview, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + overview, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") assert "# Acme Project" in overview assert "Acme makes it easy" in overview def test_sections_assigned(self): reader = LLMsTxtReader() - _, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") + _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") sections = {e.section for e in entries} assert sections == {"Getting Started", "API Reference", "Optional"} def test_skip_optional(self): reader = LLMsTxtReader(skip_optional=True) - _, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT, 
"https://docs.acme.com/llms.txt") + _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") assert len(entries) == 5 assert all(e.section != "Optional" for e in entries) def test_relative_urls_resolved(self): reader = LLMsTxtReader() - _, entries = reader._parse_llms_txt(SAMPLE_LLMS_TXT_RELATIVE, "https://example.com/llms.txt") + _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT_RELATIVE, "https://example.com/llms.txt") assert entries[0].url == "https://example.com/docs/guide" assert entries[1].url == "https://example.com/api/reference" def test_empty_content(self): reader = LLMsTxtReader() - overview, entries = reader._parse_llms_txt("", "https://example.com/llms.txt") + overview, entries = reader.parse_llms_txt("", "https://example.com/llms.txt") assert overview == "" assert entries == [] @@ -119,7 +119,7 @@ def test_empty_content(self): def test_no_links(self): content = "# Title\n\nSome overview text.\n\n## Section\n\nNo links here." reader = LLMsTxtReader() - overview, entries = reader._parse_llms_txt(content, "https://example.com/llms.txt") + overview, entries = reader.parse_llms_txt(content, "https://example.com/llms.txt") assert "# Title" in overview assert entries == [] @@ -146,6 +146,14 @@ def test_strips_script_and_style(self): assert "var x" not in result assert "Text" in result + def test_preserves_structure_with_newlines(self): + reader = LLMsTxtReader() + html = "

First paragraph

Second paragraph

" + result = reader._extract_content(html) + assert "First paragraph" in result + assert "Second paragraph" in result + assert "\n" in result + class TestFetchUrl: def test_returns_text_for_plain_content(self): @@ -156,7 +164,7 @@ def test_returns_text_for_plain_content(self): mock_response.raise_for_status = MagicMock() with patch("httpx.get", return_value=mock_response): - result = reader._fetch_url("https://example.com/file.txt") + result = reader.fetch_url("https://example.com/file.txt") assert result == "Plain text content" @@ -168,7 +176,7 @@ def test_extracts_html_content(self): mock_response.raise_for_status = MagicMock() with patch("httpx.get", return_value=mock_response): - result = reader._fetch_url("https://example.com/page") + result = reader.fetch_url("https://example.com/page") assert "Extracted" in result @@ -179,7 +187,7 @@ def test_returns_none_on_http_error(self): "httpx.get", side_effect=httpx.HTTPStatusError("error", request=MagicMock(), response=MagicMock(status_code=404)), ): - result = reader._fetch_url("https://example.com/missing") + result = reader.fetch_url("https://example.com/missing") assert result is None @@ -187,7 +195,7 @@ def test_returns_none_on_request_error(self): reader = LLMsTxtReader() with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): - result = reader._fetch_url("https://example.com/down") + result = reader.fetch_url("https://example.com/down") assert result is None @@ -243,7 +251,7 @@ def mock_fetch(url): return SAMPLE_LLMS_TXT return f"Content of {url}" - with patch.object(reader, "_fetch_url", side_effect=mock_fetch): + with patch.object(reader, "fetch_url", side_effect=mock_fetch): docs = reader.read("https://example.com/llms.txt") # 1 overview + 5 linked docs (max_urls=5) @@ -253,7 +261,7 @@ def mock_fetch(url): def test_read_returns_empty_on_fetch_failure(self): reader = LLMsTxtReader() - with patch.object(reader, "_fetch_url", return_value=None): + with patch.object(reader, "fetch_url", 
return_value=None): docs = reader.read("https://example.com/llms.txt") assert docs == [] @@ -266,7 +274,7 @@ def mock_fetch(url): return SAMPLE_LLMS_TXT return f"Content of {url}" - with patch.object(reader, "_fetch_url", side_effect=mock_fetch): + with patch.object(reader, "fetch_url", side_effect=mock_fetch): docs = reader.read("https://example.com/llms.txt") # 1 overview + 2 linked docs (max_urls=2) @@ -286,6 +294,12 @@ def test_without_knowledge_registers_agentic_tools(self): assert "read_llms_txt_url" in func_names assert "read_llms_txt_and_load_knowledge" not in func_names + def test_without_knowledge_registers_async_tools(self): + tools = LLMsTxtTools() + async_func_names = [func.name for func in tools.async_functions.values()] + assert "get_llms_txt_index" in async_func_names + assert "read_llms_txt_url" in async_func_names + def test_with_knowledge_registers_load(self): mock_knowledge = MagicMock() tools = LLMsTxtTools(knowledge=mock_knowledge) @@ -293,12 +307,24 @@ def test_with_knowledge_registers_load(self): assert "read_llms_txt_and_load_knowledge" in func_names assert "get_llms_txt_index" not in func_names + def test_with_knowledge_registers_async_load(self): + mock_knowledge = MagicMock() + tools = LLMsTxtTools(knowledge=mock_knowledge) + async_func_names = [func.name for func in tools.async_functions.values()] + assert "read_llms_txt_and_load_knowledge" in async_func_names + def test_custom_params(self): tools = LLMsTxtTools(max_urls=50, timeout=10, skip_optional=True) assert tools.max_urls == 50 assert tools.timeout == 10 assert tools.skip_optional is True + def test_reader_is_reused(self): + tools = LLMsTxtTools() + assert tools.reader is not None + assert tools.reader.timeout == tools.timeout + assert tools.reader.max_urls == tools.max_urls + class TestGetLLMsTxtIndex: def test_returns_index_json(self): From 482cd73bb9bb484227e9a57c19d216f30a9f5fe7 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:05:29 -0400 Subject: 
[PATCH 03/23] fix: improve LLMsTxtTools async patterns and deduplicate reader logic - Full async docstrings on all 3 async tool methods so the LLM sees proper tool descriptions in async mode - AsyncClient now receives timeout and proxy via _async_client_kwargs() - Module-level httpx import consistent with Brandfetch/Perplexity - Extract _process_response() to deduplicate content-type classification across fetch_url and async_fetch_url --- .../agno/knowledge/reader/llms_txt_reader.py | 37 ++++++------------- libs/agno/agno/tools/llms_txt.py | 37 ++++++++++++++----- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 5952e3679a..138c69f815 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -175,6 +175,16 @@ def _extract_content(self, html: str) -> str: return soup.get_text(separator="\n", strip=True) + def _process_response(self, content_type: str, text: str) -> str: + """Classify an HTTP response by content-type and return processed text.""" + if any(t in content_type for t in ["text/plain", "text/markdown"]): + return text + + if "text/html" in content_type or text.strip().startswith((" Optional[str]: """Fetch content from a URL, returning text for text-like content or extracted text from HTML.""" try: @@ -184,20 +194,7 @@ def fetch_url(self, url: str) -> Optional[str]: else: response = httpx.get(url, timeout=self.timeout, follow_redirects=True) response.raise_for_status() - - content_type = response.headers.get("content-type", "") - text = response.text - - # If content is plain text or markdown, return as-is - if any(t in content_type for t in ["text/plain", "text/markdown"]): - return text - - # If content is HTML, extract the text - if "text/html" in content_type or text.strip().startswith((" Optional log_debug(f"Fetching asynchronously: {url}") response = await 
client.get(url, timeout=self.timeout, follow_redirects=True) response.raise_for_status() - - content_type = response.headers.get("content-type", "") - text = response.text - - if any(t in content_type for t in ["text/plain", "text/markdown"]): - return text - - if "text/html" in content_type or text.strip().startswith((" Dict[str, Any]: + """Build kwargs for httpx.AsyncClient matching the reader's config.""" + kwargs: Dict[str, Any] = {"timeout": httpx.Timeout(self.timeout)} + if self.reader.proxy: + kwargs["proxy"] = self.reader.proxy + return kwargs + def get_llms_txt_index(self, url: str) -> str: """Reads an llms.txt file and returns the index of all available documentation pages. @@ -108,15 +117,18 @@ def get_llms_txt_index(self, url: str) -> str: return json.dumps(index) async def aget_llms_txt_index(self, url: str) -> str: - """Async variant of get_llms_txt_index. + """Reads an llms.txt file and returns the index of all available documentation pages. + + An llms.txt file is a standardized index of documentation for a project. + This function reads the index and returns all available pages with their titles, + URLs, descriptions, and sections. Use this to discover what documentation is + available, then use read_llms_txt_url to fetch specific pages. :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). :return: JSON with the overview and list of available documentation pages. """ - import httpx - log_info(f"Reading llms.txt index from {url}") - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(**self._async_client_kwargs()) as client: llms_txt_content = await self.reader.async_fetch_url(client, url) if not llms_txt_content: @@ -156,15 +168,16 @@ def read_llms_txt_url(self, url: str) -> str: return content async def aread_llms_txt_url(self, url: str) -> str: - """Async variant of read_llms_txt_url. + """Fetches and returns the content of a specific documentation page URL. 
+ + Use this after calling get_llms_txt_index to fetch the content of specific pages + you want to read. You can call this multiple times for different URLs. :param url: The URL of the documentation page to read. :return: The text content of the page. """ - import httpx - log_debug(f"Fetching URL: {url}") - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(**self._async_client_kwargs()) as client: content = await self.reader.async_fetch_url(client, url) if not content: @@ -202,7 +215,11 @@ def read_llms_txt_and_load_knowledge(self, url: str) -> str: return f"Successfully loaded {len(documents)} documents from llms.txt into the knowledge base" async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: - """Async variant of read_llms_txt_and_load_knowledge. + """Reads an llms.txt file, fetches all linked documentation pages, and loads them into the knowledge base. + + An llms.txt file is a standardized index of documentation for a project. + This function reads the index, fetches every linked page, and stores the content + in the knowledge base for future retrieval. :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). :return: Summary of what was loaded into the knowledge base. From f35ab19e3052d566fef512a774f6bfc1e8e3b14c Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:10:19 -0400 Subject: [PATCH 04/23] fix: delegate knowledge loading to Knowledge.insert() pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of manually reading documents and looping insert(), delegate to self.knowledge.insert(url=url, reader=self.reader) which gives us content hashing, deduplication, status tracking, and proper vector DB insertion — matching the pattern used by WebsiteTools and WikipediaTools. 
--- libs/agno/agno/tools/llms_txt.py | 33 +++--------------- libs/agno/tests/unit/tools/test_llms_txt.py | 37 ++++----------------- 2 files changed, 10 insertions(+), 60 deletions(-) diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 0b42728f8b..8fa8375d2c 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -3,7 +3,6 @@ import httpx -from agno.knowledge.document import Document from agno.knowledge.knowledge import Knowledge from agno.tools import Toolkit from agno.utils.log import log_debug, log_info @@ -199,20 +198,8 @@ def read_llms_txt_and_load_knowledge(self, url: str) -> str: return "Knowledge base not provided" log_info(f"Reading llms.txt from {url}") - documents: List[Document] = self.reader.read(url=url) - - if not documents: - return f"No documents found in llms.txt at {url}" - - log_debug(f"Loading {len(documents)} documents into knowledge base") - for doc in documents: - self.knowledge.insert( - text_content=doc.content, - name=doc.name, - metadata=doc.meta_data, - ) - - return f"Successfully loaded {len(documents)} documents from llms.txt into the knowledge base" + self.knowledge.insert(url=url, reader=self.reader) + return f"Successfully loaded documentation from {url} into the knowledge base" async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: """Reads an llms.txt file, fetches all linked documentation pages, and loads them into the knowledge base. 
@@ -228,17 +215,5 @@ async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: return "Knowledge base not provided" log_info(f"Reading llms.txt from {url}") - documents: List[Document] = await self.reader.async_read(url=url) - - if not documents: - return f"No documents found in llms.txt at {url}" - - log_debug(f"Loading {len(documents)} documents into knowledge base") - for doc in documents: - await self.knowledge.ainsert( - text_content=doc.content, - name=doc.name, - metadata=doc.meta_data, - ) - - return f"Successfully loaded {len(documents)} documents from llms.txt into the knowledge base" + await self.knowledge.ainsert(url=url, reader=self.reader) + return f"Successfully loaded documentation from {url} into the knowledge base" diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index b3cce25d04..1740cd5488 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -377,48 +377,23 @@ def test_returns_error_on_fetch_failure(self): class TestLoadKnowledge: - def test_inserts_into_knowledge(self): + def test_delegates_to_knowledge_insert(self): mock_knowledge = MagicMock() tools = LLMsTxtTools(knowledge=mock_knowledge) - mock_response = MagicMock() - mock_response.headers = {"content-type": "text/plain"} - mock_response.text = "Page content" - mock_response.raise_for_status = MagicMock() + tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - # Simple llms.txt with one link - llms_content = "# Test\n\n## Docs\n\n- [Page](https://example.com/page): A page\n" - call_count = 0 - - def mock_get(url, **kwargs): - nonlocal call_count - call_count += 1 - resp = MagicMock() - resp.headers = {"content-type": "text/plain"} - resp.raise_for_status = MagicMock() - if call_count == 1: - resp.text = llms_content - else: - resp.text = "Page content" - return resp - - with patch("httpx.get", side_effect=mock_get): - result = 
tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - - assert mock_knowledge.insert.called - assert "Successfully loaded" in result + mock_knowledge.insert.assert_called_once_with(url="https://example.com/llms.txt", reader=tools.reader) def test_returns_message_when_no_knowledge(self): tools = LLMsTxtTools() - # Force-call the knowledge method even though it wouldn't be registered result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") assert result == "Knowledge base not provided" - def test_returns_message_when_no_docs(self): + def test_returns_success_message(self): mock_knowledge = MagicMock() tools = LLMsTxtTools(knowledge=mock_knowledge) - with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): - result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") + result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - assert "No documents found" in result + assert "Successfully loaded" in result From 8bb61d77a13a5a55fa4e10b6b3c746235d9c58ee Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:30:15 -0400 Subject: [PATCH 05/23] fix: simplify reader, delegate to Knowledge pipeline, remove dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reader: - Remove redundant state: in_optional and past_first_section replaced by single current_section variable - Remove dead if/else branch on proxy — httpx accepts proxy=None - Remove WHAT comments that restate the next line - Simplify AsyncClient construction (proxy=self.proxy directly) Toolkit: - Extract _format_index helper to deduplicate sync/async index building - Delegate knowledge loading to Knowledge.insert(url=, reader=) pipeline Knowledge: - Skip pre-download when custom reader is provided — URL-based readers like LLMsTxtReader need the URL string, not pre-fetched BytesIO --- libs/agno/agno/knowledge/knowledge.py | 8 ++- 
.../agno/knowledge/reader/llms_txt_reader.py | 33 ++---------- libs/agno/agno/tools/llms_txt.py | 50 ++++++++----------- libs/agno/tests/unit/tools/test_llms_txt.py | 11 +--- 4 files changed, 33 insertions(+), 69 deletions(-) diff --git a/libs/agno/agno/knowledge/knowledge.py b/libs/agno/agno/knowledge/knowledge.py index 200018d1b4..dd01927627 100644 --- a/libs/agno/agno/knowledge/knowledge.py +++ b/libs/agno/agno/knowledge/knowledge.py @@ -1564,7 +1564,9 @@ async def _aload_from_url( file_extension = url_path.suffix.lower() bytes_content = None - if file_extension: + # Skip pre-download when a custom reader is provided — it knows how to + # handle the URL directly (e.g. LLMsTxtReader fetches linked pages) + if file_extension and not content.reader: async with AsyncClient() as client: response = await async_fetch_with_retry(content.url, client=client) bytes_content = BytesIO(response.content) @@ -1716,7 +1718,9 @@ def _load_from_url( file_extension = url_path.suffix.lower() bytes_content = None - if file_extension: + # Skip pre-download when a custom reader is provided — it knows how to + # handle the URL directly (e.g. 
LLMsTxtReader fetches linked pages) + if file_extension and not content.reader: response = fetch_with_retry(content.url) bytes_content = BytesIO(response.content) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 138c69f815..839441b8e1 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -106,29 +106,21 @@ def parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxt """ entries: List[LLMsTxtEntry] = [] current_section = "" - in_optional = False - - lines = content.split("\n") overview_lines: List[str] = [] - past_first_section = False - for line in lines: - # Check for section headers + for line in content.split("\n"): section_match = _SECTION_PATTERN.match(line) if section_match: current_section = section_match.group(1).strip() - past_first_section = True - in_optional = current_section.lower() == "optional" continue - if not past_first_section: + if not current_section: overview_lines.append(line) continue - if self.skip_optional and in_optional: + if self.skip_optional and current_section.lower() == "optional": continue - # Check for links link_match = _LINK_PATTERN.match(line.strip()) if link_match: title = link_match.group(1).strip() @@ -189,10 +181,7 @@ def fetch_url(self, url: str) -> Optional[str]: """Fetch content from a URL, returning text for text-like content or extracted text from HTML.""" try: log_debug(f"Fetching: {url}") - if self.proxy: - response = httpx.get(url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True) - else: - response = httpx.get(url, timeout=self.timeout, follow_redirects=True) + response = httpx.get(url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True) response.raise_for_status() return self._process_response(response.headers.get("content-type", ""), response.text) except httpx.HTTPStatusError as e: @@ -284,23 +273,18 @@ def read(self, url: str, 
name: Optional[str] = None) -> List[Document]: A list of documents from the llms.txt and all linked pages. """ log_debug(f"Reading llms.txt: {url}") - - # Fetch the llms.txt file llms_txt_content = self.fetch_url(url) if not llms_txt_content: log_error(f"Failed to fetch llms.txt from {url}") return [] - # Parse the llms.txt content overview, entries = self.parse_llms_txt(llms_txt_content, url) log_debug(f"Found {len(entries)} linked URLs in llms.txt") - # Limit the number of URLs to fetch entries_to_fetch = entries[: self.max_urls] if len(entries) > self.max_urls: log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})") - # Fetch all linked pages fetched: Dict[str, str] = {} for entry in entries_to_fetch: content = self.fetch_url(entry.url) @@ -321,25 +305,18 @@ async def async_read(self, url: str, name: Optional[str] = None) -> List[Documen A list of documents from the llms.txt and all linked pages. """ log_debug(f"Reading llms.txt asynchronously: {url}") - - client_args = {"proxy": self.proxy} if self.proxy else {} - async with httpx.AsyncClient(**client_args) as client: # type: ignore - # Fetch the llms.txt file + async with httpx.AsyncClient(proxy=self.proxy) as client: llms_txt_content = await self.async_fetch_url(client, url) if not llms_txt_content: log_error(f"Failed to fetch llms.txt from {url}") return [] - # Parse the llms.txt content overview, entries = self.parse_llms_txt(llms_txt_content, url) log_debug(f"Found {len(entries)} linked URLs in llms.txt") - # Limit the number of URLs to fetch entries_to_fetch = entries[: self.max_urls] if len(entries) > self.max_urls: log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})") - - # Fetch all linked pages concurrently with a semaphore to limit parallelism semaphore = asyncio.Semaphore(_MAX_CONCURRENT_FETCHES) async def _fetch_entry(entry: LLMsTxtEntry) -> Tuple[str, Optional[str]]: diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 
8fa8375d2c..1294198355 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -82,6 +82,24 @@ def _async_client_kwargs(self) -> Dict[str, Any]: kwargs["proxy"] = self.reader.proxy return kwargs + def _format_index(self, overview: str, entries: list) -> str: + """Build JSON index response from parsed llms.txt data.""" + return json.dumps( + { + "overview": overview, + "pages": [ + { + "title": e.title, + "url": e.url, + "description": e.description, + "section": e.section, + } + for e in entries + ], + "total_pages": len(entries), + } + ) + def get_llms_txt_index(self, url: str) -> str: """Reads an llms.txt file and returns the index of all available documentation pages. @@ -99,21 +117,7 @@ def get_llms_txt_index(self, url: str) -> str: return f"Failed to fetch llms.txt from {url}" overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) - - index = { - "overview": overview, - "pages": [ - { - "title": entry.title, - "url": entry.url, - "description": entry.description, - "section": entry.section, - } - for entry in entries - ], - "total_pages": len(entries), - } - return json.dumps(index) + return self._format_index(overview, entries) async def aget_llms_txt_index(self, url: str) -> str: """Reads an llms.txt file and returns the index of all available documentation pages. @@ -134,21 +138,7 @@ async def aget_llms_txt_index(self, url: str) -> str: return f"Failed to fetch llms.txt from {url}" overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) - - index = { - "overview": overview, - "pages": [ - { - "title": entry.title, - "url": entry.url, - "description": entry.description, - "section": entry.section, - } - for entry in entries - ], - "total_pages": len(entries), - } - return json.dumps(index) + return self._format_index(overview, entries) def read_llms_txt_url(self, url: str) -> str: """Fetches and returns the content of a specific documentation page URL. 
diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index 1740cd5488..500e0e3fcc 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -381,19 +381,12 @@ def test_delegates_to_knowledge_insert(self): mock_knowledge = MagicMock() tools = LLMsTxtTools(knowledge=mock_knowledge) - tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") + result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") mock_knowledge.insert.assert_called_once_with(url="https://example.com/llms.txt", reader=tools.reader) + assert "Successfully loaded" in result def test_returns_message_when_no_knowledge(self): tools = LLMsTxtTools() result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") assert result == "Knowledge base not provided" - - def test_returns_success_message(self): - mock_knowledge = MagicMock() - tools = LLMsTxtTools(knowledge=mock_knowledge) - - result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - - assert "Successfully loaded" in result From 7d88c4457bb9ccc8259cdc8aa11471c5722b6bdf Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:35:53 -0400 Subject: [PATCH 06/23] =?UTF-8?q?fix:=20remove=20include=5Fllms=5Ftxt=5Fco?= =?UTF-8?q?ntent=20parameter=20=E2=80=94=20always=20include=20overview?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The overview document (title + summary from the llms.txt) provides essential context about the project. No caller ever set this to False. Removing the parameter and its branch simplifies the reader. 
--- libs/agno/agno/knowledge/reader/llms_txt_reader.py | 6 +----- libs/agno/tests/unit/tools/test_llms_txt.py | 14 -------------- 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 839441b8e1..2eaac0896f 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -56,7 +56,6 @@ def __init__( max_urls: int = 100, timeout: int = 30, proxy: Optional[str] = None, - include_llms_txt_content: bool = True, skip_optional: bool = False, **kwargs, ): @@ -67,7 +66,6 @@ def __init__( max_urls: Maximum number of linked URLs to fetch. Defaults to 100. timeout: HTTP request timeout in seconds. Defaults to 30. proxy: Optional HTTP proxy URL. - include_llms_txt_content: Whether to include the llms.txt file itself as a document. skip_optional: Whether to skip URLs in the "Optional" section. """ if chunking_strategy is None: @@ -77,7 +75,6 @@ def __init__( self.max_urls = max_urls self.timeout = timeout self.proxy = proxy - self.include_llms_txt_content = include_llms_txt_content self.skip_optional = skip_optional @classmethod @@ -222,8 +219,7 @@ def _build_documents( """Build Document list from fetched content.""" documents: List[Document] = [] - # Optionally include the llms.txt overview as a document - if self.include_llms_txt_content and overview: + if overview: doc = Document( name=name or llms_txt_url, id=str(uuid.uuid4()), diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index 500e0e3fcc..7857341a27 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -60,7 +60,6 @@ def test_defaults(self): assert reader.max_urls == 100 assert reader.timeout == 30 assert reader.proxy is None - assert reader.include_llms_txt_content is True assert reader.skip_optional is False def 
test_custom_params(self): @@ -229,19 +228,6 @@ def test_skips_unfetched_entries(self): # Only the overview doc assert len(docs) == 1 - def test_excludes_overview_when_disabled(self): - reader = LLMsTxtReader(chunk=False, include_llms_txt_content=False) - entries = [ - LLMsTxtEntry(title="Page", url="https://example.com/page", description="", section="Docs"), - ] - fetched = {"https://example.com/page": "Page content"} - - docs = reader._build_documents("Overview", entries, fetched, "https://example.com/llms.txt", None) - - assert len(docs) == 1 - assert docs[0].meta_data["type"] == "llms_txt_linked_doc" - - class TestRead: def test_read_fetches_and_builds_docs(self): reader = LLMsTxtReader(max_urls=5, chunk=False) From 4474d950a474542610b76ba09325f08f2d827190 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:44:39 -0400 Subject: [PATCH 07/23] =?UTF-8?q?fix:=20clean=20up=20reader=20=E2=80=94=20?= =?UTF-8?q?remove=20init=20docstring,=20simplify=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove __init__ docstring (no other reader has one) - Rewrite parse_llms_txt: replace 3 continue statements with clean if/elif/else chain — each line falls into one bucket - Remove include_llms_txt_content param (always True, never exposed) --- .../agno/knowledge/reader/llms_txt_reader.py | 50 ++++++------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 2eaac0896f..ce4c95254e 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -59,15 +59,6 @@ def __init__( skip_optional: bool = False, **kwargs, ): - """Initialize the LLMsTxtReader. - - Args: - chunking_strategy: Strategy for chunking documents. - max_urls: Maximum number of linked URLs to fetch. Defaults to 100. 
- timeout: HTTP request timeout in seconds. Defaults to 30. - proxy: Optional HTTP proxy URL. - skip_optional: Whether to skip URLs in the "Optional" section. - """ if chunking_strategy is None: chunk_size = kwargs.get("chunk_size", 5000) chunking_strategy = FixedSizeChunking(chunk_size=chunk_size) @@ -109,33 +100,24 @@ def parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxt section_match = _SECTION_PATTERN.match(line) if section_match: current_section = section_match.group(1).strip() - continue - - if not current_section: + elif not current_section: overview_lines.append(line) - continue - - if self.skip_optional and current_section.lower() == "optional": - continue - - link_match = _LINK_PATTERN.match(line.strip()) - if link_match: - title = link_match.group(1).strip() - url = link_match.group(2).strip() - description = (link_match.group(3) or "").strip() - - # Resolve relative URLs - if not url.startswith(("http://", "https://")): - url = urljoin(base_url, url) - - entries.append( - LLMsTxtEntry( - title=title, - url=url, - description=description, - section=current_section, + elif self.skip_optional and current_section.lower() == "optional": + pass + else: + link_match = _LINK_PATTERN.match(line.strip()) + if link_match: + url = link_match.group(2).strip() + if not url.startswith(("http://", "https://")): + url = urljoin(base_url, url) + entries.append( + LLMsTxtEntry( + title=link_match.group(1).strip(), + url=url, + description=(link_match.group(3) or "").strip(), + section=current_section, + ) ) - ) overview = "\n".join(overview_lines).strip() return overview, entries From 9039720decc5d04ea9d0b11ef228c51093f32c33 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:46:12 -0400 Subject: [PATCH 08/23] fix: inline _extract_content into _process_response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _extract_content was called exactly once. 
Inlining removes one indirection layer — the reader now has only the helpers that are actually shared between read() and async_read(). --- .../agno/knowledge/reader/llms_txt_reader.py | 45 ++++++++----------- libs/agno/tests/unit/tools/test_llms_txt.py | 10 ++--- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index ce4c95254e..70866196e2 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -122,37 +122,30 @@ def parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxt overview = "\n".join(overview_lines).strip() return overview, entries - def _extract_content(self, html: str) -> str: - """Extract readable text content from HTML.""" - try: - from bs4 import BeautifulSoup - except ImportError: - raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.") - - soup = BeautifulSoup(html, "html.parser") - - # Remove unwanted elements - for tag in soup.find_all(["script", "style", "nav", "header", "footer", "aside"]): - tag.decompose() - - # Try to find main content - main = soup.find("main") or soup.find("article") or soup.find(attrs={"role": "main"}) - if main: - return main.get_text(separator="\n", strip=True) - - body = soup.find("body") - if body: - return body.get_text(separator="\n", strip=True) - - return soup.get_text(separator="\n", strip=True) - def _process_response(self, content_type: str, text: str) -> str: - """Classify an HTTP response by content-type and return processed text.""" + """Classify an HTTP response by content-type and extract text.""" if any(t in content_type for t in ["text/plain", "text/markdown"]): return text if "text/html" in content_type or text.strip().startswith(("
Main content here
Foot
" - result = reader._extract_content(html) + result = reader._process_response("text/html",html) assert "Main content here" in result assert "Nav" not in result def test_extracts_from_body_fallback(self): reader = LLMsTxtReader() html = "
Body content
" - result = reader._extract_content(html) + result = reader._process_response("text/html",html) assert "Body content" in result def test_strips_script_and_style(self): reader = LLMsTxtReader() html = "

Text

" - result = reader._extract_content(html) + result = reader._process_response("text/html",html) assert "var x" not in result assert "Text" in result def test_preserves_structure_with_newlines(self): reader = LLMsTxtReader() html = "

First paragraph

Second paragraph

" - result = reader._extract_content(html) + result = reader._process_response("text/html",html) assert "First paragraph" in result assert "Second paragraph" in result assert "\n" in result From d6becc8f8df6e0eaa7fb2a3d710ab132c5923199 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:48:51 -0400 Subject: [PATCH 09/23] =?UTF-8?q?fix:=20simplify=20fetch=5Furl=20=E2=80=94?= =?UTF-8?q?=20collapse=203=20except=20blocks=20into=201?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 3-way exception split (HTTPStatusError, RequestError, Exception) was duplicated between sync and async. For a reader fetching doc pages, a single catch with a warning log is sufficient. Each method is now 4 lines instead of 12. --- .../agno/knowledge/reader/llms_txt_reader.py | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 70866196e2..07aedf1584 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -150,37 +150,23 @@ def _process_response(self, content_type: str, text: str) -> str: return text def fetch_url(self, url: str) -> Optional[str]: - """Fetch content from a URL, returning text for text-like content or extracted text from HTML.""" + """Fetch a URL and return its text content, or None on failure.""" try: - log_debug(f"Fetching: {url}") response = httpx.get(url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True) response.raise_for_status() return self._process_response(response.headers.get("content-type", ""), response.text) - except httpx.HTTPStatusError as e: - log_warning(f"HTTP error fetching {url}: {e.response.status_code}") - return None - except httpx.RequestError as e: - log_warning(f"Request error fetching {url}: {str(e)}") - return None except Exception as e: - log_error(f"Failed to 
fetch {url}: {str(e)}") + log_warning(f"Failed to fetch {url}: {e}") return None async def async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]: - """Asynchronously fetch content from a URL.""" + """Async variant of fetch_url using a shared client.""" try: - log_debug(f"Fetching asynchronously: {url}") response = await client.get(url, timeout=self.timeout, follow_redirects=True) response.raise_for_status() return self._process_response(response.headers.get("content-type", ""), response.text) - except httpx.HTTPStatusError as e: - log_warning(f"HTTP error fetching {url}: {e.response.status_code}") - return None - except httpx.RequestError as e: - log_warning(f"Request error fetching {url}: {str(e)}") - return None except Exception as e: - log_error(f"Failed to fetch {url}: {str(e)}") + log_warning(f"Failed to fetch {url}: {e}") return None def _build_documents( From 5ed981b403dd63409ad8f834bbcc61aa69e45243 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 11:57:23 -0400 Subject: [PATCH 10/23] fix: remove module-level constant, inline semaphore with WHY comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keep the semaphore (Codex confirms: this is external HTTP fan-out, not local processing — unbounded gather would burst 100 requests at once). Remove _MAX_CONCURRENT_FETCHES constant, inline the value with a comment explaining why it exists. 
--- libs/agno/agno/knowledge/reader/llms_txt_reader.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 07aedf1584..3b713390fc 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -20,9 +20,6 @@ # Pattern to match H2 section headers _SECTION_PATTERN = re.compile(r"^##\s+(.+)$", re.MULTILINE) -# Maximum number of concurrent HTTP requests when fetching linked pages -_MAX_CONCURRENT_FETCHES = 10 - @dataclass class LLMsTxtEntry: @@ -274,7 +271,9 @@ async def async_read(self, url: str, name: Optional[str] = None) -> List[Documen entries_to_fetch = entries[: self.max_urls] if len(entries) > self.max_urls: log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})") - semaphore = asyncio.Semaphore(_MAX_CONCURRENT_FETCHES) + # httpx pool limits handle per-host connections, but we also cap total + # in-flight fetches to avoid bursting 100 requests at third-party servers + semaphore = asyncio.Semaphore(10) async def _fetch_entry(entry: LLMsTxtEntry) -> Tuple[str, Optional[str]]: async with semaphore: From a5fe2a386770ce0bd8a5f8687449ed9249e38a7a Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:01:11 -0400 Subject: [PATCH 11/23] fix: reuse fetch_with_retry utils instead of raw httpx calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add timeout and follow_redirects params to existing fetch_with_retry and async_fetch_with_retry in utils/http.py. Reader now uses these shared utils instead of making raw httpx.get calls — retry logic, error handling, and connection management in one place. Removed semaphore — httpx AsyncClient already limits concurrent connections per host (default 20). 
--- .../agno/knowledge/reader/llms_txt_reader.py | 25 +++++++++++-------- libs/agno/agno/utils/http.py | 19 ++++++++------ libs/agno/tests/unit/tools/test_llms_txt.py | 9 ++++--- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 3b713390fc..5feb87acea 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -12,6 +12,7 @@ from agno.knowledge.document.base import Document from agno.knowledge.reader.base import Reader from agno.knowledge.types import ContentType +from agno.utils.http import async_fetch_with_retry, fetch_with_retry from agno.utils.log import log_debug, log_error, log_warning # Pattern to match markdown links: - [Title](url) or - [Title](url): description @@ -128,7 +129,9 @@ def _process_response(self, content_type: str, text: str) -> str: try: from bs4 import BeautifulSoup except ImportError: - raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.") + raise ImportError( + "The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`." 
+ ) soup = BeautifulSoup(text, "html.parser") for tag in soup.find_all(["script", "style", "nav", "header", "footer", "aside"]): @@ -149,8 +152,9 @@ def _process_response(self, content_type: str, text: str) -> str: def fetch_url(self, url: str) -> Optional[str]: """Fetch a URL and return its text content, or None on failure.""" try: - response = httpx.get(url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True) - response.raise_for_status() + response = fetch_with_retry( + url, max_retries=1, proxy=self.proxy, timeout=self.timeout, follow_redirects=True + ) return self._process_response(response.headers.get("content-type", ""), response.text) except Exception as e: log_warning(f"Failed to fetch {url}: {e}") @@ -159,8 +163,9 @@ def fetch_url(self, url: str) -> Optional[str]: async def async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]: """Async variant of fetch_url using a shared client.""" try: - response = await client.get(url, timeout=self.timeout, follow_redirects=True) - response.raise_for_status() + response = await async_fetch_with_retry( + url, client=client, max_retries=1, timeout=self.timeout, follow_redirects=True + ) return self._process_response(response.headers.get("content-type", ""), response.text) except Exception as e: log_warning(f"Failed to fetch {url}: {e}") @@ -271,14 +276,12 @@ async def async_read(self, url: str, name: Optional[str] = None) -> List[Documen entries_to_fetch = entries[: self.max_urls] if len(entries) > self.max_urls: log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})") - # httpx pool limits handle per-host connections, but we also cap total - # in-flight fetches to avoid bursting 100 requests at third-party servers - semaphore = asyncio.Semaphore(10) + # httpx AsyncClient limits concurrent connections per host (default 20), + # so we don't need application-level throttling async def _fetch_entry(entry: LLMsTxtEntry) -> Tuple[str, Optional[str]]: - async with semaphore: - 
content = await self.async_fetch_url(client, entry.url) - return entry.url, content + content = await self.async_fetch_url(client, entry.url) + return entry.url, content results = await asyncio.gather(*[_fetch_entry(e) for e in entries_to_fetch]) fetched: Dict[str, str] = {entry_url: content for entry_url, content in results if content} diff --git a/libs/agno/agno/utils/http.py b/libs/agno/agno/utils/http.py index ca887b3e83..053767033b 100644 --- a/libs/agno/agno/utils/http.py +++ b/libs/agno/agno/utils/http.py @@ -179,12 +179,16 @@ def fetch_with_retry( max_retries: int = DEFAULT_MAX_RETRIES, backoff_factor: int = DEFAULT_BACKOFF_FACTOR, proxy: Optional[str] = None, + timeout: Optional[int] = None, + follow_redirects: bool = False, ) -> httpx.Response: """Synchronous HTTP GET with retry logic.""" for attempt in range(max_retries): try: - response = httpx.get(url, proxy=proxy) if proxy else httpx.get(url) + response = httpx.get( + url, proxy=proxy, follow_redirects=follow_redirects, timeout=timeout # type: ignore[arg-type] + ) response.raise_for_status() return response except httpx.RequestError as e: @@ -198,7 +202,7 @@ def fetch_with_retry( logger.exception(f"HTTP error for {url}: {e.response.status_code} - {e.response.text}") raise - raise httpx.RequestError(f"Failed to fetch {url} after {max_retries} attempts") + raise httpx.RequestError(f"Failed to fetch {url} after {max_retries} attempts") # type: ignore[call-arg] async def async_fetch_with_retry( @@ -207,16 +211,17 @@ async def async_fetch_with_retry( max_retries: int = DEFAULT_MAX_RETRIES, backoff_factor: int = DEFAULT_BACKOFF_FACTOR, proxy: Optional[str] = None, + timeout: Optional[int] = None, + follow_redirects: bool = False, ) -> httpx.Response: """Asynchronous HTTP GET with retry logic.""" async def _fetch(): if client is None: - client_args = {"proxy": proxy} if proxy else {} - async with httpx.AsyncClient(**client_args) as local_client: # type: ignore - return await local_client.get(url) + async 
with httpx.AsyncClient(proxy=proxy) as local_client: + return await local_client.get(url, follow_redirects=follow_redirects, timeout=timeout) # type: ignore[arg-type] else: - return await client.get(url) + return await client.get(url, follow_redirects=follow_redirects, timeout=timeout) # type: ignore[arg-type] for attempt in range(max_retries): try: @@ -234,4 +239,4 @@ async def _fetch(): logger.exception(f"HTTP error for {url}: {e.response.status_code} - {e.response.text}") raise - raise httpx.RequestError(f"Failed to fetch {url} after {max_retries} attempts") + raise httpx.RequestError(f"Failed to fetch {url} after {max_retries} attempts") # type: ignore[call-arg] diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index f4939b7e3b..0346f06c5f 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -128,27 +128,27 @@ class TestProcessResponse: def test_extracts_from_main_tag(self): reader = LLMsTxtReader() html = "
Main content here
Foot
" - result = reader._process_response("text/html",html) + result = reader._process_response("text/html", html) assert "Main content here" in result assert "Nav" not in result def test_extracts_from_body_fallback(self): reader = LLMsTxtReader() html = "
Body content
" - result = reader._process_response("text/html",html) + result = reader._process_response("text/html", html) assert "Body content" in result def test_strips_script_and_style(self): reader = LLMsTxtReader() html = "

Text

" - result = reader._process_response("text/html",html) + result = reader._process_response("text/html", html) assert "var x" not in result assert "Text" in result def test_preserves_structure_with_newlines(self): reader = LLMsTxtReader() html = "

First paragraph

Second paragraph

" - result = reader._process_response("text/html",html) + result = reader._process_response("text/html", html) assert "First paragraph" in result assert "Second paragraph" in result assert "\n" in result @@ -228,6 +228,7 @@ def test_skips_unfetched_entries(self): # Only the overview doc assert len(docs) == 1 + class TestRead: def test_read_fetches_and_builds_docs(self): reader = LLMsTxtReader(max_urls=5, chunk=False) From 62e75c0cc9c9625b558a1979cfcd1ff42113a348 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:03:18 -0400 Subject: [PATCH 12/23] =?UTF-8?q?fix:=20change=20defaults=20=E2=80=94=20ma?= =?UTF-8?q?x=5Furls=3D20,=20timeout=3D60?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit max_urls=100 was too high — would overwhelm model context in agentic mode. 20 matches the knowledge cookbook and WebsiteReader's max_links=10 ballpark. timeout=60 matches the global httpx client default. --- libs/agno/agno/knowledge/reader/llms_txt_reader.py | 6 +++--- libs/agno/agno/tools/llms_txt.py | 8 ++++---- libs/agno/tests/unit/tools/test_llms_txt.py | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 5feb87acea..dd6e22e430 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -44,15 +44,15 @@ class LLMsTxtReader(Reader): - H2-delimited sections containing markdown links to documentation pages Example: - reader = LLMsTxtReader(max_urls=50) + reader = LLMsTxtReader(max_urls=20) documents = reader.read("https://docs.example.com/llms.txt") """ def __init__( self, chunking_strategy: Optional[ChunkingStrategy] = None, - max_urls: int = 100, - timeout: int = 30, + max_urls: int = 20, + timeout: int = 60, proxy: Optional[str] = None, skip_optional: bool = False, **kwargs, diff --git a/libs/agno/agno/tools/llms_txt.py 
b/libs/agno/agno/tools/llms_txt.py index 1294198355..ecbd05dbd2 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -27,8 +27,8 @@ class LLMsTxtTools(Toolkit): Args: knowledge: Optional Knowledge instance. When provided, enables knowledge loading mode. - max_urls: Maximum number of linked URLs to fetch when loading into knowledge. Defaults to 100. - timeout: HTTP request timeout in seconds. Defaults to 30. + max_urls: Maximum number of linked URLs to fetch when loading into knowledge. Defaults to 20. + timeout: HTTP request timeout in seconds. Defaults to 60. skip_optional: Whether to skip URLs listed in the "Optional" section. Defaults to False. Example: @@ -45,8 +45,8 @@ class LLMsTxtTools(Toolkit): def __init__( self, knowledge: Optional[Knowledge] = None, - max_urls: int = 100, - timeout: int = 30, + max_urls: int = 20, + timeout: int = 60, skip_optional: bool = False, **kwargs, ): diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index 0346f06c5f..edfb6023c9 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -57,8 +57,8 @@ class TestLLMsTxtReaderInit: def test_defaults(self): reader = LLMsTxtReader() - assert reader.max_urls == 100 - assert reader.timeout == 30 + assert reader.max_urls == 20 + assert reader.timeout == 60 assert reader.proxy is None assert reader.skip_optional is False From 1d8312ffb045f24fe23e7aca9ad53578cf352023 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:04:23 -0400 Subject: [PATCH 13/23] =?UTF-8?q?fix:=20move=20imports=20to=20module=20lev?= =?UTF-8?q?el=20=E2=80=94=20bs4=20and=20LLMsTxtReader?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bs4 import now fails at import time (matching WebsiteReader and WebSearchReader pattern) instead of deep inside a fetch call. 
LLMsTxtReader import moved to top of toolkit — no reason to defer an internal agno module. --- libs/agno/agno/knowledge/reader/llms_txt_reader.py | 12 +++++------- libs/agno/agno/tools/llms_txt.py | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index dd6e22e430..8689ff7805 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -7,6 +7,11 @@ import httpx +try: + from bs4 import BeautifulSoup # noqa: F401 +except ImportError: + raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.") + from agno.knowledge.chunking.fixed import FixedSizeChunking from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType from agno.knowledge.document.base import Document @@ -126,13 +131,6 @@ def _process_response(self, content_type: str, text: str) -> str: return text if "text/html" in content_type or text.strip().startswith((" Date: Fri, 10 Apr 2026 12:05:45 -0400 Subject: [PATCH 14/23] fix: remove class docstring and WHAT comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Class docstring was a 30-line essay — most toolkits have none. The code structure already shows the two modes (with/without knowledge). Removed remaining WHAT comment in _build_documents. 
--- .../agno/knowledge/reader/llms_txt_reader.py | 1 - libs/agno/agno/tools/llms_txt.py | 32 ------------------- 2 files changed, 33 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 8689ff7805..614c57e91c 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -195,7 +195,6 @@ def _build_documents( else: documents.append(doc) - # Add each fetched page as a document for entry in entries: content = fetched.get(entry.url) if not content: diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index b21ad5f45a..9c9441fffa 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -10,38 +10,6 @@ class LLMsTxtTools(Toolkit): - """Tools for reading llms.txt files and loading their linked documentation into a knowledge base. - - The llms.txt format (see https://llmstxt.org) is a standardized way for websites to provide - LLM-friendly documentation indexes. - - This toolkit provides two usage modes: - - **Agentic mode (without knowledge):** The agent gets two tools: - - `get_llms_txt_index` - reads the llms.txt and returns the index of available docs - - `read_llms_txt_url` - fetches a specific URL from the index - The agent reads the index, decides which pages are relevant, and fetches only those. - - **Knowledge mode (with knowledge):** The agent gets one tool: - - `read_llms_txt_and_load_knowledge` - reads the llms.txt, fetches all linked pages, - and loads them into the knowledge base. - - Args: - knowledge: Optional Knowledge instance. When provided, enables knowledge loading mode. - max_urls: Maximum number of linked URLs to fetch when loading into knowledge. Defaults to 20. - timeout: HTTP request timeout in seconds. Defaults to 60. - skip_optional: Whether to skip URLs listed in the "Optional" section. Defaults to False. 
- - Example: - # Agentic mode - agent reads index and picks which docs to fetch - tools = LLMsTxtTools() - agent = Agent(tools=[tools]) - - # Knowledge mode - bulk load all docs into KB - knowledge = Knowledge(vector_db=my_vector_db) - tools = LLMsTxtTools(knowledge=knowledge) - agent = Agent(tools=[tools], knowledge=knowledge) - """ def __init__( self, From 8bdd0610004be47800b4efd11f39e4712e848ff9 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:07:07 -0400 Subject: [PATCH 15/23] =?UTF-8?q?fix:=20clean=20up=20toolkit=20=E2=80=94?= =?UTF-8?q?=20trim=20docstrings,=20simplify=20helpers,=20add=20sections?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Trim tool docstrings: remove repeated llms.txt explanations, keep only what the LLM needs to decide when/how to call the tool - Replace _async_client_kwargs dict builder with _async_client() that returns the client directly - Add section comments to separate helpers / agentic tools / knowledge tools for scannable code - Remove unused Dict import --- libs/agno/agno/tools/llms_txt.py | 66 ++++++++++---------------------- 1 file changed, 20 insertions(+), 46 deletions(-) diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 9c9441fffa..454836e25b 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, List, Optional +from typing import Any, List, Optional import httpx @@ -42,38 +42,28 @@ def __init__( super().__init__(name="llms_txt_tools", tools=tools, async_tools=async_tools_list, **kwargs) - def _async_client_kwargs(self) -> Dict[str, Any]: - """Build kwargs for httpx.AsyncClient matching the reader's config.""" - kwargs: Dict[str, Any] = {"timeout": httpx.Timeout(self.timeout)} - if self.reader.proxy: - kwargs["proxy"] = self.reader.proxy - return kwargs + # ---- Helpers (not exposed to the agent) ---- def _format_index(self, 
overview: str, entries: list) -> str: - """Build JSON index response from parsed llms.txt data.""" return json.dumps( { "overview": overview, "pages": [ - { - "title": e.title, - "url": e.url, - "description": e.description, - "section": e.section, - } + {"title": e.title, "url": e.url, "description": e.description, "section": e.section} for e in entries ], "total_pages": len(entries), } ) + def _async_client(self) -> httpx.AsyncClient: + return httpx.AsyncClient(timeout=self.timeout, proxy=self.reader.proxy) + + # ---- Tools: Agentic mode (without knowledge) ---- + def get_llms_txt_index(self, url: str) -> str: """Reads an llms.txt file and returns the index of all available documentation pages. - - An llms.txt file is a standardized index of documentation for a project. - This function reads the index and returns all available pages with their titles, - URLs, descriptions, and sections. Use this to discover what documentation is - available, then use read_llms_txt_url to fetch specific pages. + Use this to discover what pages are available, then use read_llms_txt_url to fetch specific pages. :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). :return: JSON with the overview and list of available documentation pages. @@ -88,17 +78,13 @@ def get_llms_txt_index(self, url: str) -> str: async def aget_llms_txt_index(self, url: str) -> str: """Reads an llms.txt file and returns the index of all available documentation pages. - - An llms.txt file is a standardized index of documentation for a project. - This function reads the index and returns all available pages with their titles, - URLs, descriptions, and sections. Use this to discover what documentation is - available, then use read_llms_txt_url to fetch specific pages. + Use this to discover what pages are available, then use read_llms_txt_url to fetch specific pages. :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). 
:return: JSON with the overview and list of available documentation pages. """ log_info(f"Reading llms.txt index from {url}") - async with httpx.AsyncClient(**self._async_client_kwargs()) as client: + async with self._async_client() as client: llms_txt_content = await self.reader.async_fetch_url(client, url) if not llms_txt_content: @@ -108,10 +94,8 @@ async def aget_llms_txt_index(self, url: str) -> str: return self._format_index(overview, entries) def read_llms_txt_url(self, url: str) -> str: - """Fetches and returns the content of a specific documentation page URL. - - Use this after calling get_llms_txt_index to fetch the content of specific pages - you want to read. You can call this multiple times for different URLs. + """Fetches and returns the content of a specific documentation page. + Use this after calling get_llms_txt_index to read pages relevant to the user's question. :param url: The URL of the documentation page to read. :return: The text content of the page. @@ -120,33 +104,27 @@ def read_llms_txt_url(self, url: str) -> str: content = self.reader.fetch_url(url) if not content: return f"Failed to fetch content from {url}" - return content async def aread_llms_txt_url(self, url: str) -> str: - """Fetches and returns the content of a specific documentation page URL. - - Use this after calling get_llms_txt_index to fetch the content of specific pages - you want to read. You can call this multiple times for different URLs. + """Fetches and returns the content of a specific documentation page. + Use this after calling get_llms_txt_index to read pages relevant to the user's question. :param url: The URL of the documentation page to read. :return: The text content of the page. 
""" log_debug(f"Fetching URL: {url}") - async with httpx.AsyncClient(**self._async_client_kwargs()) as client: + async with self._async_client() as client: content = await self.reader.async_fetch_url(client, url) if not content: return f"Failed to fetch content from {url}" - return content - def read_llms_txt_and_load_knowledge(self, url: str) -> str: - """Reads an llms.txt file, fetches all linked documentation pages, and loads them into the knowledge base. + # ---- Tools: Knowledge mode (with knowledge) ---- - An llms.txt file is a standardized index of documentation for a project. - This function reads the index, fetches every linked page, and stores the content - in the knowledge base for future retrieval. + def read_llms_txt_and_load_knowledge(self, url: str) -> str: + """Reads an llms.txt file, fetches all linked pages, and loads them into the knowledge base. :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). :return: Summary of what was loaded into the knowledge base. @@ -159,11 +137,7 @@ def read_llms_txt_and_load_knowledge(self, url: str) -> str: return f"Successfully loaded documentation from {url} into the knowledge base" async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: - """Reads an llms.txt file, fetches all linked documentation pages, and loads them into the knowledge base. - - An llms.txt file is a standardized index of documentation for a project. - This function reads the index, fetches every linked page, and stores the content - in the knowledge base for future retrieval. + """Reads an llms.txt file, fetches all linked pages, and loads them into the knowledge base. :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). :return: Summary of what was loaded into the knowledge base. 
From a252b2f45b6de6159a7b8d90abe38769519af551 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:09:22 -0400 Subject: [PATCH 16/23] =?UTF-8?q?fix:=20match=20Gmail=20toolkit=20docstrin?= =?UTF-8?q?g=20pattern=20=E2=80=94=20Args/Returns=20style?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Docstrings now use the same format as GmailTools and GoogleCalendarTools: triple-quote, Args (type): description, Returns: type: description. Replaced section dividers with inline comments matching Gmail pattern. Helpers have no docstrings (underscore prefix signals internal use). --- libs/agno/agno/tools/llms_txt.py | 83 ++++++++++++++++++++------------ libs/agno/agno/utils/http.py | 5 +- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 454836e25b..8fd00e0f24 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -10,14 +10,13 @@ class LLMsTxtTools(Toolkit): - def __init__( self, knowledge: Optional[Knowledge] = None, max_urls: int = 20, timeout: int = 60, skip_optional: bool = False, - **kwargs, + **kwargs: Any, ): self.knowledge: Optional[Knowledge] = knowledge self.max_urls = max_urls @@ -31,18 +30,21 @@ def __init__( tools: List[Any] = [] async_tools_list: List[tuple] = [] - if self.knowledge is not None: - tools.append(self.read_llms_txt_and_load_knowledge) - async_tools_list.append((self.aread_llms_txt_and_load_knowledge, "read_llms_txt_and_load_knowledge")) - else: + # Agentic mode — agent picks which pages to read + if self.knowledge is None: tools.append(self.get_llms_txt_index) tools.append(self.read_llms_txt_url) async_tools_list.append((self.aget_llms_txt_index, "get_llms_txt_index")) async_tools_list.append((self.aread_llms_txt_url, "read_llms_txt_url")) + # Knowledge mode — bulk load all pages into vector DB + else: + tools.append(self.read_llms_txt_and_load_knowledge) + 
async_tools_list.append((self.aread_llms_txt_and_load_knowledge, "read_llms_txt_and_load_knowledge")) super().__init__(name="llms_txt_tools", tools=tools, async_tools=async_tools_list, **kwargs) - # ---- Helpers (not exposed to the agent) ---- + def _async_client(self) -> httpx.AsyncClient: + return httpx.AsyncClient(timeout=self.timeout, proxy=self.reader.proxy) def _format_index(self, overview: str, entries: list) -> str: return json.dumps( @@ -56,17 +58,16 @@ def _format_index(self, overview: str, entries: list) -> str: } ) - def _async_client(self) -> httpx.AsyncClient: - return httpx.AsyncClient(timeout=self.timeout, proxy=self.reader.proxy) - - # ---- Tools: Agentic mode (without knowledge) ---- - def get_llms_txt_index(self, url: str) -> str: - """Reads an llms.txt file and returns the index of all available documentation pages. + """ + Reads an llms.txt file and returns the index of all available documentation pages. Use this to discover what pages are available, then use read_llms_txt_url to fetch specific pages. - :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). - :return: JSON with the overview and list of available documentation pages. + Args: + url (str): The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt) + + Returns: + str: JSON with the overview and list of available documentation pages """ log_info(f"Reading llms.txt index from {url}") llms_txt_content = self.reader.fetch_url(url) @@ -77,11 +78,15 @@ def get_llms_txt_index(self, url: str) -> str: return self._format_index(overview, entries) async def aget_llms_txt_index(self, url: str) -> str: - """Reads an llms.txt file and returns the index of all available documentation pages. + """ + Reads an llms.txt file and returns the index of all available documentation pages. Use this to discover what pages are available, then use read_llms_txt_url to fetch specific pages. - :param url: The URL of the llms.txt file (e.g. 
https://docs.example.com/llms.txt). - :return: JSON with the overview and list of available documentation pages. + Args: + url (str): The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt) + + Returns: + str: JSON with the overview and list of available documentation pages """ log_info(f"Reading llms.txt index from {url}") async with self._async_client() as client: @@ -94,11 +99,15 @@ async def aget_llms_txt_index(self, url: str) -> str: return self._format_index(overview, entries) def read_llms_txt_url(self, url: str) -> str: - """Fetches and returns the content of a specific documentation page. + """ + Fetches and returns the content of a specific documentation page. Use this after calling get_llms_txt_index to read pages relevant to the user's question. - :param url: The URL of the documentation page to read. - :return: The text content of the page. + Args: + url (str): The URL of the documentation page to read + + Returns: + str: The text content of the page """ log_debug(f"Fetching URL: {url}") content = self.reader.fetch_url(url) @@ -107,11 +116,15 @@ def read_llms_txt_url(self, url: str) -> str: return content async def aread_llms_txt_url(self, url: str) -> str: - """Fetches and returns the content of a specific documentation page. + """ + Fetches and returns the content of a specific documentation page. Use this after calling get_llms_txt_index to read pages relevant to the user's question. - :param url: The URL of the documentation page to read. - :return: The text content of the page. 
+ Args: + url (str): The URL of the documentation page to read + + Returns: + str: The text content of the page """ log_debug(f"Fetching URL: {url}") async with self._async_client() as client: @@ -121,13 +134,15 @@ async def aread_llms_txt_url(self, url: str) -> str: return f"Failed to fetch content from {url}" return content - # ---- Tools: Knowledge mode (with knowledge) ---- - def read_llms_txt_and_load_knowledge(self, url: str) -> str: - """Reads an llms.txt file, fetches all linked pages, and loads them into the knowledge base. + """ + Reads an llms.txt file, fetches all linked pages, and loads them into the knowledge base. - :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). - :return: Summary of what was loaded into the knowledge base. + Args: + url (str): The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt) + + Returns: + str: Summary of what was loaded into the knowledge base """ if self.knowledge is None: return "Knowledge base not provided" @@ -137,10 +152,14 @@ def read_llms_txt_and_load_knowledge(self, url: str) -> str: return f"Successfully loaded documentation from {url} into the knowledge base" async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: - """Reads an llms.txt file, fetches all linked pages, and loads them into the knowledge base. + """ + Reads an llms.txt file, fetches all linked pages, and loads them into the knowledge base. + + Args: + url (str): The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt) - :param url: The URL of the llms.txt file (e.g. https://docs.example.com/llms.txt). - :return: Summary of what was loaded into the knowledge base. 
+ Returns: + str: Summary of what was loaded into the knowledge base """ if self.knowledge is None: return "Knowledge base not provided" diff --git a/libs/agno/agno/utils/http.py b/libs/agno/agno/utils/http.py index 053767033b..833650dd4d 100644 --- a/libs/agno/agno/utils/http.py +++ b/libs/agno/agno/utils/http.py @@ -187,7 +187,10 @@ def fetch_with_retry( for attempt in range(max_retries): try: response = httpx.get( - url, proxy=proxy, follow_redirects=follow_redirects, timeout=timeout # type: ignore[arg-type] + url, + proxy=proxy, + follow_redirects=follow_redirects, + timeout=timeout, # type: ignore[arg-type] ) response.raise_for_status() return response From bc918b0ddda96e3ea499785f382509556fefa704 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:12:44 -0400 Subject: [PATCH 17/23] =?UTF-8?q?fix:=20add=20try/except=20to=20all=20tool?= =?UTF-8?q?s,=20reorder=20methods=20=E2=80=94=20helpers=20then=20public?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toolkit: every tool method now wrapped in try/except returning error strings, matching Gmail/Calendar pattern. Helpers at top, tools below. Reader: reordered — __init__, classmethods, helpers (_process_response, _build_documents), then public methods (parse_llms_txt, fetch_url, read, async_read). Removed bloated docstrings on helpers. Trimmed class docstring to just the example. 
--- .../agno/knowledge/reader/llms_txt_reader.py | 160 +++++++----------- libs/agno/agno/tools/llms_txt.py | 82 +++++---- 2 files changed, 110 insertions(+), 132 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 614c57e91c..8f0a2a5e78 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -2,7 +2,7 @@ import re import uuid from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple from urllib.parse import urljoin import httpx @@ -20,17 +20,12 @@ from agno.utils.http import async_fetch_with_retry, fetch_with_retry from agno.utils.log import log_debug, log_error, log_warning -# Pattern to match markdown links: - [Title](url) or - [Title](url): description -# Note: titles with nested brackets (e.g. [Agent [Beta]](url)) are not supported. _LINK_PATTERN = re.compile(r"-\s+\[([^\]]+)\]\(([^)]+)\)(?::\s*(.+))?") -# Pattern to match H2 section headers _SECTION_PATTERN = re.compile(r"^##\s+(.+)$", re.MULTILINE) @dataclass class LLMsTxtEntry: - """A single entry parsed from an llms.txt file.""" - title: str url: str description: str @@ -38,15 +33,7 @@ class LLMsTxtEntry: class LLMsTxtReader(Reader): - """Reader for llms.txt files. - - Reads an llms.txt file (see https://llmstxt.org), parses all linked documentation URLs, - fetches the content of each linked page, and returns them as Documents. - - The llms.txt format is a standardized markdown file with: - - An H1 heading (project name) - - An optional blockquote summary - - H2-delimited sections containing markdown links to documentation pages + """Reader for llms.txt files (see https://llmstxt.org). 
Example: reader = LLMsTxtReader(max_urls=20) @@ -60,7 +47,7 @@ def __init__( timeout: int = 60, proxy: Optional[str] = None, skip_optional: bool = False, - **kwargs, + **kwargs: Any, ): if chunking_strategy is None: chunk_size = kwargs.get("chunk_size", 5000) @@ -85,48 +72,9 @@ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]: def get_supported_content_types(cls) -> List[ContentType]: return [ContentType.URL] - def parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxtEntry]]: - """Parse an llms.txt file and extract all linked URLs. - - Args: - content: The raw text content of the llms.txt file. - base_url: The base URL for resolving relative links. - - Returns: - A tuple of (overview text, list of LLMsTxtEntry). - """ - entries: List[LLMsTxtEntry] = [] - current_section = "" - overview_lines: List[str] = [] - - for line in content.split("\n"): - section_match = _SECTION_PATTERN.match(line) - if section_match: - current_section = section_match.group(1).strip() - elif not current_section: - overview_lines.append(line) - elif self.skip_optional and current_section.lower() == "optional": - pass - else: - link_match = _LINK_PATTERN.match(line.strip()) - if link_match: - url = link_match.group(2).strip() - if not url.startswith(("http://", "https://")): - url = urljoin(base_url, url) - entries.append( - LLMsTxtEntry( - title=link_match.group(1).strip(), - url=url, - description=(link_match.group(3) or "").strip(), - section=current_section, - ) - ) - - overview = "\n".join(overview_lines).strip() - return overview, entries + # Helpers def _process_response(self, content_type: str, text: str) -> str: - """Classify an HTTP response by content-type and extract text.""" if any(t in content_type for t in ["text/plain", "text/markdown"]): return text @@ -147,28 +95,6 @@ def _process_response(self, content_type: str, text: str) -> str: return text - def fetch_url(self, url: str) -> Optional[str]: - """Fetch a URL and return its 
text content, or None on failure.""" - try: - response = fetch_with_retry( - url, max_retries=1, proxy=self.proxy, timeout=self.timeout, follow_redirects=True - ) - return self._process_response(response.headers.get("content-type", ""), response.text) - except Exception as e: - log_warning(f"Failed to fetch {url}: {e}") - return None - - async def async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]: - """Async variant of fetch_url using a shared client.""" - try: - response = await async_fetch_with_retry( - url, client=client, max_retries=1, timeout=self.timeout, follow_redirects=True - ) - return self._process_response(response.headers.get("content-type", ""), response.text) - except Exception as e: - log_warning(f"Failed to fetch {url}: {e}") - return None - def _build_documents( self, overview: str, @@ -177,17 +103,13 @@ def _build_documents( llms_txt_url: str, name: Optional[str], ) -> List[Document]: - """Build Document list from fetched content.""" documents: List[Document] = [] if overview: doc = Document( name=name or llms_txt_url, id=str(uuid.uuid4()), - meta_data={ - "url": llms_txt_url, - "type": "llms_txt_overview", - }, + meta_data={"url": llms_txt_url, "type": "llms_txt_overview"}, content=overview, ) if self.chunk: @@ -218,16 +140,60 @@ def _build_documents( return documents - def read(self, url: str, name: Optional[str] = None) -> List[Document]: - """Read an llms.txt file and all its linked documentation. + # Public methods - Args: - url: The URL of the llms.txt file. - name: Optional name for the documents. + def parse_llms_txt(self, content: str, base_url: str) -> Tuple[str, List[LLMsTxtEntry]]: + entries: List[LLMsTxtEntry] = [] + current_section = "" + overview_lines: List[str] = [] - Returns: - A list of documents from the llms.txt and all linked pages. 
- """ + for line in content.split("\n"): + section_match = _SECTION_PATTERN.match(line) + if section_match: + current_section = section_match.group(1).strip() + elif not current_section: + overview_lines.append(line) + elif self.skip_optional and current_section.lower() == "optional": + pass + else: + link_match = _LINK_PATTERN.match(line.strip()) + if link_match: + url = link_match.group(2).strip() + if not url.startswith(("http://", "https://")): + url = urljoin(base_url, url) + entries.append( + LLMsTxtEntry( + title=link_match.group(1).strip(), + url=url, + description=(link_match.group(3) or "").strip(), + section=current_section, + ) + ) + + overview = "\n".join(overview_lines).strip() + return overview, entries + + def fetch_url(self, url: str) -> Optional[str]: + try: + response = fetch_with_retry( + url, max_retries=1, proxy=self.proxy, timeout=self.timeout, follow_redirects=True + ) + return self._process_response(response.headers.get("content-type", ""), response.text) + except Exception as e: + log_warning(f"Failed to fetch {url}: {e}") + return None + + async def async_fetch_url(self, client: httpx.AsyncClient, url: str) -> Optional[str]: + try: + response = await async_fetch_with_retry( + url, client=client, max_retries=1, timeout=self.timeout, follow_redirects=True + ) + return self._process_response(response.headers.get("content-type", ""), response.text) + except Exception as e: + log_warning(f"Failed to fetch {url}: {e}") + return None + + def read(self, url: str, name: Optional[str] = None) -> List[Document]: log_debug(f"Reading llms.txt: {url}") llms_txt_content = self.fetch_url(url) if not llms_txt_content: @@ -251,15 +217,6 @@ def read(self, url: str, name: Optional[str] = None) -> List[Document]: return self._build_documents(overview, entries_to_fetch, fetched, url, name) async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]: - """Asynchronously read an llms.txt file and all its linked documentation. 
- - Args: - url: The URL of the llms.txt file. - name: Optional name for the documents. - - Returns: - A list of documents from the llms.txt and all linked pages. - """ log_debug(f"Reading llms.txt asynchronously: {url}") async with httpx.AsyncClient(proxy=self.proxy) as client: llms_txt_content = await self.async_fetch_url(client, url) @@ -274,8 +231,7 @@ async def async_read(self, url: str, name: Optional[str] = None) -> List[Documen if len(entries) > self.max_urls: log_warning(f"Limiting to {self.max_urls} URLs (found {len(entries)})") - # httpx AsyncClient limits concurrent connections per host (default 20), - # so we don't need application-level throttling + # httpx AsyncClient limits concurrent connections per host (default 20) async def _fetch_entry(entry: LLMsTxtEntry) -> Tuple[str, Optional[str]]: content = await self.async_fetch_url(client, entry.url) return entry.url, content diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 8fd00e0f24..699d1c2c2d 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ b/libs/agno/agno/tools/llms_txt.py @@ -43,6 +43,8 @@ def __init__( super().__init__(name="llms_txt_tools", tools=tools, async_tools=async_tools_list, **kwargs) + # Helpers + def _async_client(self) -> httpx.AsyncClient: return httpx.AsyncClient(timeout=self.timeout, proxy=self.reader.proxy) @@ -58,6 +60,8 @@ def _format_index(self, overview: str, entries: list) -> str: } ) + # Tools + def get_llms_txt_index(self, url: str) -> str: """ Reads an llms.txt file and returns the index of all available documentation pages. 
@@ -69,13 +73,16 @@ def get_llms_txt_index(self, url: str) -> str: Returns: str: JSON with the overview and list of available documentation pages """ - log_info(f"Reading llms.txt index from {url}") - llms_txt_content = self.reader.fetch_url(url) - if not llms_txt_content: - return f"Failed to fetch llms.txt from {url}" + try: + log_info(f"Reading llms.txt index from {url}") + llms_txt_content = self.reader.fetch_url(url) + if not llms_txt_content: + return f"Failed to fetch llms.txt from {url}" - overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) - return self._format_index(overview, entries) + overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) + return self._format_index(overview, entries) + except Exception as e: + return f"Error reading llms.txt index from {url}: {type(e).__name__}: {e}" async def aget_llms_txt_index(self, url: str) -> str: """ @@ -88,15 +95,18 @@ async def aget_llms_txt_index(self, url: str) -> str: Returns: str: JSON with the overview and list of available documentation pages """ - log_info(f"Reading llms.txt index from {url}") - async with self._async_client() as client: - llms_txt_content = await self.reader.async_fetch_url(client, url) + try: + log_info(f"Reading llms.txt index from {url}") + async with self._async_client() as client: + llms_txt_content = await self.reader.async_fetch_url(client, url) - if not llms_txt_content: - return f"Failed to fetch llms.txt from {url}" + if not llms_txt_content: + return f"Failed to fetch llms.txt from {url}" - overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) - return self._format_index(overview, entries) + overview, entries = self.reader.parse_llms_txt(llms_txt_content, url) + return self._format_index(overview, entries) + except Exception as e: + return f"Error reading llms.txt index from {url}: {type(e).__name__}: {e}" def read_llms_txt_url(self, url: str) -> str: """ @@ -109,11 +119,14 @@ def read_llms_txt_url(self, url: str) -> str: 
Returns: str: The text content of the page """ - log_debug(f"Fetching URL: {url}") - content = self.reader.fetch_url(url) - if not content: - return f"Failed to fetch content from {url}" - return content + try: + log_debug(f"Fetching URL: {url}") + content = self.reader.fetch_url(url) + if not content: + return f"Failed to fetch content from {url}" + return content + except Exception as e: + return f"Error fetching {url}: {type(e).__name__}: {e}" async def aread_llms_txt_url(self, url: str) -> str: """ @@ -126,13 +139,16 @@ async def aread_llms_txt_url(self, url: str) -> str: Returns: str: The text content of the page """ - log_debug(f"Fetching URL: {url}") - async with self._async_client() as client: - content = await self.reader.async_fetch_url(client, url) + try: + log_debug(f"Fetching URL: {url}") + async with self._async_client() as client: + content = await self.reader.async_fetch_url(client, url) - if not content: - return f"Failed to fetch content from {url}" - return content + if not content: + return f"Failed to fetch content from {url}" + return content + except Exception as e: + return f"Error fetching {url}: {type(e).__name__}: {e}" def read_llms_txt_and_load_knowledge(self, url: str) -> str: """ @@ -147,9 +163,12 @@ def read_llms_txt_and_load_knowledge(self, url: str) -> str: if self.knowledge is None: return "Knowledge base not provided" - log_info(f"Reading llms.txt from {url}") - self.knowledge.insert(url=url, reader=self.reader) - return f"Successfully loaded documentation from {url} into the knowledge base" + try: + log_info(f"Reading llms.txt from {url}") + self.knowledge.insert(url=url, reader=self.reader) + return f"Successfully loaded documentation from {url} into the knowledge base" + except Exception as e: + return f"Error loading knowledge from {url}: {type(e).__name__}: {e}" async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: """ @@ -164,6 +183,9 @@ async def aread_llms_txt_and_load_knowledge(self, url: str) -> str: if 
self.knowledge is None: return "Knowledge base not provided" - log_info(f"Reading llms.txt from {url}") - await self.knowledge.ainsert(url=url, reader=self.reader) - return f"Successfully loaded documentation from {url} into the knowledge base" + try: + log_info(f"Reading llms.txt from {url}") + await self.knowledge.ainsert(url=url, reader=self.reader) + return f"Successfully loaded documentation from {url} into the knowledge base" + except Exception as e: + return f"Error loading knowledge from {url}: {type(e).__name__}: {e}" From beea0b0a4bfc571b396fa59c02fe8b5c17b4477e Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:14:02 -0400 Subject: [PATCH 18/23] fix: replace Any with proper types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tools list uses Callable instead of Any. Removed Any from kwargs (untyped kwargs is the codebase pattern — other toolkits don't type it). --- libs/agno/agno/knowledge/reader/llms_txt_reader.py | 4 ++-- libs/agno/agno/tools/llms_txt.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/agno/agno/knowledge/reader/llms_txt_reader.py b/libs/agno/agno/knowledge/reader/llms_txt_reader.py index 8f0a2a5e78..4be8058b6e 100644 --- a/libs/agno/agno/knowledge/reader/llms_txt_reader.py +++ b/libs/agno/agno/knowledge/reader/llms_txt_reader.py @@ -2,7 +2,7 @@ import re import uuid from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple from urllib.parse import urljoin import httpx @@ -47,7 +47,7 @@ def __init__( timeout: int = 60, proxy: Optional[str] = None, skip_optional: bool = False, - **kwargs: Any, + **kwargs, ): if chunking_strategy is None: chunk_size = kwargs.get("chunk_size", 5000) diff --git a/libs/agno/agno/tools/llms_txt.py b/libs/agno/agno/tools/llms_txt.py index 699d1c2c2d..cd33a0b0be 100644 --- a/libs/agno/agno/tools/llms_txt.py +++ 
b/libs/agno/agno/tools/llms_txt.py @@ -1,5 +1,5 @@ import json -from typing import Any, List, Optional +from typing import Callable, List, Optional import httpx @@ -16,7 +16,7 @@ def __init__( max_urls: int = 20, timeout: int = 60, skip_optional: bool = False, - **kwargs: Any, + **kwargs, ): self.knowledge: Optional[Knowledge] = knowledge self.max_urls = max_urls @@ -28,7 +28,7 @@ def __init__( skip_optional=skip_optional, ) - tools: List[Any] = [] + tools: List[Callable] = [] async_tools_list: List[tuple] = [] # Agentic mode — agent picks which pages to read if self.knowledge is None: From 7ebc5a2408e4212251fe3246aa1944341aae1dcd Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:19:33 -0400 Subject: [PATCH 19/23] =?UTF-8?q?test:=20rewrite=20tests=20following=20Per?= =?UTF-8?q?plexity/Gmail=20pattern=20=E2=80=94=2046=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restructured from class-based to flat functions with @pytest.fixture, matching test_perplexity.py and test_gmail_tools.py patterns. 
New coverage: - Async reader: async_read happy path + failure - Async toolkit: aget_llms_txt_index, aread_llms_txt_url, aread_llms_txt_and_load_knowledge - Error handling: try/except returns error strings - Edge cases: empty overview, HTML sniffing, unknown content-type - Shared _mock_httpx_response helper for DRY mock setup 34 tests -> 46 tests --- libs/agno/tests/unit/tools/test_llms_txt.py | 671 ++++++++++++-------- 1 file changed, 417 insertions(+), 254 deletions(-) diff --git a/libs/agno/tests/unit/tools/test_llms_txt.py b/libs/agno/tests/unit/tools/test_llms_txt.py index edfb6023c9..f4e3c47b0f 100644 --- a/libs/agno/tests/unit/tools/test_llms_txt.py +++ b/libs/agno/tests/unit/tools/test_llms_txt.py @@ -1,7 +1,7 @@ """Unit tests for LLMsTxtTools and LLMsTxtReader.""" import json -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, Mock, patch import httpx import pytest @@ -12,7 +12,7 @@ from agno.tools.llms_txt import LLMsTxtTools # noqa: E402 # --------------------------------------------------------------------------- -# Sample llms.txt content for testing +# Fixtures # --------------------------------------------------------------------------- SAMPLE_LLMS_TXT = """# Acme Project @@ -49,331 +49,494 @@ """ -# --------------------------------------------------------------------------- -# LLMsTxtReader tests -# --------------------------------------------------------------------------- +@pytest.fixture +def reader(): + return LLMsTxtReader(chunk=False) -class TestLLMsTxtReaderInit: - def test_defaults(self): - reader = LLMsTxtReader() - assert reader.max_urls == 20 - assert reader.timeout == 60 - assert reader.proxy is None - assert reader.skip_optional is False - - def test_custom_params(self): - reader = LLMsTxtReader(max_urls=50, timeout=10, skip_optional=True) - assert reader.max_urls == 50 - assert reader.timeout == 10 - assert reader.skip_optional is True - - -class TestParseLLMsTxt: - def test_parses_entries(self): 
- reader = LLMsTxtReader() - overview, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") - - assert len(entries) == 7 - assert entries[0].title == "Introduction" - assert entries[0].url == "https://docs.acme.com/introduction" - assert entries[0].description == "Overview of Acme" - assert entries[0].section == "Getting Started" - - def test_parses_overview(self): - reader = LLMsTxtReader() - overview, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") - - assert "# Acme Project" in overview - assert "Acme makes it easy" in overview - - def test_sections_assigned(self): - reader = LLMsTxtReader() - _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") +@pytest.fixture +def tools(): + return LLMsTxtTools() - sections = {e.section for e in entries} - assert sections == {"Getting Started", "API Reference", "Optional"} - def test_skip_optional(self): - reader = LLMsTxtReader(skip_optional=True) - _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") +@pytest.fixture +def tools_with_knowledge(): + mock_knowledge = MagicMock() + return LLMsTxtTools(knowledge=mock_knowledge) - assert len(entries) == 5 - assert all(e.section != "Optional" for e in entries) - def test_relative_urls_resolved(self): - reader = LLMsTxtReader() - _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT_RELATIVE, "https://example.com/llms.txt") +def _mock_httpx_response(text: str, content_type: str = "text/plain") -> Mock: + resp = Mock() + resp.headers = {"content-type": content_type} + resp.text = text + resp.raise_for_status = Mock() + return resp - assert entries[0].url == "https://example.com/docs/guide" - assert entries[1].url == "https://example.com/api/reference" - def test_empty_content(self): - reader = LLMsTxtReader() - overview, entries = reader.parse_llms_txt("", "https://example.com/llms.txt") +# 
============================================================================ +# READER: INIT +# ============================================================================ - assert overview == "" - assert entries == [] - def test_no_links(self): - content = "# Title\n\nSome overview text.\n\n## Section\n\nNo links here." - reader = LLMsTxtReader() - overview, entries = reader.parse_llms_txt(content, "https://example.com/llms.txt") - - assert "# Title" in overview - assert entries == [] +def test_reader_defaults(): + reader = LLMsTxtReader() + assert reader.max_urls == 20 + assert reader.timeout == 60 + assert reader.proxy is None + assert reader.skip_optional is False -class TestProcessResponse: - def test_extracts_from_main_tag(self): - reader = LLMsTxtReader() - html = "
Main content here
Foot
" - result = reader._process_response("text/html", html) - assert "Main content here" in result - assert "Nav" not in result - - def test_extracts_from_body_fallback(self): - reader = LLMsTxtReader() - html = "
Body content
" - result = reader._process_response("text/html", html) - assert "Body content" in result +def test_reader_custom_params(): + reader = LLMsTxtReader(max_urls=50, timeout=10, skip_optional=True) + assert reader.max_urls == 50 + assert reader.timeout == 10 + assert reader.skip_optional is True - def test_strips_script_and_style(self): - reader = LLMsTxtReader() - html = "

Text

" - result = reader._process_response("text/html", html) - assert "var x" not in result - assert "Text" in result - def test_preserves_structure_with_newlines(self): - reader = LLMsTxtReader() - html = "

First paragraph

Second paragraph

" - result = reader._process_response("text/html", html) - assert "First paragraph" in result - assert "Second paragraph" in result - assert "\n" in result +# ============================================================================ +# READER: PARSE +# ============================================================================ -class TestFetchUrl: - def test_returns_text_for_plain_content(self): - reader = LLMsTxtReader() - mock_response = MagicMock() - mock_response.headers = {"content-type": "text/plain"} - mock_response.text = "Plain text content" - mock_response.raise_for_status = MagicMock() +def test_parse_entries(reader): + overview, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") - with patch("httpx.get", return_value=mock_response): - result = reader.fetch_url("https://example.com/file.txt") + assert len(entries) == 7 + assert entries[0].title == "Introduction" + assert entries[0].url == "https://docs.acme.com/introduction" + assert entries[0].description == "Overview of Acme" + assert entries[0].section == "Getting Started" - assert result == "Plain text content" - def test_extracts_html_content(self): - reader = LLMsTxtReader() - mock_response = MagicMock() - mock_response.headers = {"content-type": "text/html"} - mock_response.text = "
Extracted
" - mock_response.raise_for_status = MagicMock() +def test_parse_overview(reader): + overview, _ = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") - with patch("httpx.get", return_value=mock_response): - result = reader.fetch_url("https://example.com/page") + assert "# Acme Project" in overview + assert "Acme makes it easy" in overview - assert "Extracted" in result - def test_returns_none_on_http_error(self): - reader = LLMsTxtReader() +def test_parse_sections(reader): + _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") - with patch( - "httpx.get", - side_effect=httpx.HTTPStatusError("error", request=MagicMock(), response=MagicMock(status_code=404)), - ): - result = reader.fetch_url("https://example.com/missing") + sections = {e.section for e in entries} + assert sections == {"Getting Started", "API Reference", "Optional"} - assert result is None - def test_returns_none_on_request_error(self): - reader = LLMsTxtReader() +def test_parse_skip_optional(): + reader = LLMsTxtReader(skip_optional=True) + _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT, "https://docs.acme.com/llms.txt") - with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): - result = reader.fetch_url("https://example.com/down") + assert len(entries) == 5 + assert all(e.section != "Optional" for e in entries) - assert result is None +def test_parse_relative_urls(reader): + _, entries = reader.parse_llms_txt(SAMPLE_LLMS_TXT_RELATIVE, "https://example.com/llms.txt") -class TestBuildDocuments: - def test_builds_overview_and_linked_docs(self): - reader = LLMsTxtReader(chunk=False) - entries = [ - LLMsTxtEntry(title="Intro", url="https://example.com/intro", description="Intro page", section="Docs"), - ] - fetched = {"https://example.com/intro": "Introduction content here"} + assert entries[0].url == "https://example.com/docs/guide" + assert entries[1].url == "https://example.com/api/reference" - docs = 
reader._build_documents("Overview text", entries, fetched, "https://example.com/llms.txt", None) - assert len(docs) == 2 - assert docs[0].meta_data["type"] == "llms_txt_overview" - assert docs[0].content == "Overview text" - assert docs[1].meta_data["type"] == "llms_txt_linked_doc" - assert docs[1].name == "Intro" - assert docs[1].content == "Introduction content here" +def test_parse_empty_content(reader): + overview, entries = reader.parse_llms_txt("", "https://example.com/llms.txt") - def test_skips_unfetched_entries(self): - reader = LLMsTxtReader(chunk=False) - entries = [ - LLMsTxtEntry(title="Missing", url="https://example.com/missing", description="", section="Docs"), - ] - fetched = {} + assert overview == "" + assert entries == [] - docs = reader._build_documents("Overview", entries, fetched, "https://example.com/llms.txt", None) - # Only the overview doc - assert len(docs) == 1 +def test_parse_no_links(reader): + content = "# Title\n\nSome overview text.\n\n## Section\n\nNo links here." 
+ overview, entries = reader.parse_llms_txt(content, "https://example.com/llms.txt") + assert "# Title" in overview + assert entries == [] -class TestRead: - def test_read_fetches_and_builds_docs(self): - reader = LLMsTxtReader(max_urls=5, chunk=False) - def mock_fetch(url): - if url == "https://example.com/llms.txt": - return SAMPLE_LLMS_TXT - return f"Content of {url}" +# ============================================================================ +# READER: PROCESS RESPONSE +# ============================================================================ - with patch.object(reader, "fetch_url", side_effect=mock_fetch): - docs = reader.read("https://example.com/llms.txt") - # 1 overview + 5 linked docs (max_urls=5) - assert len(docs) == 6 - assert docs[0].meta_data["type"] == "llms_txt_overview" +def test_process_response_plain_text(reader): + result = reader._process_response("text/plain", "Plain text content") + assert result == "Plain text content" - def test_read_returns_empty_on_fetch_failure(self): - reader = LLMsTxtReader() - with patch.object(reader, "fetch_url", return_value=None): - docs = reader.read("https://example.com/llms.txt") +def test_process_response_markdown(reader): + result = reader._process_response("text/markdown", "# Heading\n\nBody") + assert result == "# Heading\n\nBody" - assert docs == [] - def test_max_urls_limits_fetched_pages(self): - reader = LLMsTxtReader(max_urls=2, chunk=False) +def test_process_response_html_extracts_main(reader): + html = "
Main content here
Foot
" + result = reader._process_response("text/html", html) + assert "Main content here" in result + assert "Nav" not in result - def mock_fetch(url): - if url == "https://example.com/llms.txt": - return SAMPLE_LLMS_TXT - return f"Content of {url}" - with patch.object(reader, "fetch_url", side_effect=mock_fetch): - docs = reader.read("https://example.com/llms.txt") +def test_process_response_html_body_fallback(reader): + html = "
Body content
" + result = reader._process_response("text/html", html) + assert "Body content" in result - # 1 overview + 2 linked docs (max_urls=2) - assert len(docs) == 3 +def test_process_response_strips_scripts(reader): + html = "

Text

" + result = reader._process_response("text/html", html) + assert "var x" not in result + assert "Text" in result + + +def test_process_response_newline_separator(reader): + html = "

First paragraph

Second paragraph

" + result = reader._process_response("text/html", html) + assert "First paragraph" in result + assert "Second paragraph" in result + assert "\n" in result + + +def test_process_response_html_sniffing(reader): + """HTML detected by content prefix when content-type header is missing.""" + result = reader._process_response("", "

Sniffed

") + assert "Sniffed" in result + + +def test_process_response_unknown_content_type(reader): + """Unknown content-type returns raw text.""" + result = reader._process_response("application/json", '{"key": "value"}') + assert result == '{"key": "value"}' + + +# ============================================================================ +# READER: FETCH +# ============================================================================ + + +def test_fetch_url_plain_content(reader): + mock_response = _mock_httpx_response("Plain text content", "text/plain") + + with patch("agno.utils.http.httpx.get", return_value=mock_response): + result = reader.fetch_url("https://example.com/file.txt") + + assert result == "Plain text content" + + +def test_fetch_url_html_content(reader): + mock_response = _mock_httpx_response("
Extracted
", "text/html") + + with patch("agno.utils.http.httpx.get", return_value=mock_response): + result = reader.fetch_url("https://example.com/page") + + assert "Extracted" in result + + +def test_fetch_url_http_error(reader): + with patch( + "agno.utils.http.httpx.get", + side_effect=httpx.HTTPStatusError("error", request=MagicMock(), response=MagicMock(status_code=404)), + ): + result = reader.fetch_url("https://example.com/missing") + + assert result is None + + +def test_fetch_url_request_error(reader): + with patch("agno.utils.http.httpx.get", side_effect=httpx.RequestError("connection failed")): + result = reader.fetch_url("https://example.com/down") + + assert result is None + + +# ============================================================================ +# READER: BUILD DOCUMENTS +# ============================================================================ + + +def test_build_documents_overview_and_linked(reader): + entries = [ + LLMsTxtEntry(title="Intro", url="https://example.com/intro", description="Intro page", section="Docs"), + ] + fetched = {"https://example.com/intro": "Introduction content here"} + + docs = reader._build_documents("Overview text", entries, fetched, "https://example.com/llms.txt", None) + + assert len(docs) == 2 + assert docs[0].meta_data["type"] == "llms_txt_overview" + assert docs[0].content == "Overview text" + assert docs[1].meta_data["type"] == "llms_txt_linked_doc" + assert docs[1].name == "Intro" + assert docs[1].content == "Introduction content here" + + +def test_build_documents_skips_unfetched(reader): + entries = [ + LLMsTxtEntry(title="Missing", url="https://example.com/missing", description="", section="Docs"), + ] + docs = reader._build_documents("Overview", entries, {}, "https://example.com/llms.txt", None) + + assert len(docs) == 1 + assert docs[0].meta_data["type"] == "llms_txt_overview" + + +def test_build_documents_empty_overview(reader): + entries = [ + LLMsTxtEntry(title="Page", url="https://example.com/page", 
description="", section="Docs"), + ] + fetched = {"https://example.com/page": "Page content"} + + docs = reader._build_documents("", entries, fetched, "https://example.com/llms.txt", None) + + assert len(docs) == 1 + assert docs[0].meta_data["type"] == "llms_txt_linked_doc" + + +# ============================================================================ +# READER: READ +# ============================================================================ + + +def test_read_fetches_and_builds(): + reader = LLMsTxtReader(max_urls=5, chunk=False) + + def mock_fetch(url): + if url == "https://example.com/llms.txt": + return SAMPLE_LLMS_TXT + return f"Content of {url}" + + with patch.object(reader, "fetch_url", side_effect=mock_fetch): + docs = reader.read("https://example.com/llms.txt") + + assert len(docs) == 6 + assert docs[0].meta_data["type"] == "llms_txt_overview" + + +def test_read_returns_empty_on_failure(): + reader = LLMsTxtReader() + + with patch.object(reader, "fetch_url", return_value=None): + docs = reader.read("https://example.com/llms.txt") + + assert docs == [] + + +def test_read_max_urls_limits(): + reader = LLMsTxtReader(max_urls=2, chunk=False) + + def mock_fetch(url): + if url == "https://example.com/llms.txt": + return SAMPLE_LLMS_TXT + return f"Content of {url}" + + with patch.object(reader, "fetch_url", side_effect=mock_fetch): + docs = reader.read("https://example.com/llms.txt") + + assert len(docs) == 3 + + +# ============================================================================ +# READER: ASYNC READ +# ============================================================================ + + +@pytest.mark.asyncio +async def test_async_read_fetches_concurrently(): + reader = LLMsTxtReader(max_urls=3, chunk=False) + + async def mock_async_fetch(client, url): + if "llms.txt" in url: + return SAMPLE_LLMS_TXT + return f"Content of {url}" + + with patch.object(reader, "async_fetch_url", side_effect=mock_async_fetch): + docs = await 
reader.async_read("https://example.com/llms.txt") + + assert len(docs) == 4 + assert docs[0].meta_data["type"] == "llms_txt_overview" + + +@pytest.mark.asyncio +async def test_async_read_returns_empty_on_failure(): + reader = LLMsTxtReader() + + async def mock_async_fetch(client, url): + return None + + with patch.object(reader, "async_fetch_url", side_effect=mock_async_fetch): + docs = await reader.async_read("https://example.com/llms.txt") + + assert docs == [] + + +# ============================================================================ +# TOOLKIT: INIT +# ============================================================================ + + +def test_toolkit_agentic_tools(tools): + func_names = [func.name for func in tools.functions.values()] + assert "get_llms_txt_index" in func_names + assert "read_llms_txt_url" in func_names + assert "read_llms_txt_and_load_knowledge" not in func_names + + +def test_toolkit_async_tools(tools): + async_func_names = [func.name for func in tools.async_functions.values()] + assert "get_llms_txt_index" in async_func_names + assert "read_llms_txt_url" in async_func_names + + +def test_toolkit_knowledge_tools(tools_with_knowledge): + func_names = [func.name for func in tools_with_knowledge.functions.values()] + assert "read_llms_txt_and_load_knowledge" in func_names + assert "get_llms_txt_index" not in func_names + + +def test_toolkit_knowledge_async_tools(tools_with_knowledge): + async_func_names = [func.name for func in tools_with_knowledge.async_functions.values()] + assert "read_llms_txt_and_load_knowledge" in async_func_names + + +def test_toolkit_custom_params(): + t = LLMsTxtTools(max_urls=50, timeout=10, skip_optional=True) + assert t.max_urls == 50 + assert t.timeout == 10 + assert t.skip_optional is True + + +def test_toolkit_reader_reuse(tools): + assert tools.reader is not None + assert tools.reader.timeout == tools.timeout + assert tools.reader.max_urls == tools.max_urls + + +# 
============================================================================ +# TOOLKIT: GET INDEX +# ============================================================================ + + +def test_get_index_returns_json(tools): + mock_response = _mock_httpx_response(SAMPLE_LLMS_TXT, "text/plain") + + with patch("agno.utils.http.httpx.get", return_value=mock_response): + result = tools.get_llms_txt_index("https://docs.acme.com/llms.txt") + + data = json.loads(result) + assert data["total_pages"] == 7 + assert data["pages"][0]["title"] == "Introduction" + assert data["pages"][0]["url"] == "https://docs.acme.com/introduction" + assert "overview" in data + + +def test_get_index_failure(tools): + with patch("agno.utils.http.httpx.get", side_effect=httpx.RequestError("connection failed")): + result = tools.get_llms_txt_index("https://example.com/llms.txt") + + assert "Failed to fetch" in result + + +def test_get_index_error_handling(tools): + with patch.object(tools.reader, "fetch_url", side_effect=RuntimeError("unexpected")): + result = tools.get_llms_txt_index("https://example.com/llms.txt") + + assert "Error" in result + assert "RuntimeError" in result + + +# ============================================================================ +# TOOLKIT: READ URL +# ============================================================================ + + +def test_read_url_returns_content(tools): + mock_response = _mock_httpx_response("Page content here", "text/plain") + + with patch("agno.utils.http.httpx.get", return_value=mock_response): + result = tools.read_llms_txt_url("https://docs.acme.com/introduction") + + assert result == "Page content here" + + +def test_read_url_failure(tools): + with patch("agno.utils.http.httpx.get", side_effect=httpx.RequestError("connection failed")): + result = tools.read_llms_txt_url("https://example.com/missing") + + assert "Failed to fetch" in result -# --------------------------------------------------------------------------- -# LLMsTxtTools tests 
-# --------------------------------------------------------------------------- +# ============================================================================ +# TOOLKIT: ASYNC TOOLS +# ============================================================================ -class TestLLMsTxtToolsInit: - def test_without_knowledge_registers_agentic_tools(self): - tools = LLMsTxtTools() - func_names = [func.name for func in tools.functions.values()] - assert "get_llms_txt_index" in func_names - assert "read_llms_txt_url" in func_names - assert "read_llms_txt_and_load_knowledge" not in func_names - def test_without_knowledge_registers_async_tools(self): - tools = LLMsTxtTools() - async_func_names = [func.name for func in tools.async_functions.values()] - assert "get_llms_txt_index" in async_func_names - assert "read_llms_txt_url" in async_func_names +@pytest.mark.asyncio +async def test_aget_index_returns_json(tools): + mock_response = _mock_httpx_response(SAMPLE_LLMS_TXT, "text/plain") - def test_with_knowledge_registers_load(self): - mock_knowledge = MagicMock() - tools = LLMsTxtTools(knowledge=mock_knowledge) - func_names = [func.name for func in tools.functions.values()] - assert "read_llms_txt_and_load_knowledge" in func_names - assert "get_llms_txt_index" not in func_names + mock_client = AsyncMock() + mock_client.get.return_value = mock_response - def test_with_knowledge_registers_async_load(self): - mock_knowledge = MagicMock() - tools = LLMsTxtTools(knowledge=mock_knowledge) - async_func_names = [func.name for func in tools.async_functions.values()] - assert "read_llms_txt_and_load_knowledge" in async_func_names + with patch("agno.tools.llms_txt.httpx.AsyncClient") as mock_async_client: + mock_async_client.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_async_client.return_value.__aexit__ = AsyncMock(return_value=False) - def test_custom_params(self): - tools = LLMsTxtTools(max_urls=50, timeout=10, skip_optional=True) - assert tools.max_urls == 50 - 
assert tools.timeout == 10 - assert tools.skip_optional is True + result = await tools.aget_llms_txt_index("https://docs.acme.com/llms.txt") - def test_reader_is_reused(self): - tools = LLMsTxtTools() - assert tools.reader is not None - assert tools.reader.timeout == tools.timeout - assert tools.reader.max_urls == tools.max_urls + data = json.loads(result) + assert data["total_pages"] == 7 + assert data["pages"][0]["title"] == "Introduction" -class TestGetLLMsTxtIndex: - def test_returns_index_json(self): - tools = LLMsTxtTools() +@pytest.mark.asyncio +async def test_aread_url_returns_content(tools): + mock_response = _mock_httpx_response("Async page content", "text/plain") - mock_response = MagicMock() - mock_response.headers = {"content-type": "text/plain"} - mock_response.text = SAMPLE_LLMS_TXT - mock_response.raise_for_status = MagicMock() + mock_client = AsyncMock() + mock_client.get.return_value = mock_response - with patch("httpx.get", return_value=mock_response): - result = tools.get_llms_txt_index("https://docs.acme.com/llms.txt") + with patch("agno.tools.llms_txt.httpx.AsyncClient") as mock_async_client: + mock_async_client.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_async_client.return_value.__aexit__ = AsyncMock(return_value=False) - data = json.loads(result) - assert data["total_pages"] == 7 - assert data["pages"][0]["title"] == "Introduction" - assert data["pages"][0]["url"] == "https://docs.acme.com/introduction" - assert "overview" in data + result = await tools.aread_llms_txt_url("https://docs.acme.com/page") - def test_returns_error_on_fetch_failure(self): - tools = LLMsTxtTools() + assert result == "Async page content" - with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): - result = tools.get_llms_txt_index("https://example.com/llms.txt") - assert "Failed to fetch" in result +@pytest.mark.asyncio +async def test_aread_knowledge_delegates(tools_with_knowledge): + 
tools_with_knowledge.knowledge.ainsert = AsyncMock() + result = await tools_with_knowledge.aread_llms_txt_and_load_knowledge("https://example.com/llms.txt") -class TestReadLLMsTxtUrl: - def test_returns_page_content(self): - tools = LLMsTxtTools() + tools_with_knowledge.knowledge.ainsert.assert_called_once_with( + url="https://example.com/llms.txt", reader=tools_with_knowledge.reader + ) + assert "Successfully loaded" in result - mock_response = MagicMock() - mock_response.headers = {"content-type": "text/plain"} - mock_response.text = "Page content here" - mock_response.raise_for_status = MagicMock() - with patch("httpx.get", return_value=mock_response): - result = tools.read_llms_txt_url("https://docs.acme.com/introduction") +# ============================================================================ +# TOOLKIT: KNOWLEDGE +# ============================================================================ - assert result == "Page content here" - def test_returns_error_on_fetch_failure(self): - tools = LLMsTxtTools() +def test_knowledge_delegates_to_insert(tools_with_knowledge): + result = tools_with_knowledge.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - with patch("httpx.get", side_effect=httpx.RequestError("connection failed")): - result = tools.read_llms_txt_url("https://example.com/missing") + tools_with_knowledge.knowledge.insert.assert_called_once_with( + url="https://example.com/llms.txt", reader=tools_with_knowledge.reader + ) + assert "Successfully loaded" in result - assert "Failed to fetch" in result +def test_knowledge_no_knowledge(tools): + result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") + assert result == "Knowledge base not provided" -class TestLoadKnowledge: - def test_delegates_to_knowledge_insert(self): - mock_knowledge = MagicMock() - tools = LLMsTxtTools(knowledge=mock_knowledge) - result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") +def 
test_knowledge_error_handling(tools_with_knowledge): + tools_with_knowledge.knowledge.insert.side_effect = RuntimeError("db connection failed") - mock_knowledge.insert.assert_called_once_with(url="https://example.com/llms.txt", reader=tools.reader) - assert "Successfully loaded" in result + result = tools_with_knowledge.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - def test_returns_message_when_no_knowledge(self): - tools = LLMsTxtTools() - result = tools.read_llms_txt_and_load_knowledge("https://example.com/llms.txt") - assert result == "Knowledge base not provided" + assert "Error" in result + assert "RuntimeError" in result From 4908f397fa1aab74bfddf5efc31f3e9fdcdd2c00 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:33:51 -0400 Subject: [PATCH 20/23] fix: use ContentType.URL to decide pre-download skip in Knowledge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous fix (skip pre-download when any custom reader is provided) broke PDFReader and other file-based readers that need BytesIO. Now we check if the reader supports ContentType.URL — only URL-based readers like LLMsTxtReader and WebsiteReader skip the pre-download. File-based readers (PDFReader, CSVReader, etc.) still get pre-downloaded bytes. 
--- libs/agno/agno/knowledge/knowledge.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libs/agno/agno/knowledge/knowledge.py b/libs/agno/agno/knowledge/knowledge.py index dd01927627..bcb01dcbe2 100644 --- a/libs/agno/agno/knowledge/knowledge.py +++ b/libs/agno/agno/knowledge/knowledge.py @@ -17,6 +17,7 @@ from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData from agno.knowledge.document import Document from agno.knowledge.reader import Reader, ReaderFactory +from agno.knowledge.types import ContentType from agno.knowledge.remote_content.base import BaseStorageConfig from agno.knowledge.remote_content.remote_content import ( RemoteContent, @@ -1564,9 +1565,14 @@ async def _aload_from_url( file_extension = url_path.suffix.lower() bytes_content = None - # Skip pre-download when a custom reader is provided — it knows how to - # handle the URL directly (e.g. LLMsTxtReader fetches linked pages) - if file_extension and not content.reader: + # Skip pre-download when a custom URL-based reader is provided — + # it handles the URL directly (e.g. LLMsTxtReader fetches linked pages) + skip_download = ( + content.reader is not None + and hasattr(content.reader, "get_supported_content_types") + and ContentType.URL in content.reader.get_supported_content_types() + ) + if file_extension and not skip_download: async with AsyncClient() as client: response = await async_fetch_with_retry(content.url, client=client) bytes_content = BytesIO(response.content) @@ -1718,9 +1724,14 @@ def _load_from_url( file_extension = url_path.suffix.lower() bytes_content = None - # Skip pre-download when a custom reader is provided — it knows how to - # handle the URL directly (e.g. LLMsTxtReader fetches linked pages) - if file_extension and not content.reader: + # Skip pre-download when a custom URL-based reader is provided — + # it handles the URL directly (e.g. 
LLMsTxtReader fetches linked pages) + skip_download = ( + content.reader is not None + and hasattr(content.reader, "get_supported_content_types") + and ContentType.URL in content.reader.get_supported_content_types() + ) + if file_extension and not skip_download: response = fetch_with_retry(content.url) bytes_content = BytesIO(response.content) From b075bae7ce10151db1e046f4fd90a04900dac347 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:51:28 -0400 Subject: [PATCH 21/23] fix: preserve httpx defaults when timeout/follow_redirects not specified Only forward timeout and follow_redirects to httpx when explicitly passed by the caller. Previously, default values (timeout=None, follow_redirects=False) were always forwarded, which removed httpx's built-in 5s timeout and overrode client-level redirect settings. --- libs/agno/agno/utils/http.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/libs/agno/agno/utils/http.py b/libs/agno/agno/utils/http.py index 833650dd4d..5f2cbe59ba 100644 --- a/libs/agno/agno/utils/http.py +++ b/libs/agno/agno/utils/http.py @@ -180,18 +180,18 @@ def fetch_with_retry( backoff_factor: int = DEFAULT_BACKOFF_FACTOR, proxy: Optional[str] = None, timeout: Optional[int] = None, - follow_redirects: bool = False, + follow_redirects: Optional[bool] = None, ) -> httpx.Response: """Synchronous HTTP GET with retry logic.""" for attempt in range(max_retries): try: - response = httpx.get( - url, - proxy=proxy, - follow_redirects=follow_redirects, - timeout=timeout, # type: ignore[arg-type] - ) + kwargs: dict = {"proxy": proxy} + if timeout is not None: + kwargs["timeout"] = timeout + if follow_redirects is not None: + kwargs["follow_redirects"] = follow_redirects + response = httpx.get(url, **kwargs) response.raise_for_status() return response except httpx.RequestError as e: @@ -215,16 +215,22 @@ async def async_fetch_with_retry( backoff_factor: int = DEFAULT_BACKOFF_FACTOR, proxy: 
Optional[str] = None, timeout: Optional[int] = None, - follow_redirects: bool = False, + follow_redirects: Optional[bool] = None, ) -> httpx.Response: """Asynchronous HTTP GET with retry logic.""" async def _fetch(): + kwargs: dict = {} + if timeout is not None: + kwargs["timeout"] = timeout + if follow_redirects is not None: + kwargs["follow_redirects"] = follow_redirects + if client is None: async with httpx.AsyncClient(proxy=proxy) as local_client: - return await local_client.get(url, follow_redirects=follow_redirects, timeout=timeout) # type: ignore[arg-type] + return await local_client.get(url, **kwargs) else: - return await client.get(url, follow_redirects=follow_redirects, timeout=timeout) # type: ignore[arg-type] + return await client.get(url, **kwargs) for attempt in range(max_retries): try: From 4b6fb1d593b3025ef070f63d6a6963eeb94a9313 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Fri, 10 Apr 2026 12:51:47 -0400 Subject: [PATCH 22/23] fix: use Optional for new http util params, fix import order follow_redirects and timeout use an Optional type with a None default so existing callers see zero behavior change. Build kwargs dict conditionally instead of type-ignore comments. Import order fixed by format.sh.
--- libs/agno/agno/knowledge/knowledge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/agno/agno/knowledge/knowledge.py b/libs/agno/agno/knowledge/knowledge.py index bcb01dcbe2..89780edde8 100644 --- a/libs/agno/agno/knowledge/knowledge.py +++ b/libs/agno/agno/knowledge/knowledge.py @@ -17,12 +17,12 @@ from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData from agno.knowledge.document import Document from agno.knowledge.reader import Reader, ReaderFactory -from agno.knowledge.types import ContentType from agno.knowledge.remote_content.base import BaseStorageConfig from agno.knowledge.remote_content.remote_content import ( RemoteContent, ) from agno.knowledge.remote_knowledge import RemoteKnowledge +from agno.knowledge.types import ContentType from agno.knowledge.utils import merge_user_metadata, set_agno_metadata, strip_agno_metadata from agno.utils.http import async_fetch_with_retry from agno.utils.log import log_debug, log_error, log_info, log_warning From 32b0aacbabdbe6a42f86fc43965acf78fd781e64 Mon Sep 17 00:00:00 2001 From: Mustafa Esoofally Date: Wed, 15 Apr 2026 15:36:49 -0400 Subject: [PATCH 23/23] wip: checkpoint LLMs.txt local review-round history + stray test file --- libs/agno/tests/unit/os/routers/test_sort_order_default.py | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/agno/tests/unit/os/routers/test_sort_order_default.py b/libs/agno/tests/unit/os/routers/test_sort_order_default.py index 856d625ad2..1843e6f148 100644 --- a/libs/agno/tests/unit/os/routers/test_sort_order_default.py +++ b/libs/agno/tests/unit/os/routers/test_sort_order_default.py @@ -15,7 +15,6 @@ from agno.os.schema import SortOrder - # --------------------------------------------------------------------------- # Helpers – create mock DB / Knowledge with only the methods each router needs # ---------------------------------------------------------------------------