GitHubSecurityLab · data-douser · Apr 17, 2026 · Apr 21, 2026 · Apr 21, 2026
@@ -10,6 +10,9 @@
       "version": "3.11",
       "installTools": true
     },
+    "ghcr.io/devcontainers/features/node:1": {
+      "version": "24"
+    },
     "ghcr.io/devcontainers/features/git:1": {
       "version": "latest"
     },

@@ -17,6 +17,11 @@ hatch build
 # Install this package from local directory.
 pip install -e .
 
+# Install codeql-development-mcp-server and its CodeQL tool query packs
+echo "📦 Installing codeql-development-mcp-server..."
+npm install -g codeql-development-mcp-server@2.25.2
+codeql-development-mcp-server-setup-packs
+
 # Create .env file if it doesn't exist
 if [ ! -f .env ]; then
     echo "📝 Creating .env template..."

@@ -30,6 +30,11 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | d
     && apt-get install -y gh \
     && rm -rf /var/lib/apt/lists/*
 
+# Install Node.js 24 (required for codeql-development-mcp-server)
+RUN curl -fsSL https://deb.nodesource.com/setup_24.x | bash - \
+    && apt-get install -y nodejs \
+    && rm -rf /var/lib/apt/lists/*
+
 # Install CodeQL CLI
 RUN curl -Ls -o /tmp/codeql.zip https://github.com/github/codeql-cli-binaries/releases/latest/download/codeql-linux64.zip \
     && unzip /tmp/codeql.zip -d /opt \
@@ -38,11 +43,35 @@ RUN curl -Ls -o /tmp/codeql.zip https://github.com/github/codeql-cli-binaries/re
     && rm /tmp/codeql.zip
 
 # Install seclab-taskflow-agent from PyPI
+# For local testing with unpublished changes, replace with:
+#   COPY . /app/seclab-taskflow-agent-src
+#   RUN pip install /app/seclab-taskflow-agent-src
 RUN pip install seclab-taskflow-agent
 
-# Install CodeQL pack dependencies
+# Install CodeQL pack dependencies (legacy codeql toolbox)
 RUN export SECLAB_TASKFLOW_AGENT=$(python -c 'import seclab_taskflow_agent as x; print(x.__path__[0])') && \
     codeql pack install $SECLAB_TASKFLOW_AGENT/mcp_servers/codeql/queries/mcp-cpp && \
     codeql pack install $SECLAB_TASKFLOW_AGENT/mcp_servers/codeql/queries/mcp-js
 
+# Install codeql-development-mcp-server and its CodeQL tool query packs
+RUN npm install -g codeql-development-mcp-server@2.25.2 \
+    && codeql-development-mcp-server-setup-packs
+
+# Default in-container paths consumed by the codeql_ql_mcp toolbox YAML
+# (CODEQL_CLI / CODEQL_DBS_BASE_PATH / DATA_DIR / LOG_DIR). Mount host
+# directories at these paths to override:
+#   -v $HOME/codeql/databases:/codeql-databases  # MUST be writable:
+#                                                # codeql_database_create writes
+#                                                # new DBs into this directory
+#   -v $HOME/seclab-data:/data
+#   -v $HOME/seclab-logs:/logs
+ENV CODEQL_CLI=/usr/local/bin/codeql \
+    CODEQL_DBS_BASE_PATH=/codeql-databases \
+    DATA_DIR=/data \
+    LOG_DIR=/logs
+
+RUN mkdir -p /codeql-databases /data /logs
+
+VOLUME ["/codeql-databases", "/data", "/logs"]
+
 ENTRYPOINT ["python", "-m", "seclab_taskflow_agent"]
@@ -165,7 +165,10 @@ def mcp_client_params(
         match kind:
             case "stdio":
                 env = dict(sp.env) if sp.env else None
-                args = list(sp.args) if sp.args else None
+                # Note: must distinguish None from empty list. An empty list is
+                # a valid value (e.g. a binary that takes no args) and must be
+                # passed through to StdioServerParameters as `[]`, not `None`.
+                args = list(sp.args) if sp.args is not None else None
                 logging.debug(f"Initializing toolbox: {tb}\nargs:\n{args}\nenv:\n{env}\n")
                 if env:
                     for k, v in list(env.items()):
@@ -198,7 +201,8 @@ def mcp_client_params(
 
                 if sp.command is not None:
                     env = dict(sp.env) if sp.env else None
-                    args = list(sp.args) if sp.args else None
+                    # See stdio branch above: empty list must not be coerced to None.
+                    args = list(sp.args) if sp.args is not None else None
                     logging.debug(f"Initializing streamable toolbox: {tb}\nargs:\n{args}\nenv:\n{env}\n")
                     exe = shutil.which(sp.command)
                     if exe is None:

@@ -0,0 +1,174 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+seclab-taskflow-agent:
+  version: "1.0"
+  filetype: toolbox
+
+server_params:
+  kind: stdio
+  command: codeql-development-mcp-server
+  # Explicit empty list: the server binary is started without arguments.
+  args: []
+  # Environment handed to the server process. The env('...') templates map
+  # the seclab-side variable names (CODEQL_CLI, CODEQL_DBS_BASE_PATH,
+  # DATA_DIR, LOG_DIR) onto the variable names the ql-mcp server reads.
+  env:
+    CODEQL_PATH: "{{ env('CODEQL_CLI') }}"
+    CODEQL_DATABASES_BASE_DIRS: "{{ env('CODEQL_DBS_BASE_PATH') }}"
+    ENABLE_ANNOTATION_TOOLS: "true"
+    MONITORING_STORAGE_LOCATION: "{{ env('DATA_DIR') }}"
+    LOG_DIR: "{{ env('LOG_DIR') }}"
+server_prompt: |
+  ## CodeQL Development MCP Server (ql-mcp)
+
+  Comprehensive CodeQL analysis backend with 62 tools, 14 prompts, and
+  resources across 10 supported languages.
+
+  ## Supported Languages
+
+    - actions: GitHub Actions workflows
+    - cpp: C and C++
+    - csharp: C#
+    - go: Go
+    - java: Java (including Kotlin)
+    - javascript: JavaScript (including TypeScript)
+    - python: Python
+    - ruby: Ruby
+    - rust: Rust
+    - swift: Swift
+
+  If you cannot determine the language, ask the user.
+
+  ## Key Workflows
+
+  ### Reading Source Code from a Database
+
+  Use `read_database_source` to inspect source files inside a CodeQL database.
+  - Omit `filePath` to list all files in the database source archive
+  - Provide `filePath` to read a specific file's contents
+
+  This replaces any need for external file viewers when analyzing code
+  captured in a CodeQL database.
+
+  ### Discovering and Registering Databases
+
+  - `list_codeql_databases` — discover databases in configured base directories
+  - `register_database` — register a database by its local path
+  - `codeql_database_create` — create a new database from source code
+
+  ### Understanding Code Structure (Tool Queries)
+
+  Run via `codeql_query_run` with the `queryName` parameter:
+  - `PrintAST` — visualize how source code maps to CodeQL AST classes
+  - `PrintCFG` — visualize control flow for a function (`sourceFunction`)
+  - `CallGraphFrom` — outbound calls from a function (`sourceFunction`)
+  - `CallGraphTo` — inbound callers of a function (`targetFunction`)
+  - `CallGraphFromTo` — verify call paths between two functions
+
+  Always run `PrintAST` on test code before writing queries to understand
+  which QL classes represent which source constructs.
+
+  ### Running and Analyzing Queries
+
+  - `codeql_query_run` — execute a query against a database
+  - `codeql_database_analyze` — run query suites, produce SARIF output
+  - `codeql_bqrs_decode` — decode binary query results (text, csv, json)
+  - `codeql_bqrs_interpret` — interpret BQRS into SARIF or CSV
+  - `codeql_bqrs_info` — get result set metadata and row counts
+
+  ### SARIF Analysis
+
+  - `sarif_list_rules` — list rules with result counts, severity, tags
+  - `sarif_extract_rule` — extract per-rule SARIF subset
+  - `sarif_rule_to_markdown` — generate markdown with Mermaid dataflow diagrams
+  - `sarif_compare_alerts` — compare alerts for location overlap
+    (modes: `sink`, `source`, `any-location`, `full-path`)
+  - `sarif_diff_runs` — diff two SARIF files for behavioral changes
+
+  SARIF output is only produced for queries with `@kind problem` or
+  `@kind path-problem` (path-problem is preferred — it carries dataflow paths
+  that `sarif_rule_to_markdown` renders as Mermaid diagrams). Queries with
+  `@kind graph` do not emit SARIF, so:
+  - `PrintAST` and `PrintCFG` (`@kind graph`) are NOT compatible with SARIF
+    tools or with automatic SARIF parsing/caching — decode their BQRS results
+    with `codeql_bqrs_decode` instead.
+  - `CallGraphFrom`, `CallGraphTo`, and `CallGraphFromTo` (`@kind problem`)
+    ARE compatible with SARIF output and the full SARIF tool pipeline.
+
+  Recommended pipeline for triaging analysis output:
+  1. `codeql_database_analyze` → produces SARIF (for problem / path-problem
+     queries only)
+  2. `sarif_list_rules` → identify which rules fired and how often
+  3. `sarif_extract_rule` → narrow to a specific rule's results
+  4. `sarif_rule_to_markdown` → render alerts with dataflow diagrams for review
+  5. `sarif_compare_alerts` / `sarif_diff_runs` → deduplicate across runs or
+     detect regressions when iterating on queries or comparing MRVA results
+
+  ### Query Development (TDD)
+
+  1. `create_codeql_query` — scaffold query, test files, and `.qlref`
+  2. `codeql_pack_install` — install pack dependencies
+  3. Write test code with positive and negative cases
+  4. `codeql_test_run` — run tests (expect initial failure)
+  5. Implement query logic
+  6. `codeql_query_compile` — validate syntax
+  7. `codeql_test_run` — iterate until tests pass
+  8. `codeql_test_accept` — accept correct results as baseline
+
+  Use `quick_evaluate` to test individual predicates or classes against a
+  database without running the full query. Use `find_predicate_position` or
+  `find_class_position` to locate symbols (returns 1-based positions).
+
+  ### LSP Tools (CodeQL Language Server)
+
+  For exploring CodeQL libraries and iterative query development:
+  - `codeql_lsp_completion` — code completions at a cursor position
+  - `codeql_lsp_definition` — jump to symbol definition
+  - `codeql_lsp_references` — find all references to a symbol
+  - `codeql_lsp_diagnostics` — syntax and semantic validation
+  - `codeql_lsp_document_symbols` — list symbols in a file
+
+  Important: LSP tools use 0-based line/character positions. The
+  `workspace_uri` parameter must be a plain directory path to the pack root
+  containing `codeql-pack.yml` (not a file:// URI). Run `codeql_pack_install`
+  before using LSP tools. `codeql_lsp_diagnostics` cannot resolve imports —
+  use `codeql_query_compile` for files with imports.
+
+  ### Discovering and Searching QL Code
+
+  - `search_ql_code` — search QL source files for text or regex patterns
+  - `codeql_resolve_files` — find QL files by extension and glob patterns
+  - `validate_codeql_query` — quick heuristic structural check
+
+  ### Audit & Annotation Tools
+
+  For managing findings across repositories (MRVA triage workflows):
+  - `audit_store_findings` — store findings with owner, repo, sourceLocation, line
+  - `audit_list_findings` — list findings for a repository
+  - `audit_add_notes` — append analyst notes to a finding
+  - `audit_clear_repo` — clear all findings for a repository
+
+  General-purpose annotations:
+  - `annotation_create` — create notes/bookmarks on any entity
+  - `annotation_get` — retrieve annotation by ID
+  - `annotation_list` — list with category/entity/prefix filters
+  - `annotation_update` — update content or metadata
+  - `annotation_delete` — delete by ID, category, or prefix
+  - `annotation_search` — full-text search across annotations
+
+  ### Query Result Cache
+
+  Results are auto-cached after `codeql_query_run` and `codeql_database_analyze`:
+  - `query_results_cache_lookup` — check if results exist (metadata only)
+  - `query_results_cache_retrieve` — get results with subset selection
+  - `query_results_cache_clear` — clear cached results
+  - `query_results_cache_compare` — compare results across databases
+
+  ## Database Path Convention
+
+  Provide database paths relative to the configured `CODEQL_DATABASES_BASE_DIRS`
+  or as absolute filesystem paths. Use `list_codeql_databases` to discover
+  available databases.
+
+  ## Source File URI Convention
+
+  When tool results reference source locations, they use `file://` URIs of
+  the form `file:///path/to/file`, optionally followed by a region suffix:
+  `file:///path/to/file:startLine:startCol:endLine:endCol` (1-based).
+  Pass the file path to `read_database_source` to read the source content.
@@ -0,0 +1,106 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Tests for the codeql_ql_mcp toolbox configuration.
+
+Validates that the codeql_ql_mcp toolbox YAML loads correctly and configures
+the codeql-development-mcp-server as a drop-in replacement for the legacy
+codeql toolbox.
+"""
+
+import yaml
+
+from seclab_taskflow_agent.models import ToolboxDocument
+
+TOOLBOX_PATH = "src/seclab_taskflow_agent/toolboxes/codeql_ql_mcp.yaml"
+
+# Identifiers of all 10 CodeQL-supported languages
+CODEQL_LANGUAGES = [
+    "actions",
+    "cpp",
+    "csharp",
+    "go",
+    "java",
+    "javascript",
+    "python",
+    "ruby",
+    "rust",
+    "swift",
+]
+
+# Environment variables the ql-mcp server needs to receive
+REQUIRED_ENV_VARS = [
+    "CODEQL_PATH",
+    "CODEQL_DATABASES_BASE_DIRS",
+    "ENABLE_ANNOTATION_TOOLS",
+]
+
+
+def _load_toolbox() -> ToolboxDocument:
+    """Load the toolbox YAML at ``TOOLBOX_PATH`` and validate it.
+
+    ``TOOLBOX_PATH`` is a relative path, so it is resolved against the current
+    working directory (presumably the repository root when running pytest).
+
+    Returns:
+        The parsed YAML mapping wrapped in a ``ToolboxDocument``.
+    """
+    # The toolbox prompt contains non-ASCII characters (em dashes, arrows), so
+    # decode explicitly as UTF-8 rather than relying on the locale default.
+    with open(TOOLBOX_PATH, encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+    return ToolboxDocument(**data)
+
+
+class TestCodeqlQlMcpToolbox:
+    """Validate the codeql_ql_mcp toolbox YAML.
+
+    Each test reloads the document through ``_load_toolbox`` so the tests stay
+    independent of one another.
+    """
+
+    def test_parses_into_valid_toolbox_document(self):
+        """The YAML validates as a toolbox document with the expected header."""
+        doc = _load_toolbox()
+        assert doc.header.filetype == "toolbox"
+        assert doc.header.version == "1.0"
+
+    def test_transport_is_stdio(self):
+        """The server is configured for the stdio transport."""
+        doc = _load_toolbox()
+        assert doc.server_params.kind == "stdio"
+
+    def test_command_is_ql_mcp_binary(self):
+        """The launched command is the codeql-development-mcp-server binary."""
+        doc = _load_toolbox()
+        assert doc.server_params.command == "codeql-development-mcp-server"
+
+    def test_env_maps_seclab_vars_to_ql_mcp_vars(self):
+        """Required env vars are present and map to the seclab variable names."""
+        doc = _load_toolbox()
+        env = doc.server_params.env
+        assert env is not None
+        for var in REQUIRED_ENV_VARS:
+            assert var in env, f"Missing required env var: {var}"
+        # Verify the mappings from seclab env names to ql-mcp env names
+        assert env["CODEQL_PATH"] == "{{ env('CODEQL_CLI') }}"
+        assert env["CODEQL_DATABASES_BASE_DIRS"] == "{{ env('CODEQL_DBS_BASE_PATH') }}"
+        assert env["ENABLE_ANNOTATION_TOOLS"] == "true"
+
+    def test_server_prompt_lists_all_languages(self):
+        """Each language has its own ``- <lang>:`` entry in the prompt.
+
+        A bare substring check is too weak: ``java`` is a prefix of
+        ``javascript`` and ``go`` occurs inside unrelated words such as
+        ``category``, so dropping either entry would go unnoticed.
+        """
+        doc = _load_toolbox()
+        prompt = doc.server_prompt
+        # Truthiness rejects both a missing (None) and an empty prompt.
+        assert prompt
+        for lang in CODEQL_LANGUAGES:
+            assert f"- {lang}:" in prompt, f"Language '{lang}' missing from server_prompt"
+
+    def test_server_prompt_has_file_uri_docs(self):
+        """The prompt documents the ``file://`` source-location convention."""
+        doc = _load_toolbox()
+        assert "file://" in doc.server_prompt
+
+    def test_annotation_tools_enabled(self):
+        """Annotation tools are switched on via ENABLE_ANNOTATION_TOOLS."""
+        doc = _load_toolbox()
+        env = doc.server_params.env
+        # Fail with a clear assertion instead of a TypeError when env is absent.
+        assert env is not None
+        assert env["ENABLE_ANNOTATION_TOOLS"] == "true"
+
+    def test_server_prompt_covers_critical_tools(self):
+        """Verify key tools that seclab-taskflow use cases depend on."""
+        doc = _load_toolbox()
+        prompt = doc.server_prompt
+        critical_tools = [
+            "read_database_source",
+            "list_codeql_databases",
+            "register_database",
+            "codeql_query_run",
+            "codeql_database_analyze",
+            "audit_store_findings",
+            "audit_list_findings",
+            "audit_add_notes",
+            "audit_clear_repo",
+            "sarif_list_rules",
+            "search_ql_code",
+            "quick_evaluate",
+        ]
+        for tool in critical_tools:
+            assert tool in prompt, f"Critical tool '{tool}' missing from server_prompt"