diff --git a/pkg/parser/frontmatter_content.go b/pkg/parser/frontmatter_content.go index da213370ad..1a96f1b555 100644 --- a/pkg/parser/frontmatter_content.go +++ b/pkg/parser/frontmatter_content.go @@ -24,10 +24,15 @@ type FrontmatterResult struct { // ExtractFrontmatterFromContent parses YAML frontmatter from markdown content string func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { log.Printf("Extracting frontmatter from content: size=%d bytes", len(content)) - lines := strings.Split(content, "\n") + // Fast-path: inspect only the first line to determine whether frontmatter exists. + firstNewline := strings.IndexByte(content, '\n') + firstLine := content + if firstNewline >= 0 { + firstLine = content[:firstNewline] + } - // Check if file starts with frontmatter delimiter - if len(lines) == 0 || strings.TrimSpace(lines[0]) != "---" { + // Check if file starts with frontmatter delimiter. + if strings.TrimSpace(firstLine) != "---" { log.Print("No frontmatter delimiter found, returning content as markdown") // No frontmatter, return entire content as markdown return &FrontmatterResult{ @@ -38,22 +43,52 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { }, nil } - // Find end of frontmatter - endIndex := -1 - for i := 1; i < len(lines); i++ { - if strings.TrimSpace(lines[i]) == "---" { - endIndex = i + // Find end of frontmatter by scanning line-by-line without splitting the entire document. + searchStart := len(content) + if firstNewline >= 0 { + searchStart = firstNewline + 1 + } + frontmatterEndStart := -1 + markdownStart := len(content) + for cursor := searchStart; cursor <= len(content); { + lineStart := cursor + lineEnd := len(content) + nextCursor := len(content) + 1 + + if cursor < len(content) { + if relNewline := strings.IndexByte(content[cursor:], '\n'); relNewline >= 0 { + lineEnd = cursor + relNewline + nextCursor = lineEnd + 1 + } + } + + if strings.TrimSpace(content[lineStart:lineEnd]) == "---" { + frontmatterEndStart = lineStart + markdownStart = nextCursor break } + + if nextCursor > len(content) { + break + } + cursor = nextCursor } - if endIndex == -1 { + if frontmatterEndStart == -1 { return nil, errors.New("frontmatter not properly closed") } // Extract frontmatter YAML - frontmatterLines := lines[1:endIndex] - frontmatterYAML := strings.Join(frontmatterLines, "\n") + frontmatterYAML := content[searchStart:frontmatterEndStart] + frontmatterLines := []string{} + if frontmatterYAML != "" { + frontmatterLines = strings.Split(frontmatterYAML, "\n") + // Preserve previous behavior from lines[1:endIndex]: a trailing newline before + // the closing delimiter does not create an additional empty frontmatter line. + if strings.HasSuffix(frontmatterYAML, "\n") { + frontmatterLines = frontmatterLines[:len(frontmatterLines)-1] + } + } // Sanitize no-break whitespace characters (U+00A0) which break the YAML parser frontmatterYAML = strings.ReplaceAll(frontmatterYAML, "\u00A0", " ") @@ -73,11 +108,10 @@ func ExtractFrontmatterFromContent(content string) (*FrontmatterResult, error) { } // Extract markdown content (everything after the closing ---) - var markdownLines []string - if endIndex+1 < len(lines) { - markdownLines = lines[endIndex+1:] + markdown := "" + if markdownStart <= len(content) { + markdown = content[markdownStart:] } - markdown := strings.Join(markdownLines, "\n") log.Printf("Successfully extracted frontmatter: fields=%d, markdown_size=%d bytes", len(frontmatter), len(markdown)) return &FrontmatterResult{ diff --git a/pkg/parser/frontmatter_extraction_test.go b/pkg/parser/frontmatter_extraction_test.go index e8ec140271..e5cf0eafc2 100644 --- a/pkg/parser/frontmatter_extraction_test.go +++ b/pkg/parser/frontmatter_extraction_test.go @@ -3,6 +3,7 @@ package parser import ( + "reflect" "testing" ) @@ -287,3 +288,71 @@ This is markdown.`, }) } } + +func TestExtractFrontmatterFromContent_FrontmatterLinesAndStart(t *testing.T) { + tests := []struct { + name string + content string + wantFrontmatterLines []string + wantFrontmatterStart int + }{ + { + name: "no trailing blank frontmatter line without blank before closing delimiter", + content: `--- +on: workflow_dispatch +permissions: + contents: read +--- +# Body +`, + wantFrontmatterLines: []string{ + "on: workflow_dispatch", + "permissions:", + " contents: read", + }, + wantFrontmatterStart: 2, + }, + { + name: "preserve intentional blank line before closing delimiter", + content: `--- +on: workflow_dispatch +permissions: + contents: read + +--- +# Body +`, + wantFrontmatterLines: []string{ + "on: workflow_dispatch", + "permissions:", + " contents: read", + "", + }, + wantFrontmatterStart: 2, + }, + { + name: "no frontmatter keeps empty frontmatter metadata", + content: `# Body without frontmatter +`, + wantFrontmatterLines: []string{}, + wantFrontmatterStart: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ExtractFrontmatterFromContent(tt.content) + if err != nil { + t.Fatalf("ExtractFrontmatterFromContent() error = %v", err) + } + + if !reflect.DeepEqual(result.FrontmatterLines, tt.wantFrontmatterLines) { + t.Errorf("ExtractFrontmatterFromContent() FrontmatterLines = %#v, want %#v", result.FrontmatterLines, tt.wantFrontmatterLines) + } + + if result.FrontmatterStart != tt.wantFrontmatterStart { + t.Errorf("ExtractFrontmatterFromContent() FrontmatterStart = %d, want %d", result.FrontmatterStart, tt.wantFrontmatterStart) + } + }) + } +} diff --git a/pkg/workflow/compiler_orchestrator_engine.go b/pkg/workflow/compiler_orchestrator_engine.go index 19dd5c07a1..8cbf679b85 100644 --- a/pkg/workflow/compiler_orchestrator_engine.go +++ b/pkg/workflow/compiler_orchestrator_engine.go @@ -159,8 +159,9 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean if idx := strings.Index(importFilePath, "#"); idx >= 0 { importFilePath = importFilePath[:idx] } - // Only scan markdown files — .yml imports are YAML config, not markdown content - if !strings.HasSuffix(importFilePath, ".md") { + // Only scan non-builtin markdown imports. + // Builtin imports are trusted project assets and are validated in-source. + if !shouldScanImportedMarkdown(importFilePath) { continue } // Resolve the import path to a full filesystem path @@ -358,6 +359,15 @@ func (c *Compiler) setupEngineAndImports(result *parser.FrontmatterResult, clean }, nil } +// shouldScanImportedMarkdown reports whether an import path should be processed by +// markdown security scanning. +func shouldScanImportedMarkdown(importFilePath string) bool { + if !strings.HasSuffix(importFilePath, ".md") { + return false + } + return !strings.HasPrefix(importFilePath, parser.BuiltinPathPrefix) +} + // isStringFormEngine reports whether the "engine" field in the given frontmatter is a // plain string (e.g. "engine: copilot"), as opposed to an object with an "id" or // "runtime" sub-key. diff --git a/pkg/workflow/compiler_orchestrator_engine_test.go b/pkg/workflow/compiler_orchestrator_engine_test.go index 3445daa172..f269bbff21 100644 --- a/pkg/workflow/compiler_orchestrator_engine_test.go +++ b/pkg/workflow/compiler_orchestrator_engine_test.go @@ -218,6 +218,47 @@ strict: false } } +func TestShouldScanImportedMarkdown(t *testing.T) { + tests := []struct { + name string + importFilePath string + want bool + }{ + { + name: "scans regular markdown imports", + importFilePath: "shared/workflow.md", + want: true, + }, + { + name: "skips builtin markdown imports", + importFilePath: parser.BuiltinPathPrefix + "engines/claude.md", + want: false, + }, + { + name: "skips non-markdown imports", + importFilePath: "shared/workflow.lock.yml", + want: false, + }, + { + name: "skips uppercase markdown extension", + importFilePath: "shared/workflow.MD", + want: false, + }, + { + name: "skips builtin non-markdown imports", + importFilePath: parser.BuiltinPathPrefix + "engines/claude.yml", + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := shouldScanImportedMarkdown(tt.importFilePath) + assert.Equal(t, tt.want, got, "shouldScanImportedMarkdown(%q)", tt.importFilePath) + }) + } +} + // TestSetupEngineAndImports_NetworkMerging tests network permissions merging from imports func TestSetupEngineAndImports_NetworkMerging(t *testing.T) { tmpDir := testutil.TempDir(t, "engine-network")