Performance regression in Linux for NVARCHAR strings when using Arrow #10

Workflow file for this run

.github/workflows/issue-triage.yml at 9688b10

	# Classifies an issue with a hosted model and forwards the result to a notification workflow.
	name: Issue Triage

	# Triggers: automatically when an issue is opened, or manually for a given issue number.
	on:
	issues:
	types: [opened]

	workflow_dispatch:
	inputs:
	issue_number:
	description: 'Issue number to triage'
	required: true
	type: number

	# Read-only token scopes; the triage script states it neither labels nor comments on the issue.
	permissions:
	issues: read
	contents: read
	# NOTE(review): presumably what authorises the GitHub Models inference request — confirm.
	models: read

	jobs:
	# Classifies the issue and publishes the results as job outputs for the notify job.
	triage:
	runs-on: ubuntu-latest
	steps:
	# NOTE(review): holds the runner for 3600 s before triage starts, for both triggers;
	# nothing later in this workflow checks whether the issue was acknowledged — confirm the delay is intended.
	- name: Wait for acknowledgement
	run: sleep 3600

	# Runs the triage script. GITHUB_TOKEN is exported through `env` because the
	# script reads process.env.GITHUB_TOKEN for its GitHub Models request; the
	# `github-token` input only configures the pre-authenticated `github` client.
	- name: Triage Analysis
	id: triage
	uses: actions/github-script@v7
	env:
	GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	with:
	github-token: ${{ secrets.GITHUB_TOKEN }}
	script: |
	// --- Helper function for GitHub Models ---
	/**
	 * Send a single-turn chat completion request to GitHub Models and return the
	 * reply text of the first choice (the request asks for JSON-object output).
	 *
	 * @param {string} prompt - Full prompt, sent as the only user message.
	 * @returns {Promise<string>} Raw message content of the first choice.
	 * @throws {Error} When no token is available, the HTTP status is not OK, or
	 *   the response carries no message content.
	 */
	async function callGitHubModels(prompt) {
	  // Prefer the exported environment variable; otherwise fall back to the
	  // action's `github-token` input when the `core` helper is in scope.
	  const token = process.env.GITHUB_TOKEN
	    || (typeof core !== 'undefined' ? core.getInput('github-token') : '');
	  if (!token) {
	    // Fail early instead of sending "Bearer undefined" and getting an opaque 401.
	    throw new Error('GitHub Models error: GITHUB_TOKEN is not available to the script');
	  }

	  const url = 'https://models.inference.ai.azure.com/chat/completions';

	  const response = await fetch(url, {
	    method: "POST",
	    headers: {
	      "Authorization": `Bearer ${token}`,
	      "Content-Type": "application/json"
	    },
	    body: JSON.stringify({
	      model: "gpt-4.1",
	      messages: [{ role: "user", content: prompt }],
	      temperature: 0.1,
	      response_format: { type: "json_object" }
	    })
	  });

	  if (!response.ok) {
	    const errText = await response.text();
	    throw new Error(`GitHub Models error: ${response.status} - ${errText}`);
	  }

	  const data = await response.json();
	  // A reply without choices or content would otherwise surface as a bare TypeError.
	  const content = data?.choices?.[0]?.message?.content;
	  if (typeof content !== 'string' || content.length === 0) {
	    throw new Error('GitHub Models error: response contained no message content');
	  }
	  return content;
	}

	// --- Get issue details ---
	const issueNumber = context.payload.inputs?.issue_number
	? parseInt(context.payload.inputs.issue_number)
	: context.payload.issue.number;

	let issue;
	if (context.payload.issue && !context.payload.inputs?.issue_number) {
	issue = context.payload.issue;
	} else {
	issue = (await github.rest.issues.get({
	owner: context.repo.owner,
	repo: context.repo.repo,
	issue_number: issueNumber
	})).data;
	}

	const issueTitle = issue.title;
	const issueBody = issue.body \|\| '';
	const issueAuthor = issue.user.login;

	console.log(`Triaging issue #${issueNumber}: ${issueTitle}`);

	// --- Classify the issue ---
	const classificationPrompt = `
	You are an expert triage system for the mssql-python repository — a Python driver for Microsoft SQL Server.
	The driver uses ODBC under the hood with a C++/pybind11 native extension layer and Python wrappers.
	Note: The pybind/ directory contains C++/pybind11 code (NOT Rust). Only reference Rust if the issue is specifically about BCP (Bulk Copy Protocol).

	Key source files in the repo:
	- mssql_python/connection.py — Connection management, pooling integration
	- mssql_python/cursor.py — Cursor operations, execute, fetch, bulkcopy
	- mssql_python/auth.py — Authentication (SQL auth, Azure AD, etc.)
	- mssql_python/exceptions.py — Error handling and exception classes
	- mssql_python/pooling.py — Connection pooling
	- mssql_python/helpers.py — Utility functions
	- mssql_python/constants.py — Constants, SQL types, enums
	- mssql_python/connection_string_parser.py — Connection string parsing
	- mssql_python/parameter_helper.py — Query parameter handling
	- mssql_python/logging.py — Logging infrastructure
	- mssql_python/row.py — Row objects
	- mssql_python/type.py — Type mappings
	- mssql_python/ddbc_bindings.py — Python/pybind11 ODBC bindings (C++ native extension, NOT Rust)
	- mssql_python/pybind/ — C++/pybind11 native extension layer (NOT Rust)

	Classify the following GitHub issue into EXACTLY ONE category:

	1. FEATURE_REQUEST — User wants new functionality or enhancements
	2. BUG — Something is broken, incorrect behavior, or errors
	3. DISCUSSION — User is asking a question or wants clarification
	4. BREAK_FIX — A regression or critical bug: segfaults, crashes, data corruption,
	or user says "this used to work"

	Respond in this exact JSON format:
	{
	"category": "BUG\|FEATURE_REQUEST\|DISCUSSION\|BREAK_FIX",
	"confidence": <0-100>,
	"justification": "<2-3 sentence explanation>",
	"severity": "critical\|high\|medium\|low",
	"relevant_source_files": ["<top 3 most relevant source file paths>"],
	"keywords": ["<key technical terms from the issue>"],
	"summary_for_maintainers": "<detailed 3-5 sentence analysis for maintainer notification>"
	}

	Issue Title: ${issueTitle}
	Issue Body:
	${issueBody.slice(0, 4000)}
	`;

	let analysis;
	try {
	const classifyResult = await callGitHubModels(classificationPrompt);
	analysis = JSON.parse(classifyResult);
	} catch (e) {
	core.setFailed(`Classification failed: ${e.message}`);
	return;
	}

	console.log(`Classification: ${analysis.category} (${analysis.confidence}%)`);
	console.log(`Severity: ${analysis.severity}`);

	// --- Fetch relevant source files (for ALL categories) ---
	// Downloads up to three of the files named by the classification and keeps
	// the first 3000 characters of each as prompt context. A file that cannot be
	// fetched is logged and skipped; it never fails the step.
	console.log('Fetching relevant source files for code-grounded analysis...');

	// Fence label per extension, so C++ sources are not presented as Python.
	const fenceLanguages = new Map([
	  ['py', 'python'],
	  ['cpp', 'cpp'],
	  ['cc', 'cpp'],
	  ['h', 'cpp'],
	  ['hpp', 'cpp'],
	]);

	// The list comes from model output, so tolerate a missing or malformed field.
	const suggestedPaths = Array.isArray(analysis.relevant_source_files)
	  ? analysis.relevant_source_files.filter((p) => typeof p === 'string' && p.trim() !== '')
	  : [];

	const fileContents = [];
	for (const filePath of suggestedPaths.slice(0, 3)) {
	  try {
	    const file = await github.rest.repos.getContent({
	      owner: context.repo.owner,
	      repo: context.repo.repo,
	      path: filePath
	    });
	    // Directory paths return an array and non-file entries carry no base64
	    // payload; only regular files can be decoded.
	    if (Array.isArray(file.data) || file.data.type !== 'file' || typeof file.data.content !== 'string') {
	      console.log(`Could not fetch ${filePath}: not a regular file`);
	      continue;
	    }
	    const content = Buffer.from(file.data.content, 'base64').toString();
	    const extension = filePath.includes('.') ? filePath.split('.').pop().toLowerCase() : '';
	    const fence = fenceLanguages.get(extension) ?? '';
	    fileContents.push(`### File: ${filePath}\n\`\`\`${fence}\n${content.slice(0, 3000)}\n\`\`\``);
	    console.log(`Fetched: ${filePath}`);
	  } catch (e) {
	    console.log(`Could not fetch ${filePath}: ${e.message}`);
	  }
	}

	const codeContext = fileContents.length > 0
	  ? `\n\nRelevant source files from the repository:\n${fileContents.join('\n\n')}`
	  : '';

	// --- For BUG/BREAK_FIX, analyze codebase ---
	// Second model pass, run only for bug-like categories and only when at least
	// one source file was fetched. The reply is kept as raw JSON text; a failure
	// is logged and leaves `codeAnalysis` empty rather than failing the step.
	let codeAnalysis = '';

	if (['BUG', 'BREAK_FIX'].includes(analysis.category) && fileContents.length > 0) {
	  console.log('Bug/Break-fix detected — analyzing codebase...');

	  const codePrompt = `
	You are a senior Python developer analyzing a potential
	${analysis.category === 'BREAK_FIX' ? 'regression/break-fix' : 'bug'}
	in the mssql-python driver (Python + ODBC + C++/pybind11 native layer).
	IMPORTANT: ddbc_bindings.py and the pybind/ directory are C++/pybind11 code, NOT Rust. Only mention Rust if the issue is specifically about BCP (Bulk Copy Protocol).
	IMPORTANT: Base your analysis ONLY on the actual source code provided below. Do not speculate about code you haven't seen.

	Bug Report:
	Title: ${issueTitle}
	Body: ${issueBody.slice(0, 2000)}
	${codeContext}

	Provide analysis in JSON:
	{
	"is_bug": "Confirmed Bug|Likely Bug|Require More Analysis|Not a Bug",
	"root_cause": "<detailed root cause analysis based on actual code above>",
	"affected_components": ["<affected modules/functions from the code above>"],
	"evidence_and_context": "<specific evidence from the codebase — cite exact functions, variables, line logic, or patterns that support your analysis>",
	"recommended_fixes": ["<fix 1 — describe the approach referencing specific code>", "<fix 2>", "<fix 3>"],
	"code_locations": ["<file:function or file:class where changes should be made>"],
	"risk_assessment": "<risk to users>"
	}
	`;

	  try {
	    codeAnalysis = await callGitHubModels(codePrompt);
	    console.log('Code analysis complete');
	  } catch (e) {
	    console.log(`Code analysis failed: ${e.message}`);
	  }
	}

	// --- For FEATURE_REQUEST/DISCUSSION, provide code-grounded engineer guidance ---
	// Second model pass for non-bug categories. Unlike the bug branch it also runs
	// when no source file was fetched (`codeContext` is then empty). The reply is
	// kept as raw JSON text; a failure is logged and leaves `engineerGuidance` empty.
	let engineerGuidance = '';

	if (['FEATURE_REQUEST', 'DISCUSSION'].includes(analysis.category)) {
	  console.log('Non-bug issue — generating code-grounded engineer guidance...');

	  const guidancePrompt = `
	You are a senior engineer on the mssql-python team — a Python driver for Microsoft SQL Server
	(ODBC + C++/pybind11 native extension + Python wrappers).
	IMPORTANT: Base your analysis ONLY on the actual source code provided below. Do not speculate about code you haven't seen. If the code doesn't contain enough information, say so explicitly.

	A user filed a GitHub issue classified as: ${analysis.category}

	Issue Title: ${issueTitle}
	Issue Body:
	${issueBody.slice(0, 3000)}
	${codeContext}

	Based on the ACTUAL SOURCE CODE above, provide a detailed analysis to help the engineering team respond efficiently.
	Respond in JSON:
	{
	"technical_assessment": "<detailed technical assessment grounded in the actual code above>",
	"verdict": "Confirmed Bug|Likely Bug|Require More Analysis|Not a Bug",
	"issue_identified": true/false,
	"affected_files": ["<specific source files, modules, functions, or classes from the code above>"],
	"current_behavior": "<describe what the current code actually does based on your reading>",
	"implementation_approach": "<concrete implementation steps referencing specific functions/lines from the code — ONLY if issue_identified is true, otherwise empty string>",
	"effort_estimate": "small|medium|large|epic",
	"risks_and_tradeoffs": "<potential risks, backward compatibility concerns, or tradeoffs — ONLY if issue_identified is true, otherwise empty string>",
	"suggested_response": "<a draft response the engineer could post on the issue. Always ask the user to share a minimal repro or code snippet that demonstrates the issue or desired behavior, if they haven't already provided one.>",
	"related_considerations": ["<other things the team should think about — ONLY if issue_identified is true, otherwise empty array>"]
	}

	IMPORTANT: If your technical_assessment does not identify any actual issue or gap in the code, set issue_identified to false and leave implementation_approach, risks_and_tradeoffs, and related_considerations empty. Only populate those fields when a real problem or improvement opportunity is confirmed in the code.
	`;

	  try {
	    engineerGuidance = await callGitHubModels(guidancePrompt);
	    console.log('Engineer guidance generated');
	  } catch (e) {
	    console.log(`Engineer guidance failed: ${e.message}`);
	  }
	}

	// NO labels modified on the issue — label info sent to Teams only
	// NO comment posted to the issue

	// --- Store outputs ---
	// Publishes the triage results as step outputs. Fields that the model may
	// have omitted are defaulted so a partial classification cannot abort the
	// step with a TypeError before anything is published.
	const joinList = (value) => (Array.isArray(value) ? value.join(', ') : '');

	core.setOutput('category', analysis.category);
	core.setOutput('confidence', String(analysis.confidence ?? ''));
	core.setOutput('severity', analysis.severity);
	core.setOutput('justification', analysis.justification);
	// An empty summary falls back to the justification, hence `||` rather than `??`.
	core.setOutput('summary_for_maintainers', analysis.summary_for_maintainers || analysis.justification);
	core.setOutput('relevant_files', joinList(analysis.relevant_source_files));
	core.setOutput('keywords', joinList(analysis.keywords));
	core.setOutput('code_analysis', codeAnalysis);
	core.setOutput('engineer_guidance', engineerGuidance);
	core.setOutput('issue_number', String(issueNumber));
	core.setOutput('issue_title', issueTitle);
	core.setOutput('issue_url', issue.html_url);
	core.setOutput('issue_author', issueAuthor);

	# Job-level outputs: re-export every output of the `triage` step so the
	# `notify` job can read them through `needs.triage.outputs`.
	outputs:
	category: ${{ steps.triage.outputs.category }}
	confidence: ${{ steps.triage.outputs.confidence }}
	severity: ${{ steps.triage.outputs.severity }}
	justification: ${{ steps.triage.outputs.justification }}
	summary_for_maintainers: ${{ steps.triage.outputs.summary_for_maintainers }}
	relevant_files: ${{ steps.triage.outputs.relevant_files }}
	keywords: ${{ steps.triage.outputs.keywords }}
	# code_analysis and engineer_guidance hold the raw model reply text and are empty when that pass did not run or failed.
	code_analysis: ${{ steps.triage.outputs.code_analysis }}
	engineer_guidance: ${{ steps.triage.outputs.engineer_guidance }}
	issue_number: ${{ steps.triage.outputs.issue_number }}
	issue_title: ${{ steps.triage.outputs.issue_title }}
	issue_url: ${{ steps.triage.outputs.issue_url }}
	issue_author: ${{ steps.triage.outputs.issue_author }}

	# Runs after `triage` and calls the issue-notify workflow from this repository,
	# forwarding every triage output as an input along with the Teams webhook secret.
	notify:
	needs: triage
	uses: ./.github/workflows/issue-notify.yml
	with:
	category: ${{ needs.triage.outputs.category }}
	confidence: ${{ needs.triage.outputs.confidence }}
	severity: ${{ needs.triage.outputs.severity }}
	justification: ${{ needs.triage.outputs.justification }}
	summary_for_maintainers: ${{ needs.triage.outputs.summary_for_maintainers }}
	relevant_files: ${{ needs.triage.outputs.relevant_files }}
	keywords: ${{ needs.triage.outputs.keywords }}
	code_analysis: ${{ needs.triage.outputs.code_analysis }}
	engineer_guidance: ${{ needs.triage.outputs.engineer_guidance }}
	issue_number: ${{ needs.triage.outputs.issue_number }}
	issue_title: ${{ needs.triage.outputs.issue_title }}
	issue_url: ${{ needs.triage.outputs.issue_url }}
	issue_author: ${{ needs.triage.outputs.issue_author }}
	secrets:
	TEAMS_WEBHOOK_URL: ${{ secrets.TEAMS_WEBHOOK_URL }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Performance regression in Linux for NVARCHAR strings when using Arrow #10

Workflow file

Performance regression in Linux for NVARCHAR strings when using Arrow #10

Uh oh!

Workflow file for this run