Performance regression in Linux for NVARCHAR strings when using Arrow #10
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Issue Triage | |
| on: | |
| issues: | |
| types: [opened] | |
| workflow_dispatch: | |
| inputs: | |
| issue_number: | |
| description: 'Issue number to triage' | |
| required: true | |
| type: number | |
| permissions: | |
| issues: read | |
| contents: read | |
| models: read | |
| jobs: | |
| triage: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Wait for acknowledgement | |
| run: sleep 3600 | |
| - name: Triage Analysis | |
| id: triage | |
| uses: actions/github-script@v7 | |
| with: | |
| github-token: ${{ secrets.GITHUB_TOKEN }} | |
| script: | | |
// --- Helper function for GitHub Models ---
/**
 * Send a single-turn prompt to the GitHub Models chat-completions endpoint
 * and return the text of the first choice.
 *
 * The request asks for a JSON-object response (`response_format`), so the
 * returned string is expected to be JSON that the caller parses.
 *
 * @param {string} prompt - Full prompt text, sent as one `user` message.
 * @returns {Promise<string>} The `message.content` of the first choice.
 * @throws {Error} If no token is available, if the HTTP response is not OK
 *   (message includes status and response body), or if the response carries
 *   no message content.
 */
async function callGitHubModels(prompt) {
  // The step passes the token through the `github-token` action input and
  // defines no `env:` block, so GITHUB_TOKEN may be unset in the environment.
  // Fall back to the action input (readable via `core`) in that case.
  const token =
    process.env.GITHUB_TOKEN ||
    (typeof core !== 'undefined' ? core.getInput('github-token') : '');
  if (!token) {
    throw new Error(
      'GitHub Models error: no token available (set GITHUB_TOKEN or the github-token input)'
    );
  }

  const url = 'https://models.inference.ai.azure.com/chat/completions';
  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${token}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      model: "gpt-4.1",
      messages: [{ role: "user", content: prompt }],
      temperature: 0.1,
      response_format: { type: "json_object" }
    })
  });

  if (!response.ok) {
    const errText = await response.text();
    throw new Error(`GitHub Models error: ${response.status} - ${errText}`);
  }

  const data = await response.json();
  // Fail with a descriptive message instead of a TypeError when the payload
  // has no choices or the first choice has no text content.
  const content = data?.choices?.[0]?.message?.content;
  if (typeof content !== 'string' || content.length === 0) {
    throw new Error('GitHub Models error: response contained no message content');
  }
  return content;
}
| // --- Get issue details --- | |
| const issueNumber = context.payload.inputs?.issue_number | |
| ? parseInt(context.payload.inputs.issue_number) | |
| : context.payload.issue.number; | |
| let issue; | |
| if (context.payload.issue && !context.payload.inputs?.issue_number) { | |
| issue = context.payload.issue; | |
| } else { | |
| issue = (await github.rest.issues.get({ | |
| owner: context.repo.owner, | |
| repo: context.repo.repo, | |
| issue_number: issueNumber | |
| })).data; | |
| } | |
| const issueTitle = issue.title; | |
| const issueBody = issue.body || ''; | |
| const issueAuthor = issue.user.login; | |
| console.log(`Triaging issue #${issueNumber}: ${issueTitle}`); | |
| // --- Classify the issue: build the prompt (repo layout, the four categories, the required JSON shape, the title and the first 4000 chars of the body) --- | |
| const classificationPrompt = ` | |
| You are an expert triage system for the mssql-python repository — a Python driver for Microsoft SQL Server. | |
| The driver uses ODBC under the hood with a C++/pybind11 native extension layer and Python wrappers. | |
| Note: The pybind/ directory contains C++/pybind11 code (NOT Rust). Only reference Rust if the issue is specifically about BCP (Bulk Copy Protocol). | |
| Key source files in the repo: | |
| - mssql_python/connection.py — Connection management, pooling integration | |
| - mssql_python/cursor.py — Cursor operations, execute, fetch, bulkcopy | |
| - mssql_python/auth.py — Authentication (SQL auth, Azure AD, etc.) | |
| - mssql_python/exceptions.py — Error handling and exception classes | |
| - mssql_python/pooling.py — Connection pooling | |
| - mssql_python/helpers.py — Utility functions | |
| - mssql_python/constants.py — Constants, SQL types, enums | |
| - mssql_python/connection_string_parser.py — Connection string parsing | |
| - mssql_python/parameter_helper.py — Query parameter handling | |
| - mssql_python/logging.py — Logging infrastructure | |
| - mssql_python/row.py — Row objects | |
| - mssql_python/type.py — Type mappings | |
| - mssql_python/ddbc_bindings.py — Python/pybind11 ODBC bindings (C++ native extension, NOT Rust) | |
| - mssql_python/pybind/ — C++/pybind11 native extension layer (NOT Rust) | |
| Classify the following GitHub issue into EXACTLY ONE category: | |
| 1. FEATURE_REQUEST — User wants new functionality or enhancements | |
| 2. BUG — Something is broken, incorrect behavior, or errors | |
| 3. DISCUSSION — User is asking a question or wants clarification | |
| 4. BREAK_FIX — A regression or critical bug: segfaults, crashes, data corruption, | |
| or user says "this used to work" | |
| Respond in this exact JSON format: | |
| { | |
| "category": "BUG|FEATURE_REQUEST|DISCUSSION|BREAK_FIX", | |
| "confidence": <0-100>, | |
| "justification": "<2-3 sentence explanation>", | |
| "severity": "critical|high|medium|low", | |
| "relevant_source_files": ["<top 3 most relevant source file paths>"], | |
| "keywords": ["<key technical terms from the issue>"], | |
| "summary_for_maintainers": "<detailed 3-5 sentence analysis for maintainer notification>" | |
| } | |
| Issue Title: ${issueTitle} | |
| Issue Body: | |
| ${issueBody.slice(0, 4000)} | |
| `; | |
| let analysis; | |
| try { | |
| const classifyResult = await callGitHubModels(classificationPrompt); | |
| analysis = JSON.parse(classifyResult); | |
| } catch (e) { | |
| core.setFailed(`Classification failed: ${e.message}`); | |
| return; | |
| } | |
| console.log(`Classification: ${analysis.category} (${analysis.confidence}%)`); | |
| console.log(`Severity: ${analysis.severity}`); | |
// --- Fetch relevant source files (for ALL categories) ---
// Downloads up to three of the paths suggested by the classifier and keeps the
// first 3000 characters of each as a fenced snippet for the follow-up prompts.
// Fetching is best-effort: a path that cannot be retrieved is logged and skipped.
console.log('Fetching relevant source files for code-grounded analysis...');
const fileContents = [];
// The path list comes from model output, so tolerate a missing or malformed field.
const candidatePaths = Array.isArray(analysis.relevant_source_files)
  ? analysis.relevant_source_files.filter((p) => typeof p === 'string' && p.trim() !== '')
  : [];
for (const filePath of candidatePaths.slice(0, 3)) {
  try {
    const file = await github.rest.repos.getContent({
      owner: context.repo.owner,
      repo: context.repo.repo,
      path: filePath
    });
    // A directory path yields an array and non-file entries carry no base64
    // content; skip those explicitly rather than failing inside Buffer.from.
    if (Array.isArray(file.data) || typeof file.data.content !== 'string') {
      console.log(`Could not fetch ${filePath}: not a regular file`);
      continue;
    }
    const content = Buffer.from(file.data.content, 'base64').toString();
    fileContents.push(`### File: ${filePath}\n\`\`\`python\n${content.slice(0, 3000)}\n\`\`\``);
    console.log(`Fetched: ${filePath}`);
  } catch (e) {
    console.log(`Could not fetch ${filePath}: ${e.message}`);
  }
}
// Empty string when nothing was fetched, so prompts can interpolate it unconditionally.
const codeContext = fileContents.length > 0
  ? `\n\nRelevant source files from the repository:\n${fileContents.join('\n\n')}`
  : '';
| // --- For BUG/BREAK_FIX with at least one fetched file: request a code-grounded root-cause analysis; a failure is only logged and codeAnalysis stays '' --- | |
| let codeAnalysis = ''; | |
| if (['BUG', 'BREAK_FIX'].includes(analysis.category) && fileContents.length > 0) { | |
| console.log('Bug/Break-fix detected — analyzing codebase...'); | |
| const codePrompt = ` | |
| You are a senior Python developer analyzing a potential | |
| ${analysis.category === 'BREAK_FIX' ? 'regression/break-fix' : 'bug'} | |
| in the mssql-python driver (Python + ODBC + C++/pybind11 native layer). | |
| IMPORTANT: ddbc_bindings.py and the pybind/ directory are C++/pybind11 code, NOT Rust. Only mention Rust if the issue is specifically about BCP (Bulk Copy Protocol). | |
| IMPORTANT: Base your analysis ONLY on the actual source code provided below. Do not speculate about code you haven't seen. | |
| Bug Report: | |
| Title: ${issueTitle} | |
| Body: ${issueBody.slice(0, 2000)} | |
| ${codeContext} | |
| Provide analysis in JSON: | |
| { | |
| "is_bug": "Confirmed Bug|Likely Bug|Require More Analysis|Not a Bug", | |
| "root_cause": "<detailed root cause analysis based on actual code above>", | |
| "affected_components": ["<affected modules/functions from the code above>"], | |
| "evidence_and_context": "<specific evidence from the codebase — cite exact functions, variables, line logic, or patterns that support your analysis>", | |
| "recommended_fixes": ["<fix 1 — describe the approach referencing specific code>", "<fix 2>", "<fix 3>"], | |
| "code_locations": ["<file:function or file:class where changes should be made>"], | |
| "risk_assessment": "<risk to users>" | |
| } | |
| `; | |
| try { | |
| codeAnalysis = await callGitHubModels(codePrompt); | |
| console.log('Code analysis complete'); | |
| } catch (e) { | |
| console.log(`Code analysis failed: ${e.message}`); | |
| } | |
| } | |
| // --- For FEATURE_REQUEST/DISCUSSION: request code-grounded engineer guidance (runs even when no files were fetched, codeContext is then ''); a failure is only logged and engineerGuidance stays '' --- | |
| let engineerGuidance = ''; | |
| if (['FEATURE_REQUEST', 'DISCUSSION'].includes(analysis.category)) { | |
| console.log('Non-bug issue — generating code-grounded engineer guidance...'); | |
| const guidancePrompt = ` | |
| You are a senior engineer on the mssql-python team — a Python driver for Microsoft SQL Server | |
| (ODBC + C++/pybind11 native extension + Python wrappers). | |
| IMPORTANT: Base your analysis ONLY on the actual source code provided below. Do not speculate about code you haven't seen. If the code doesn't contain enough information, say so explicitly. | |
| A user filed a GitHub issue classified as: ${analysis.category} | |
| Issue Title: ${issueTitle} | |
| Issue Body: | |
| ${issueBody.slice(0, 3000)} | |
| ${codeContext} | |
| Based on the ACTUAL SOURCE CODE above, provide a detailed analysis to help the engineering team respond efficiently. | |
| Respond in JSON: | |
| { | |
| "technical_assessment": "<detailed technical assessment grounded in the actual code above>", | |
| "verdict": "Confirmed Bug|Likely Bug|Require More Analysis|Not a Bug", | |
| "issue_identified": true/false, | |
| "affected_files": ["<specific source files, modules, functions, or classes from the code above>"], | |
| "current_behavior": "<describe what the current code actually does based on your reading>", | |
| "implementation_approach": "<concrete implementation steps referencing specific functions/lines from the code — ONLY if issue_identified is true, otherwise empty string>", | |
| "effort_estimate": "small|medium|large|epic", | |
| "risks_and_tradeoffs": "<potential risks, backward compatibility concerns, or tradeoffs — ONLY if issue_identified is true, otherwise empty string>", | |
| "suggested_response": "<a draft response the engineer could post on the issue. Always ask the user to share a minimal repro or code snippet that demonstrates the issue or desired behavior, if they haven't already provided one.>", | |
| "related_considerations": ["<other things the team should think about — ONLY if issue_identified is true, otherwise empty array>"] | |
| } | |
| IMPORTANT: If your technical_assessment does not identify any actual issue or gap in the code, set issue_identified to false and leave implementation_approach, risks_and_tradeoffs, and related_considerations empty. Only populate those fields when a real problem or improvement opportunity is confirmed in the code. | |
| `; | |
| try { | |
| engineerGuidance = await callGitHubModels(guidancePrompt); | |
| console.log('Engineer guidance generated'); | |
| } catch (e) { | |
| console.log(`Engineer guidance failed: ${e.message}`); | |
| } | |
| } | |
// NO labels modified on the issue — label info sent to Teams only
// NO comment posted to the issue
// --- Store outputs ---
// Publishes the triage results as step outputs. Fields that originate from
// model output are coerced defensively so a missing or mistyped field yields
// an empty output instead of an uncaught TypeError.
const joinList = (value) => (Array.isArray(value) ? value.join(', ') : '');
core.setOutput('category', analysis.category);
core.setOutput('confidence', String(analysis.confidence ?? ''));
core.setOutput('severity', analysis.severity);
core.setOutput('justification', analysis.justification);
// Fall back to the short justification when no maintainer summary was returned.
core.setOutput('summary_for_maintainers', analysis.summary_for_maintainers || analysis.justification);
core.setOutput('relevant_files', joinList(analysis.relevant_source_files));
core.setOutput('keywords', joinList(analysis.keywords));
core.setOutput('code_analysis', codeAnalysis);
core.setOutput('engineer_guidance', engineerGuidance);
core.setOutput('issue_number', issueNumber.toString());
core.setOutput('issue_title', issueTitle);
core.setOutput('issue_url', issue.html_url);
core.setOutput('issue_author', issueAuthor);
| outputs: | |
| category: ${{ steps.triage.outputs.category }} | |
| confidence: ${{ steps.triage.outputs.confidence }} | |
| severity: ${{ steps.triage.outputs.severity }} | |
| justification: ${{ steps.triage.outputs.justification }} | |
| summary_for_maintainers: ${{ steps.triage.outputs.summary_for_maintainers }} | |
| relevant_files: ${{ steps.triage.outputs.relevant_files }} | |
| keywords: ${{ steps.triage.outputs.keywords }} | |
| code_analysis: ${{ steps.triage.outputs.code_analysis }} | |
| engineer_guidance: ${{ steps.triage.outputs.engineer_guidance }} | |
| issue_number: ${{ steps.triage.outputs.issue_number }} | |
| issue_title: ${{ steps.triage.outputs.issue_title }} | |
| issue_url: ${{ steps.triage.outputs.issue_url }} | |
| issue_author: ${{ steps.triage.outputs.issue_author }} | |
| notify: | |
| needs: triage | |
| uses: ./.github/workflows/issue-notify.yml | |
| with: | |
| category: ${{ needs.triage.outputs.category }} | |
| confidence: ${{ needs.triage.outputs.confidence }} | |
| severity: ${{ needs.triage.outputs.severity }} | |
| justification: ${{ needs.triage.outputs.justification }} | |
| summary_for_maintainers: ${{ needs.triage.outputs.summary_for_maintainers }} | |
| relevant_files: ${{ needs.triage.outputs.relevant_files }} | |
| keywords: ${{ needs.triage.outputs.keywords }} | |
| code_analysis: ${{ needs.triage.outputs.code_analysis }} | |
| engineer_guidance: ${{ needs.triage.outputs.engineer_guidance }} | |
| issue_number: ${{ needs.triage.outputs.issue_number }} | |
| issue_title: ${{ needs.triage.outputs.issue_title }} | |
| issue_url: ${{ needs.triage.outputs.issue_url }} | |
| issue_author: ${{ needs.triage.outputs.issue_author }} | |
| secrets: | |
| TEAMS_WEBHOOK_URL: ${{ secrets.TEAMS_WEBHOOK_URL }} |