// Pkg.go.dev 文档转 LLM.txt 格式化工具
//
// 从 pkg.go.dev 提取 Go 语言包的文档内容(包括 API 定义和代码示例),并将其转换为结构化的 LLM.txt 文本格式,方便在本地使用大型语言模型 (LLM) 进行参考和分析。

// ==UserScript==
// @name         Pkg.go.dev to LLM.txt Formatter
// @name:zh-CN   Pkg.go.dev 文档转 LLM.txt 格式化工具
// @namespace    https://x.com/janxin
// @version      0.1.1
// @description  Extracts Go package documentation (API definitions, examples) from pkg.go.dev and converts it into a structured LLM.txt format, suitable for local AI/LLM reference and analysis.
// @description:zh-CN 从 pkg.go.dev 提取 Go 语言包的文档内容(包括 API 定义和代码示例),并将其转换为结构化的 LLM.txt 文本格式,方便在本地使用大型语言模型 (LLM) 进行参考和分析。
// @author       hellowor
// @match        https://pkg.go.dev/*
// @icon         https://pkg.go.dev/favicon.ico
// @grant        GM_addStyle
// @grant        GM_setClipboard
// @homepageURL  https://x.com/janxin
// @supportURL   https://x.com/janxin
// @license MIT
// ==/UserScript==

(function() {
    'use strict';

    // Stylesheet for the floating download button: pinned to the bottom-right
    // corner of the viewport, with a darker background while hovered.
    const downloadButtonCss = `
        .llm-download-button {
            position: fixed;
            bottom: 20px;
            right: 20px;
            z-index: 9999;
            padding: 10px 15px;
            background-color: #007d9c;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 14px;
            box-shadow: 0 2px 5px rgba(0,0,0,0.2);
        }
        .llm-download-button:hover {
            background-color: #005f79;
        }
    `;
    GM_addStyle(downloadButtonCss);

    /**
     * Returns the whitespace-trimmed text content of a node.
     *
     * @param {?{textContent: string}} element - Node to read; may be null/undefined.
     * @returns {string} Trimmed text, or '' when no element was given.
     */
    function getCleanText(element) {
        if (!element) {
            return '';
        }
        return element.textContent.trim();
    }

    /**
     * Extracts source text from a <pre> element.
     *
     * When the <pre> contains markup (at least one <span>), the whole subtree
     * is walked so that every piece of text is kept and each <br> becomes a
     * newline. Reading only the first <span> would drop everything outside it,
     * e.g. the surrounding `type T struct {` / `}` lines or later field spans.
     *
     * @param {?Element} preElement - The <pre> element; may be null/undefined.
     * @returns {string} Trimmed code when markup was walked and produced text;
     *     otherwise the element's raw (untrimmed) textContent; '' for no element.
     */
    function getCodeFromPre(preElement) {
        if (!preElement) return '';

        // Depth-first text collection; <br> elements are rendered as newlines.
        const collectText = (node) => {
            if (node.nodeType === Node.TEXT_NODE) return node.textContent;
            if (node.nodeType !== Node.ELEMENT_NODE) return ''; // comments etc.
            if (node.tagName === 'BR') return '\n';
            let text = '';
            node.childNodes.forEach(child => {
                text += collectText(child);
            });
            return text;
        };

        if (preElement.querySelector('span')) {
            const code = collectText(preElement).trim();
            if (code) {
                return code;
            }
        }
        // Fallback if there is no span or the walk yielded nothing.
        return preElement.textContent; // textContent decodes HTML entities
    }


    /**
     * Collects the descriptive text that immediately follows an element.
     *
     * Walks forward through the following element siblings for as long as they
     * are <p> elements or <ul> elements that are not inside
     * `.Documentation-exampleDetails`. Paragraphs contribute their trimmed text;
     * lists contribute one "- item" line per <li>.
     *
     * @param {?Element} element - Element whose following siblings are read.
     * @returns {string} Collected blocks joined by blank lines ('' if none).
     */
    function getDirectSiblingParagraphs(element) {
        if (!element) return '';

        const blocks = [];
        for (let node = element.nextElementSibling; node; node = node.nextElementSibling) {
            const tag = node.tagName;
            if (tag === 'P') {
                blocks.push(getCleanText(node));
                continue;
            }
            // Anything other than a list outside an example ends the description.
            if (tag !== 'UL' || node.closest('.Documentation-exampleDetails')) break;

            const bullets = Array.from(node.querySelectorAll('li'), li => '- ' + getCleanText(li));
            if (bullets.length > 0) {
                blocks.push(bullets.join('\n'));
            }
        }
        return blocks.join('\n\n');
    }

    /**
     * Renders one example <details> element as Markdown.
     *
     * The result is a heading ("Example: <title>" or just "Example"), followed
     * by a ```go fenced block when code was found, followed by an "Output:"
     * fenced block when both an output label and non-empty output exist.
     *
     * @param {Element} detailElement - The example's <details> element.
     * @param {number} [level=3] - Number of '#' characters in the heading.
     * @returns {string} Markdown text; only the heading when nothing else was found.
     */
    function extractExample(detailElement, level = 3) {
        const headerEl = detailElement.querySelector('.Documentation-exampleDetailsHeader');
        const bodyEl = detailElement.querySelector('.Documentation-exampleDetailsBody');

        // Prefer the <textarea> value; use the <pre> when the textarea is absent or blank.
        let rawCode = '';
        if (bodyEl) {
            const editor = bodyEl.querySelector('textarea.Documentation-exampleCode.code');
            if (editor) {
                rawCode = editor.value;
            }
            if (!rawCode.trim()) {
                const staticCode = bodyEl.querySelector('pre.Documentation-exampleCode');
                if (staticCode) {
                    rawCode = getCodeFromPre(staticCode);
                }
            }
        }

        const labelEl = detailElement.querySelector('.Documentation-exampleOutputLabel');
        const resultEl = bodyEl
            ? bodyEl.querySelector('span.Documentation-exampleOutput, pre.Documentation-exampleOutput')
            : null;

        const hashes = '#'.repeat(level);
        const title = getCleanText(headerEl).replace(/ ¶$/, '');
        const parts = [title ? `${hashes} Example: ${title}\n\n` : `${hashes} Example\n\n`];

        const code = rawCode.trim();
        if (code) {
            parts.push("```go\n" + code + "\n```\n\n");
        }

        if (labelEl && resultEl) {
            // The output is either a <pre> itself or a wrapper that may contain one.
            const outputPre = resultEl.tagName === 'PRE' ? resultEl : resultEl.querySelector('pre');
            const rawOutput = outputPre ? getCodeFromPre(outputPre) : getCleanText(resultEl);
            const shownOutput = rawOutput.trim();
            if (shownOutput) {
                parts.push(`Output:\n\`\`\`\n${shownOutput}\n\`\`\`\n\n`);
            }
        }
        return parts.join('');
    }

    /**
     * Works out the display name of a documented entity.
     *
     * Sources are tried in this order:
     *   1. a link inside the header that is neither `.Documentation-idLink`
     *      nor `.Documentation-source`;
     *   2. the signature text, matched with a pattern chosen by `entityType`
     *      ("Function", "Constructor", "Method" or "Type");
     *   3. the first word of the header text after removing a trailing " ¶"
     *      and a leading "func "/"type ".
     *
     * @param {?Element} headerEl - Header element of the entity, if any.
     * @param {?Element} sigPre - <pre> holding the signature, if any.
     * @param {string} [entityType="Unknown"] - Kind of entity being named.
     * @returns {string} The name, or `Unknown<entityType>` when none was found.
     */
    function extractNameFromSignatureOrHeader(headerEl, sigPre, entityType = "Unknown") {
        const linkedName = headerEl
            ? getCleanText(headerEl.querySelector('a:not(.Documentation-idLink):not(.Documentation-source)'))
            : '';
        if (linkedName) {
            return linkedName;
        }

        if (sigPre) {
            const signature = getCodeFromPre(sigPre).trim(); // trimmed so the ^ anchors apply
            const plainFunc = /^func\s+([A-Z_][A-Za-z0-9_]*)\s*\(/;
            const patternByType = new Map([
                ["Function", plainFunc],
                ["Constructor", plainFunc],
                ["Method", /^func\s*\([\s\S]*?\)\s*([A-Z_][A-Za-z0-9_]*)\s*\(/],
                ["Type", /^type\s+([A-Z_][A-Za-z0-9_]*)/],
            ]);
            const pattern = patternByType.get(entityType);
            const found = pattern ? signature.match(pattern) : null;
            if (found && found[1]) {
                return found[1];
            }
        }

        if (headerEl) {
            const stripped = getCleanText(headerEl)
                .replace(/ ¶$/, '')
                .replace(/^func\s+/, '')
                .replace(/^type\s+/, '');
            const [firstWord] = stripped.split(/\s|\(/);
            if (firstWord) {
                return firstWord;
            }
        }
        return `Unknown${entityType}`;
    }


    /**
     * Tells whether rendered example text holds more than its heading,
     * i.e. has more than two non-blank lines.
     */
    function exampleHasBody(exampleContent) {
        return exampleContent.split('\n').filter(l => l.trim() !== '').length > 2;
    }

    /**
     * Renders each example element and appends the non-trivial ones to `output`.
     * When `sectionHeading` is given it is pushed once, before the first kept example.
     */
    function appendExamples(output, exampleElements, level, sectionHeading) {
        let headingEmitted = false;
        exampleElements.forEach(ex => {
            const exampleContent = extractExample(ex, level);
            if (!exampleHasBody(exampleContent)) return;
            if (sectionHeading && !headingEmitted) {
                output.push(sectionHeading);
                headingEmitted = true;
            }
            output.push(exampleContent);
        });
    }

    /**
     * Resolves an examples-list link (href "#<id>") to its <details> element.
     * The id is escaped because it may contain characters that are special in
     * a CSS selector (e.g. the dot in "example-Buffer.Bytes").
     */
    function findListedExample(docContainer, href) {
        if (!href) return null;
        const hashIndex = href.indexOf('#');
        if (hashIndex === -1) return null;
        const exampleId = href.substring(hashIndex + 1);
        if (!exampleId) return null;
        return docContainer.querySelector(`details#${CSS.escape(exampleId)}.Documentation-exampleDetails`);
    }

    /**
     * Finds the element that follows the <h3> of the given anchor and returns
     * it only if it carries `sectionClass`; returns null in every other case.
     */
    function findSectionBody(docContainer, anchorSelector, sectionClass) {
        const anchor = docContainer.querySelector(anchorSelector);
        const heading = anchor ? anchor.closest('h3') : null;
        const section = heading ? heading.nextElementSibling : null;
        return section && section.classList.contains(sectionClass) ? section : null;
    }

    /**
     * Appends a constants/variables style section: a heading, then for each
     * declaration its code, its description and a "---" rule.
     */
    function appendValueDeclarations(output, docContainer, anchorSelector, sectionClass, heading) {
        const section = findSectionBody(docContainer, anchorSelector, sectionClass);
        if (!section) return;
        const declarations = section.querySelectorAll('div.Documentation-declaration');
        if (declarations.length === 0) return;

        output.push(heading);
        declarations.forEach(decl => {
            const sigPre = decl.querySelector('pre');
            if (sigPre) output.push("```go\n" + getCodeFromPre(sigPre).trim() + "\n```\n");
            const desc = getDirectSiblingParagraphs(decl);
            if (desc) output.push(desc + "\n");
            output.push("---\n");
        });
    }

    /**
     * Appends one documented entity (function, type, constructor or method):
     * heading, signature, description and examples, optionally closed by "---".
     */
    function appendEntity(output, entityEl, { headerSelector, kind, headingLevel, exampleSelector, endRule }) {
        const headerEl = entityEl.querySelector(headerSelector);
        const declarationDiv = entityEl.querySelector('div.Documentation-declaration');
        const sigPre = declarationDiv ? declarationDiv.querySelector('pre') : null;
        const name = extractNameFromSignatureOrHeader(headerEl, sigPre, kind);

        output.push(`${'#'.repeat(headingLevel)} ${kind}: ${name}\n`);
        if (sigPre) {
            output.push("```go\n" + getCodeFromPre(sigPre).trim() + "\n```\n");
        }
        // The description follows the declaration when there is one, else the header.
        const descAnchor = declarationDiv || headerEl;
        const desc = descAnchor ? getDirectSiblingParagraphs(descAnchor) : '';
        if (desc) output.push(desc + "\n");

        // Examples sit one heading level below the entity itself.
        appendExamples(output, entityEl.querySelectorAll(exampleSelector), headingLevel + 1);
        if (endRule) output.push("---\n");
    }

    /**
     * Converts the package documentation on the current page to Markdown-like text.
     *
     * Sections are emitted in this order, each only when present: package
     * overview (+ its examples), listed examples, constants, variables,
     * functions, and types with their constructors and methods.
     *
     * @returns {string} The trimmed document, or '' when the documentation
     *     container is not found (a console warning is logged in that case).
     */
    function processDocumentationSection() {
        const output = [];
        const docContainer = document.querySelector('.Documentation.js-documentation .Documentation-content.js-docContent');
        if (!docContainer) {
            console.warn("Main documentation content (.Documentation-content.js-docContent) not found.");
            return '';
        }

        const allExamples = 'details.Documentation-exampleDetails';

        // --- Overview ---
        const overviewSection = docContainer.querySelector('section.Documentation-overview');
        if (overviewSection) {
            const overviewHeader = overviewSection.querySelector('h3#pkg-overview');
            if (overviewHeader) {
                const packageDescription = getDirectSiblingParagraphs(overviewHeader);
                if (packageDescription) {
                    output.push("## Package Overview\n\n" + packageDescription + "\n\n");
                }
            }
            appendExamples(output, overviewSection.querySelectorAll(allExamples), 3, "## Package Examples (from Overview)\n");
        }

        // --- Examples referenced from the examples list ---
        const examplesSectionHeader = docContainer.querySelector('h4#pkg-examples');
        const examplesParent = examplesSectionHeader ? examplesSectionHeader.parentElement : null;
        const examplesList = examplesParent ? examplesParent.querySelector('ul.Documentation-examplesList') : null;
        if (examplesList) {
            const listedExamples = [];
            examplesList.querySelectorAll('li a.js-exampleHref').forEach(exLink => {
                const exampleDetail = findListedExample(docContainer, exLink.getAttribute('href'));
                if (exampleDetail) listedExamples.push(exampleDetail);
            });
            appendExamples(output, listedExamples, 3, "## Examples (Listed)\n");
        }

        // --- Constants and variables ---
        appendValueDeclarations(output, docContainer, '#pkg-constants', 'Documentation-constants', "## Constants\n");
        appendValueDeclarations(output, docContainer, '#pkg-variables', 'Documentation-variables', "## Variables\n");

        // --- Functions ---
        const funcSection = findSectionBody(docContainer, '#pkg-functions', 'Documentation-functions');
        if (funcSection) {
            const functions = funcSection.querySelectorAll('div.Documentation-function');
            if (functions.length > 0) {
                output.push("## Functions\n");
                functions.forEach(fnDiv => appendEntity(output, fnDiv, {
                    headerSelector: 'h4.Documentation-functionHeader',
                    kind: 'Function',
                    headingLevel: 3,
                    exampleSelector: allExamples,
                    endRule: true,
                }));
            }
        }

        // --- Types, each followed by its constructors and methods ---
        const typeSection = findSectionBody(docContainer, '#pkg-types', 'Documentation-types');
        if (typeSection) {
            const types = typeSection.querySelectorAll('div.Documentation-type');
            if (types.length > 0) {
                output.push("## Types\n");
                types.forEach(typeDiv => {
                    appendEntity(output, typeDiv, {
                        headerSelector: 'h4.Documentation-typeHeader',
                        kind: 'Type',
                        headingLevel: 3,
                        // Only direct children: nested examples belong to constructors/methods.
                        exampleSelector: ':scope > details.Documentation-exampleDetails',
                        endRule: false,
                    });
                    typeDiv.querySelectorAll('div.Documentation-typeFunc').forEach(assocFnDiv => appendEntity(output, assocFnDiv, {
                        headerSelector: 'h4.Documentation-functionHeader',
                        kind: 'Constructor',
                        headingLevel: 4,
                        exampleSelector: allExamples,
                        endRule: true,
                    }));
                    typeDiv.querySelectorAll('div.Documentation-typeMethod').forEach(assocMethodDiv => appendEntity(output, assocMethodDiv, {
                        headerSelector: 'h4.Documentation-functionHeader',
                        kind: 'Method',
                        headingLevel: 4,
                        exampleSelector: allExamples,
                        endRule: true,
                    }));
                    output.push("===\n");
                });
            }
        }

        let cleanedOutput = output.join('\n')
                                .replace(/(\n---\n)+(\s*(\n---|\n===|$))/g, '\n---\n$2') // Collapse repeated --- rules that precede another rule, a === line, or the end
                                .replace(/(\n===\n)+/g, '\n===\n') // Consolidate multiple ===
                                .replace(/\n{3,}/g, '\n\n');       // At most one blank line in a row
        return cleanedOutput.trim();
    }

    /**
     * Derives a filename-safe package name from the current page path.
     *
     * Only the "@version" part of the path is removed, so a sub-package that
     * follows the version (e.g. "/mod@v1.2.3/sub/pkg") stays in the name.
     * One leading and one trailing slash are dropped and the remaining
     * slashes become underscores.
     *
     * @returns {string} e.g. "golang.org_x_tools_go_packages"; '' for "/".
     */
    function getPackageNameForFilename() {
        return window.location.pathname
            .replace(/@[^/]*/, '') // strip "@version" but keep any path after it
            .replace(/^\//, '')
            .replace(/\/$/, '')
            .replace(/\//g, '_');
    }

    /**
     * Triggers a browser download of `text` as a plain-text file.
     *
     * A hidden <a> carrying a `data:` URL and a `download` attribute is added
     * to the page, clicked programmatically and removed again.
     *
     * @param {string} filename - Suggested name of the downloaded file.
     * @param {string} text - File contents (percent-encoded into the URL).
     */
    function download(filename, text) {
        const link = document.createElement('a');
        const dataUrl = `data:text/plain;charset=utf-8,${encodeURIComponent(text)}`;

        link.setAttribute('href', dataUrl);
        link.setAttribute('download', filename);
        link.style.display = 'none';

        const host = document.body;
        host.appendChild(link);
        link.click();
        host.removeChild(link);
    }

    /**
     * Runs the extraction and hands the result to the download helper.
     *
     * The file is named "<package>_llm.txt", or "llm.txt" when no package name
     * can be derived. If nothing was extracted, an alert is shown instead.
     */
    function initializeScraper() {
        const extracted = processDocumentationSection();

        if (!extracted.trim()) {
            alert("Could not find documentation section or no actual data was extracted. Check console for details (if any logs were enabled).");
            return;
        }

        const pkg = getPackageNameForFilename();
        let targetName = 'llm.txt';
        if (pkg) {
            targetName = `${pkg}_llm.txt`;
        }
        download(targetName, extracted);
    }

    // Floating trigger button; nothing is extracted until the user clicks it.
    const llmButton = Object.assign(document.createElement('button'), {
        textContent: 'Download llm.txt',
        className: 'llm-download-button',
    });

    // Extraction starts one second after the click rather than immediately.
    // NOTE(review): the delay presumably gives late-initialising page widgets
    // time to settle (earlier notes here mentioned CodeMirror) — confirm. A
    // MutationObserver would be a sturdier alternative for very dynamic pages.
    llmButton.addEventListener('click', () => {
        setTimeout(initializeScraper, 1000);
    });

    document.body.appendChild(llmButton);

})();

// QingJ © 2025
//
// 镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址