Pkg.go.dev 文档转 LLM.txt 格式化工具

从 pkg.go.dev 提取 Go 语言包的文档内容(包括 API 定义和代码示例),并将其转换为结构化的 LLM.txt 文本格式,方便在本地使用大型语言模型 (LLM) 进行参考和分析。

  1. // ==UserScript==
  2. // @name Pkg.go.dev to LLM.txt Formatter
  3. // @name:zh-CN Pkg.go.dev 文档转 LLM.txt 格式化工具
  4. // @namespace https://x.com/janxin
  5. // @version 0.1.1
  6. // @description Extracts Go package documentation (API definitions, examples) from pkg.go.dev and converts it into a structured LLM.txt format, suitable for local AI/LLM reference and analysis.
  7. // @description:zh-CN 从 pkg.go.dev 提取 Go 语言包的文档内容(包括 API 定义和代码示例),并将其转换为结构化的 LLM.txt 文本格式,方便在本地使用大型语言模型 (LLM) 进行参考和分析。
  8. // @author hellowor
  9. // @match https://pkg.go.dev/*
  10. // @icon https://pkg.go.dev/favicon.ico
  11. // @grant GM_addStyle
  12. // @grant GM_setClipboard
  13. // @homepageURL https://x.com/janxin
  14. // @supportURL https://x.com/janxin
  15. // @license MIT
  16. // ==/UserScript==
  17.  
  18. (function() {
  19. 'use strict';
  20.  
  // Stylesheet for the floating download button: pinned to the bottom-right
  // corner of the viewport, with a darker background while hovered.
  const DOWNLOAD_BUTTON_CSS = [
    '',
    '.llm-download-button {',
    'position: fixed;',
    'bottom: 20px;',
    'right: 20px;',
    'z-index: 9999;',
    'padding: 10px 15px;',
    'background-color: #007d9c;',
    'color: white;',
    'border: none;',
    'border-radius: 5px;',
    'cursor: pointer;',
    'font-size: 14px;',
    'box-shadow: 0 2px 5px rgba(0,0,0,0.2);',
    '}',
    '.llm-download-button:hover {',
    'background-color: #005f79;',
    '}',
    '',
  ].join('\n');

  // GM_addStyle is provided by the userscript manager (see the @grant header).
  GM_addStyle(DOWNLOAD_BUTTON_CSS);
  40.  
  /**
   * Returns the whitespace-trimmed text content of a DOM node.
   *
   * @param {?Node} element - Node to read; may be null or undefined.
   * @returns {string} The trimmed `textContent`, or '' when `element` is falsy.
   */
  function getCleanText(element) {
    if (!element) {
      return '';
    }
    return element.textContent.trim();
  }
  44.  
  /**
   * Extracts the source text held by a <pre> element.
   *
   * When the <pre> contains at least one <span>, the whole subtree of the
   * <pre> is walked in document order: text nodes are appended as-is and <br>
   * elements become newlines. The walk starts at the <pre> itself rather than
   * at the first <span>, because `querySelector('span')` may match a span that
   * covers only part of the code, which would truncate the result.
   *
   * @param {?Element} preElement - The <pre> element, or null/undefined.
   * @returns {string} '' for a falsy element; the trimmed walked text when the
   *   <pre> contains a span and that text is non-empty; otherwise the raw
   *   (untrimmed) `textContent` of the element.
   */
  function getCodeFromPre(preElement) {
    if (!preElement) return '';

    if (preElement.querySelector('span')) {
      const parts = [];
      // Depth-first walk so <br> elements are honoured at any nesting level.
      const collect = (parent) => {
        parent.childNodes.forEach((child) => {
          if (child.nodeType === Node.TEXT_NODE) {
            parts.push(child.textContent);
          } else if (child.nodeType === Node.ELEMENT_NODE) {
            if (child.tagName === 'BR') {
              parts.push('\n');
            } else {
              collect(child);
            }
          }
        });
      };
      collect(preElement);

      const code = parts.join('').trim();
      if (code) {
        return code;
      }
    }

    // No span, or the walk produced nothing: fall back to the plain text.
    return preElement.textContent;
  }
  68.  
  69.  
  /**
   * Collects the descriptive text that immediately follows an element.
   *
   * Starting at the next element sibling, consecutive <p> elements and <ul>
   * elements (only those not inside a `.Documentation-exampleDetails`
   * container) are gathered; the scan stops at the first sibling of any other
   * kind. Each <p> contributes its trimmed text; each <ul> contributes one
   * "- item" line per <li> it contains.
   *
   * @param {?Element} element - Element whose following siblings are read.
   * @returns {string} The collected blocks joined by a blank line, or '' when
   *   `element` is falsy or nothing qualifies.
   */
  function getDirectSiblingParagraphs(element) {
    if (!element) {
      return '';
    }

    const isDescriptionNode = (node) =>
      node.tagName === 'P' ||
      (node.tagName === 'UL' && !node.closest('.Documentation-exampleDetails'));

    const blocks = [];
    for (
      let node = element.nextElementSibling;
      node && isDescriptionNode(node);
      node = node.nextElementSibling
    ) {
      if (node.tagName === 'P') {
        blocks.push(getCleanText(node));
        continue;
      }
      // Remaining case is a <ul>: render its items as a dash-prefixed list.
      const bullets = Array.from(node.querySelectorAll('li'), (li) => '- ' + getCleanText(li));
      if (bullets.length > 0) {
        blocks.push(bullets.join('\n'));
      }
    }
    return blocks.join('\n\n');
  }
  90.  
  /**
   * Renders one example `<details>` element as markdown text.
   *
   * The result is a heading (`Example: <title>`, or just `Example` when the
   * summary has no text), followed by a ```go fenced block with the example
   * code when any is found, followed by an `Output:` fenced block when both an
   * output label and a non-empty output element are present.
   *
   * Code is read from `textarea.Documentation-exampleCode.code` first; when
   * that is missing or blank, `pre.Documentation-exampleCode` is used instead.
   *
   * @param {Element} detailElement - The example container element.
   * @param {number} [level=3] - Number of '#' characters in the heading.
   * @returns {string} The markdown fragment; only the heading when no code or
   *   output could be extracted.
   */
  function extractExample(detailElement, level = 3) {
    const headerEl = detailElement.querySelector('.Documentation-exampleDetailsHeader');
    const bodyEl = detailElement.querySelector('.Documentation-exampleDetailsBody');

    let rawCode = '';
    if (bodyEl) {
      const editorEl = bodyEl.querySelector('textarea.Documentation-exampleCode.code');
      if (editorEl) {
        rawCode = editorEl.value;
      }
      // Textarea missing or blank: try the static <pre> rendering instead.
      if (!rawCode.trim()) {
        const staticCodeEl = bodyEl.querySelector('pre.Documentation-exampleCode');
        if (staticCodeEl) {
          rawCode = getCodeFromPre(staticCodeEl);
        }
      }
    }

    const headingMarks = '#'.repeat(level);
    const title = getCleanText(headerEl).replace(/ ¶$/, '');
    const sections = [
      title ? `${headingMarks} Example: ${title}\n\n` : `${headingMarks} Example\n\n`,
    ];

    const code = rawCode.trim();
    if (code) {
      sections.push("```go\n" + code + "\n```\n\n");
    }

    const outputLabelEl = detailElement.querySelector('.Documentation-exampleOutputLabel');
    const outputEl = bodyEl
      ? bodyEl.querySelector('span.Documentation-exampleOutput, pre.Documentation-exampleOutput')
      : null;
    if (outputLabelEl && outputEl) {
      // Prefer a <pre> (the element itself or one nested in the span) so that
      // line structure is kept; otherwise use the element's plain text.
      const outputPre = outputEl.tagName === 'PRE' ? outputEl : outputEl.querySelector('pre');
      const rawOutput = outputPre ? getCodeFromPre(outputPre) : getCleanText(outputEl);
      const output = rawOutput.trim();
      if (output) {
        sections.push(`Output:\n\`\`\`\n${output}\n\`\`\`\n\n`);
      }
    }

    return sections.join('');
  }
  155.  
  /**
   * Determines the display name of a documented function, method or type.
   *
   * Three sources are tried in order:
   *   1. the text of a link in the header that is neither the id link nor the
   *      source link;
   *   2. the identifier parsed from the signature text, using a pattern chosen
   *      by `entityType`;
   *   3. the first word of the header text after removing a trailing " ¶", a
   *      leading `func`/`type` keyword and a leading method receiver.
   *
   * The Function/Constructor pattern accepts either `(` or `[` after the name,
   * so signatures with a type-parameter list (`func Map[T any](...)`) match.
   *
   * @param {?Element} headerEl - Header element of the entry, if any.
   * @param {?Element} sigPre - <pre> element holding the signature, if any.
   * @param {string} [entityType="Unknown"] - "Function", "Constructor",
   *   "Method" or "Type"; any other value skips signature parsing.
   * @returns {string} The name, or `Unknown<entityType>` when none is found.
   */
  function extractNameFromSignatureOrHeader(headerEl, sigPre, entityType = "Unknown") {
    if (headerEl) {
      const nameAnchor = headerEl.querySelector('a:not(.Documentation-idLink):not(.Documentation-source)');
      const anchorText = getCleanText(nameAnchor);
      if (anchorText) {
        return anchorText;
      }
    }

    if (sigPre) {
      const sigText = getCodeFromPre(sigPre).trim();
      let pattern = null;
      switch (entityType) {
        case "Function":
        case "Constructor":
          // Name is followed by the parameter list or a type-parameter list.
          pattern = /^func\s+([A-Z_][A-Za-z0-9_]*)\s*[\[(]/;
          break;
        case "Method":
          // Skip the receiver "(r *T)" before the method name.
          pattern = /^func\s*\([\s\S]*?\)\s*([A-Z_][A-Za-z0-9_]*)\s*\(/;
          break;
        case "Type":
          pattern = /^type\s+([A-Z_][A-Za-z0-9_]*)/;
          break;
      }
      const match = pattern ? sigText.match(pattern) : null;
      if (match) {
        return match[1];
      }
    }

    if (headerEl) {
      const headerText = getCleanText(headerEl)
        .replace(/ ¶$/, '')
        .replace(/^func\s+/, '')
        .replace(/^type\s+/, '')
        // A method header starts with its receiver, e.g. "(*T) Name".
        .replace(/^\([^)]*\)\s*/, '');
      const firstWord = headerText.split(/[\s(\[]/)[0];
      if (firstWord) {
        return firstWord;
      }
    }

    return `Unknown${entityType}`;
  }
  190.  
  191.  
  /** Reports whether a rendered example has more than two non-blank lines, i.e. more than its heading. */
  function hasExampleBody(exampleContent) {
    return exampleContent.split('\n').filter(line => line.trim() !== '').length > 2;
  }

  /**
   * Finds the element following the <h3> of a section header, provided it
   * carries `sectionClass`. Returns null when any step is missing.
   */
  function findSectionAfterHeader(docContainer, headerSelector, sectionClass) {
    const headerEl = docContainer.querySelector(headerSelector);
    const h3 = headerEl ? headerEl.closest('h3') : null;
    const section = h3 ? h3.nextElementSibling : null;
    return section && section.classList.contains(sectionClass) ? section : null;
  }

  /**
   * Renders each example and appends the ones that have a body. When
   * `sectionHeading` is given it is pushed once, before the first kept example.
   */
  function appendExamples(output, examples, level, sectionHeading) {
    let headingPending = Boolean(sectionHeading);
    examples.forEach(ex => {
      const exampleContent = extractExample(ex, level);
      if (!hasExampleBody(exampleContent)) {
        return;
      }
      if (headingPending) {
        output.push(sectionHeading);
        headingPending = false;
      }
      output.push(exampleContent);
    });
  }

  /** Appends a constants/variables section: one fenced declaration, description and "---" per entry. */
  function appendDeclarations(output, section, heading) {
    const declarations = section.querySelectorAll('div.Documentation-declaration');
    if (declarations.length === 0) {
      return;
    }
    output.push(heading);
    declarations.forEach(decl => {
      const sigPre = decl.querySelector('pre');
      if (sigPre) output.push("```go\n" + getCodeFromPre(sigPre).trim() + "\n```\n");
      const desc = getDirectSiblingParagraphs(decl);
      if (desc) output.push(desc + "\n");
      output.push("---\n");
    });
  }

  /** Appends heading, signature, description and examples for one function, constructor, method or type. */
  function appendEntity(output, entityDiv, spec) {
    const headerEl = entityDiv.querySelector(spec.headerSelector);
    const declarationDiv = entityDiv.querySelector('div.Documentation-declaration');
    const sigPre = declarationDiv ? declarationDiv.querySelector('pre') : null;
    const name = extractNameFromSignatureOrHeader(headerEl, sigPre, spec.entityType);

    output.push(`${spec.heading}: ${name}\n`);
    if (sigPre) {
      output.push("```go\n" + getCodeFromPre(sigPre).trim() + "\n```\n");
    }
    // The description follows the declaration when there is one, else the header.
    const desc = getDirectSiblingParagraphs(declarationDiv || headerEl);
    if (desc) output.push(desc + "\n");

    appendExamples(output, entityDiv.querySelectorAll(spec.exampleSelector), spec.exampleLevel, null);
  }

  /**
   * Converts the package documentation on the current page to markdown text.
   *
   * Sections are emitted in this order, each only when present: package
   * overview and its examples, the listed examples, constants, variables,
   * functions, and types (each type followed by its constructors and methods).
   * Entries are separated by "---" lines and types by "===" lines.
   *
   * Listed examples are resolved by comparing element ids directly instead of
   * building a `#id` selector, because ids such as `example-Decoder.Decode`
   * contain characters that have a meaning in CSS selectors.
   *
   * @returns {string} The trimmed text, or '' when the documentation container
   *   is not found (a warning is logged in that case).
   */
  function processDocumentationSection() {
    const output = [];
    const docContainer = document.querySelector('.Documentation.js-documentation .Documentation-content.js-docContent');
    if (!docContainer) {
      console.warn("Main documentation content (.Documentation-content.js-docContent) not found.");
      return '';
    }

    // --- Package overview -------------------------------------------------
    const overviewSection = docContainer.querySelector('section.Documentation-overview');
    if (overviewSection) {
      const overviewHeader = overviewSection.querySelector('h3#pkg-overview');
      const packageDescription = getDirectSiblingParagraphs(overviewHeader);
      if (packageDescription) {
        output.push("## Package Overview\n\n" + packageDescription + "\n\n");
      }
      appendExamples(
        output,
        overviewSection.querySelectorAll('details.Documentation-exampleDetails'),
        3,
        "## Package Examples (from Overview)\n"
      );
    }

    // --- Examples referenced from the examples index ----------------------
    const examplesSectionHeader = docContainer.querySelector('h4#pkg-examples');
    const examplesParent = examplesSectionHeader ? examplesSectionHeader.parentElement : null;
    const examplesList = examplesParent ? examplesParent.querySelector('ul.Documentation-examplesList') : null;
    if (examplesList) {
      // Index every example container by id once; the first element wins on
      // duplicate ids, matching what a selector lookup would return.
      const detailsById = new Map();
      docContainer.querySelectorAll('details.Documentation-exampleDetails').forEach(detail => {
        if (!detailsById.has(detail.id)) {
          detailsById.set(detail.id, detail);
        }
      });

      const listedExamples = [];
      examplesList.querySelectorAll('li a.js-exampleHref').forEach(exLink => {
        const href = exLink.getAttribute('href') || '';
        const hashIndex = href.indexOf('#');
        if (hashIndex === -1) {
          return;
        }
        const exampleDetail = detailsById.get(href.slice(hashIndex + 1));
        if (exampleDetail) {
          listedExamples.push(exampleDetail);
        }
      });
      appendExamples(output, listedExamples, 3, "## Examples (Listed)\n");
    }

    // --- Constants and variables ------------------------------------------
    const constSection = findSectionAfterHeader(docContainer, '#pkg-constants', 'Documentation-constants');
    if (constSection) {
      appendDeclarations(output, constSection, "## Constants\n");
    }

    const varSection = findSectionAfterHeader(docContainer, '#pkg-variables', 'Documentation-variables');
    if (varSection) {
      appendDeclarations(output, varSection, "## Variables\n");
    }

    // --- Package-level functions ------------------------------------------
    const funcSection = findSectionAfterHeader(docContainer, '#pkg-functions', 'Documentation-functions');
    if (funcSection) {
      const functions = funcSection.querySelectorAll('div.Documentation-function');
      if (functions.length > 0) {
        output.push("## Functions\n");
        functions.forEach(fnDiv => {
          appendEntity(output, fnDiv, {
            headerSelector: 'h4.Documentation-functionHeader',
            entityType: "Function",
            heading: '### Function',
            exampleSelector: 'details.Documentation-exampleDetails',
            exampleLevel: 4,
          });
          output.push("---\n");
        });
      }
    }

    // --- Types with their constructors and methods ------------------------
    const typeSection = findSectionAfterHeader(docContainer, '#pkg-types', 'Documentation-types');
    if (typeSection) {
      const types = typeSection.querySelectorAll('div.Documentation-type');
      if (types.length > 0) {
        output.push("## Types\n");
        types.forEach(typeDiv => {
          // Only direct-child examples belong to the type itself; nested ones
          // are emitted with the constructor or method that contains them.
          appendEntity(output, typeDiv, {
            headerSelector: 'h4.Documentation-typeHeader',
            entityType: "Type",
            heading: '### Type',
            exampleSelector: ':scope > details.Documentation-exampleDetails',
            exampleLevel: 4,
          });

          typeDiv.querySelectorAll('div.Documentation-typeFunc').forEach(assocFnDiv => {
            appendEntity(output, assocFnDiv, {
              headerSelector: 'h4.Documentation-functionHeader',
              entityType: "Constructor",
              heading: '#### Constructor',
              exampleSelector: 'details.Documentation-exampleDetails',
              exampleLevel: 5,
            });
            output.push("---\n");
          });

          typeDiv.querySelectorAll('div.Documentation-typeMethod').forEach(assocMethodDiv => {
            appendEntity(output, assocMethodDiv, {
              headerSelector: 'h4.Documentation-functionHeader',
              entityType: "Method",
              heading: '#### Method',
              exampleSelector: 'details.Documentation-exampleDetails',
              exampleLevel: 5,
            });
            output.push("---\n");
          });
          output.push("===\n");
        });
      }
    }

    const cleanedOutput = output.join('\n')
      .replace(/(\n---\n)+(\s*(\n---|\n===|$))/g, '\n---\n$2') // Collapse a run of adjacent "---" lines
      .replace(/(\n===\n)+/g, '\n===\n') // Collapse a run of adjacent "===" lines
      .replace(/\n{3,}/g, '\n\n'); // Never more than two consecutive newlines
    return cleanedOutput.trim();
  }
  395.  
  /**
   * Derives a filename-friendly package name from a pkg.go.dev URL path.
   *
   * Only the `@version` segment is removed, so for a path such as
   * `/module@v1.2.3/sub/pkg` the sub-package part after the version is kept.
   * The leading and trailing slashes are dropped and the remaining slashes are
   * replaced by underscores.
   *
   * @param {string} [pathname=window.location.pathname] - URL path to convert.
   * @returns {string} The package name, or '' for the root path.
   */
  function getPackageNameForFilename(pathname = window.location.pathname) {
    return pathname
      .replace(/@[^/]*/, '') // drop "@version" but keep what follows it
      .replace(/^\//, '')
      .replace(/\/$/, '')
      .replace(/\//g, '_');
  }
  404.  
  /**
   * Saves `text` as a UTF-8 plain-text file named `filename`.
   *
   * The content is handed to the browser through a Blob object URL on a
   * temporary hidden link. A `data:` URI is avoided because percent-encoding
   * inflates the payload and large documents can exceed browser URL limits.
   *
   * @param {string} filename - Suggested name of the downloaded file.
   * @param {string} text - File content.
   * @returns {void}
   */
  function download(filename, text) {
    const blob = new Blob([text], { type: 'text/plain;charset=utf-8' });
    const url = URL.createObjectURL(blob);

    const element = document.createElement('a');
    element.setAttribute('href', url);
    element.setAttribute('download', filename);
    element.style.display = 'none';
    document.body.appendChild(element);
    try {
      element.click();
    } finally {
      // Always remove the helper link, and release the object URL a moment
      // later so the browser has had time to start reading it.
      document.body.removeChild(element);
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    }
  }
  414.  
  /**
   * Runs the extraction and offers the result as a file download.
   *
   * When extraction yields only whitespace, an alert is shown instead. The
   * file is named `<package>_llm.txt`, or `llm.txt` when no package name can
   * be derived from the URL.
   *
   * @returns {void}
   */
  function initializeScraper() {
    const extracted = processDocumentationSection();
    if (!extracted.trim()) {
      alert("Could not find documentation section or no actual data was extracted. Check console for details (if any logs were enabled).");
      return;
    }

    const packageName = getPackageNameForFilename();
    let filename = 'llm.txt';
    if (packageName) {
      filename = `${packageName}_llm.txt`;
    }
    download(filename, extracted);
  }
  430.  
  // Floating button that starts the export; styled by the
  // `.llm-download-button` rules registered above.
  const downloadButton = Object.assign(document.createElement('button'), {
    textContent: 'Download llm.txt',
    className: 'llm-download-button',
  });

  downloadButton.addEventListener('click', () => {
    // Extraction starts one second after the click. NOTE(review): presumably
    // this gives dynamically rendered page content (e.g. example editors) time
    // to settle; a MutationObserver would be a sturdier trigger.
    setTimeout(initializeScraper, 1000);
  });

  document.body.appendChild(downloadButton);
  449.  
  450. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址