Perplexity.ai Chat Exporter

Export Perplexity.ai conversations as markdown with configurable citation styles

  1. // ==UserScript==
  2. // @name Perplexity.ai Chat Exporter
  3. // @namespace https://github.com/ckep1/pplxport
  4. // @version 1.0.5
  5. // @description Export Perplexity.ai conversations as markdown with configurable citation styles
  6. // @author Chris Kephart
  7. // @match https://www.perplexity.ai/*
  8. // @grant GM_getValue
  9. // @grant GM_setValue
  10. // @grant GM_registerMenuCommand
  11. // @license MIT
  12. // ==/UserScript==
  13.  
  14. (function () {
  15. "use strict";
  16.  
  // Style options.
  // Both tables are frozen: they are shared lookup constants for the whole
  // script and must never be mutated at runtime.

  // How citation links are rendered in the exported markdown.
  const CITATION_STYLES = Object.freeze({
    ENDNOTES: "endnotes", // "[1]" markers, sources listed at the end
    INLINE: "inline", // "[1](url)"
    PARENTHESIZED: "parenthesized", // "([1](url))"
  });

  // Overall layout of the exported document.
  const FORMAT_STYLES = Object.freeze({
    FULL: "full", // Include User/Assistant tags and all dividers
    CONCISE: "concise", // Just content, minimal dividers
  });
  28.  
  /**
   * Read the stored export preferences from userscript storage.
   *
   * @returns {{citationStyle: string, formatStyle: string}} The saved values,
   *   defaulting to parenthesized citations and the full format.
   */
  function getPreferences() {
    const citationStyle = GM_getValue("citationStyle", CITATION_STYLES.PARENTHESIZED);
    const formatStyle = GM_getValue("formatStyle", FORMAT_STYLES.FULL);
    return { citationStyle, formatStyle };
  }
  36.  
  // Register menu commands.
  // Each row: [menu label, preference key, value to store, confirmation shown to the user].
  const menuEntries = [
    [
      "Use Endnotes Citation Style",
      "citationStyle",
      CITATION_STYLES.ENDNOTES,
      "Citation style set to endnotes. Format: [1] with sources listed at end.",
    ],
    [
      "Use Inline Citation Style",
      "citationStyle",
      CITATION_STYLES.INLINE,
      "Citation style set to inline. Format: [1](url)",
    ],
    [
      "Use Parenthesized Citation Style",
      "citationStyle",
      CITATION_STYLES.PARENTHESIZED,
      "Citation style set to parenthesized. Format: ([1](url))",
    ],
    [
      "Full Format (with User/Assistant)",
      "formatStyle",
      FORMAT_STYLES.FULL,
      "Format set to full with User/Assistant tags.",
    ],
    [
      "Concise Format (content only)",
      "formatStyle",
      FORMAT_STYLES.CONCISE,
      "Format set to concise content only.",
    ],
  ];

  // Registration order matches the table order above.
  for (const [label, prefKey, prefValue, notice] of menuEntries) {
    GM_registerMenuCommand(label, () => {
      GM_setValue(prefKey, prefValue);
      alert(notice);
    });
  }
  62.  
  /**
   * Convert the HTML of one answer into markdown.
   *
   * The HTML is parsed into a detached <div>, code blocks and citation links
   * are normalised on the DOM, and the rest is converted with a sequence of
   * regex replacements on the serialized markup.
   *
   * NOTE: the whitespace clean-up strips leading spaces on every line, which
   * also removes indentation inside fenced code blocks.
   *
   * @param {string} html - Markup to convert (callers pass an element's innerHTML).
   * @param {string} [citationStyle] - One of CITATION_STYLES; selects how
   *   citation links are rendered. Defaults to parenthesized.
   * @returns {string} The markdown text. In endnotes style a "### Sources"
   *   list of the collected citation URLs is appended.
   */
  function htmlToMarkdown(html, citationStyle = CITATION_STYLES.PARENTHESIZED) {
    const tempDiv = document.createElement("div");
    tempDiv.innerHTML = html;

    // Wrap "pre-wrap" styled <code> elements in a <pre> so that they are
    // emitted as fenced blocks; the language label, when present, lives in
    // the sibling preceding the code container.
    tempDiv.querySelectorAll("code").forEach((codeElem) => {
      if (codeElem.style.whiteSpace && codeElem.style.whiteSpace.includes("pre-wrap")) {
        if (codeElem.parentElement.tagName.toLowerCase() !== "pre") {
          const pre = document.createElement("pre");
          let language = "";
          const prevDiv = codeElem.closest("div.pr-lg")?.previousElementSibling;
          if (prevDiv) {
            const langDiv = prevDiv.querySelector(".text-text-200");
            if (langDiv) {
              language = langDiv.textContent.trim().toLowerCase();
              langDiv.remove();
            }
          }
          pre.dataset.language = language;
          pre.innerHTML = "<code>" + codeElem.innerHTML + "</code>";
          codeElem.parentNode.replaceChild(pre, codeElem);
        }
      }
    });

    // Process citations: record number -> href for the endnotes list and
    // replace each citation element with its markdown form.
    const citationRefs = new Map();
    tempDiv.querySelectorAll(".citation").forEach((el) => {
      // The citation number sits in the inner <span> of the link.
      const number = el.querySelector("span span")?.textContent.trim() || null;
      const href = el.getAttribute("href");

      if (!number) {
        // Nothing usable to cite: keep whatever visible text the element had
        // instead of emitting "[null](null)".
        el.replaceWith(el.textContent);
        return;
      }
      if (!href) {
        // A numbered marker without a target can only be shown as "[n]".
        el.replaceWith(` [${number}] `);
        return;
      }

      citationRefs.set(number, { href });

      if (citationStyle === CITATION_STYLES.INLINE) {
        el.replaceWith(` [${number}](${href}) `);
      } else if (citationStyle === CITATION_STYLES.PARENTHESIZED) {
        el.replaceWith(` ([${number}](${href})) `);
      } else {
        el.replaceWith(` [${number}] `);
      }
    });

    let text = tempDiv.innerHTML;

    // Basic HTML conversion. Headings are surrounded by blank lines so they
    // never run into adjacent text; surplus newlines are collapsed later.
    text = text
      .replace(/<h1[^>]*>([\s\S]*?)<\/h1>/g, "\n\n# $1\n\n")
      .replace(/<h2[^>]*>([\s\S]*?)<\/h2>/g, "\n\n## $1\n\n")
      .replace(/<h3[^>]*>([\s\S]*?)<\/h3>/g, "\n\n### $1\n\n")
      .replace(/<h4[^>]*>([\s\S]*?)<\/h4>/g, "\n\n#### $1\n\n")
      .replace(/<h5[^>]*>([\s\S]*?)<\/h5>/g, "\n\n##### $1\n\n")
      .replace(/<h6[^>]*>([\s\S]*?)<\/h6>/g, "\n\n###### $1\n\n")
      .replace(/<p[^>]*>([\s\S]*?)<\/p>/g, "$1\n")
      .replace(/<br\s*\/?>/g, "\n")
      .replace(/<strong>([\s\S]*?)<\/strong>/g, "**$1**")
      .replace(/<em>([\s\S]*?)<\/em>/g, "*$1*")
      .replace(/<ul[^>]*>([\s\S]*?)<\/ul>/g, "$1\n")
      .replace(/<li[^>]*>([\s\S]*?)<\/li>/g, " - $1\n");

    // Handle tables before removing remaining HTML
    text = text.replace(/<table[^>]*>([\s\S]*?)<\/table>/g, (match) => {
      const tableDiv = document.createElement("div");
      tableDiv.innerHTML = match;
      const rows = [];

      // Header rows, each followed by the markdown separator row.
      tableDiv.querySelectorAll("thead tr").forEach((row) => {
        const cells = [...row.querySelectorAll("th, td")].map((cell) => cell.textContent.trim() || " ");
        if (cells.length > 0) {
          rows.push(`| ${cells.join(" | ")} |`);
          rows.push(`| ${cells.map(() => "---").join(" | ")} |`);
        }
      });

      // Body rows.
      tableDiv.querySelectorAll("tbody tr").forEach((row) => {
        const cells = [...row.querySelectorAll("td")].map((cell) => cell.textContent.trim() || " ");
        if (cells.length > 0) {
          rows.push(`| ${cells.join(" | ")} |`);
        }
      });

      // Return markdown table with proper spacing
      return rows.length > 0 ? `\n\n${rows.join("\n")}\n\n` : "";
    });

    // Continue with remaining HTML conversion
    text = text
      .replace(/<pre[^>]*data-language="([^"]*)"[^>]*><code>([\s\S]*?)<\/code><\/pre>/g, "```$1\n$2\n```")
      .replace(/<pre><code>([\s\S]*?)<\/code><\/pre>/g, "```\n$1\n```")
      .replace(/<code>(.*?)<\/code>/g, "`$1`")
      .replace(/<a\s+(?:[^>]*?\s+)?href="([^"]*)"[^>]*>(.*?)<\/a>/g, "[$2]($1)")
      .replace(/<[^>]+>/g, "") // Remove any remaining HTML tags
      // The text came from innerHTML, which entity-escapes &, <, > and
      // non-breaking spaces (and quotes inside attributes). Undo that so code
      // samples and URLs with query strings are exported verbatim.
      // "&amp;" is decoded last so "&amp;lt;" does not become "<".
      .replace(/&nbsp;/g, " ")
      .replace(/&lt;/g, "<")
      .replace(/&gt;/g, ">")
      .replace(/&quot;/g, '"')
      .replace(/&amp;/g, "&");

    // Convert bold text at start of line to h3 headers, but not if inside a list item
    text = text.replace(/^(\s*)\*\*([^*\n]+)\*\*(?!.*\n\s*-)/gm, "$1### $2");
    // Undo the header conversion for list items: a partially bold item keeps
    // its text as is, a fully bold item gets its bold markers back.
    text = text.replace(/^(\s*-\s+)###\s+([^\n]+)/gm, (match, listPrefix, content) => {
      if (content.includes("**")) {
        return `${listPrefix}${content}`;
      }
      return `${listPrefix}**${content}**`;
    });

    // Fix list spacing (no extra newlines between items)
    text = text.replace(/\n\s*-\s+/g, "\n- ");

    // Ensure headers have proper spacing
    text = text.replace(/([^\n])(\n#{1,3} )/g, "$1\n\n$2");
    // Fix unbalanced or misplaced bold markers in list items
    text = text.replace(/^(\s*-\s+.*?)(\s\*\*\s*)$/gm, "$1"); // Remove trailing ** with space before
    // Fix citation and bold issues - make sure citations aren't wrapped in bold
    text = text.replace(/\*\*([^*]+)(\[[0-9]+\]\([^)]+\))\s*\*\*/g, "**$1**$2");
    text = text.replace(/\*\*([^*]+)(\(\[[0-9]+\]\([^)]+\)\))\s*\*\*/g, "**$1**$2");
    // Fix cases where a line ends with an extra bold marker after a citation
    text = text.replace(/(\[[0-9]+\]\([^)]+\))\s*\*\*/g, "$1");
    text = text.replace(/(\(\[[0-9]+\]\([^)]+\)\))\s*\*\*/g, "$1");

    // Clean up whitespace. Only horizontal whitespace is stripped per line:
    // "\s" would also match newlines and delete every blank line, merging
    // paragraphs and headings into one block.
    text = text
      .replace(/[ \t]+$/gm, "") // Remove trailing spaces
      .replace(/^[ \t]+/gm, "") // Remove leading spaces on each line
      .replace(/\n{3,}/g, "\n\n") // Max two consecutive newlines
      .trim();

    if (citationStyle === CITATION_STYLES.INLINE || citationStyle === CITATION_STYLES.PARENTHESIZED) {
      // Remove the space between a citation link and a following period:
      // " [1](url) ." -> " [1](url).". Limited to spaces after ")" so that
      // lines starting with "." are not joined onto the previous line.
      text = text.replace(/\)[ \t]+\./g, ").");
    }

    // Add citations at the bottom for endnotes style
    if (citationStyle === CITATION_STYLES.ENDNOTES && citationRefs.size > 0) {
      text += "\n\n### Sources\n";
      for (const [number, { href }] of citationRefs) {
        text += `[${number}] ${href}\n`;
      }
    }

    return text;
  }
  228.  
  /**
   * Build the complete markdown document: YAML front matter (title, date,
   * source URL) followed by the conversation turns.
   *
   * In the full format every turn is prefixed with its role and user turns are
   * followed by a divider; in the concise format only assistant content is
   * written. A divider is placed between assistant answers, never after the
   * last one.
   *
   * @param {Array<{role: string, content: string}>} conversations - Turns in output order.
   * @returns {string} The markdown document, trimmed of trailing whitespace.
   */
  function formatMarkdown(conversations) {
    const title = document.title.replace(" | Perplexity", "").trim();
    const timestamp = new Date().toISOString().split("T")[0];
    const prefs = getPreferences();
    const isFull = prefs.formatStyle === FORMAT_STYLES.FULL;

    // An unquoted YAML value breaks on titles containing ": ", "#", quotes,
    // leading punctuation and the like. Simple titles stay unquoted; anything
    // else is written as a JSON string, which is a valid YAML double-quoted
    // scalar.
    const isPlainTitle = /^\w[\w .,()?!'\/-]*$/.test(title);
    const yamlTitle = isPlainTitle ? title : JSON.stringify(title);

    let markdown = "---\n";
    markdown += `title: ${yamlTitle}\n`;
    markdown += `date: ${timestamp}\n`;
    markdown += `source: ${window.location.href}\n`;
    markdown += "---\n\n"; // Blank line after the front matter

    // Computed once so the "is another answer coming?" check is O(1) per turn.
    const lastAssistantIndex = conversations.map((c) => c.role).lastIndexOf("Assistant");

    conversations.forEach((conv, index) => {
      if (conv.role === "Assistant") {
        markdown += isFull ? `**${conv.role}:** ${conv.content}\n\n` : `${conv.content}\n\n`;

        // Divider only between assistant responses, not after the last one
        if (index < lastAssistantIndex) {
          markdown += "---\n\n";
        }
      } else if (conv.role === "User" && isFull) {
        markdown += `**${conv.role}:** ${conv.content}\n\n`;
        markdown += "---\n\n";
      }
    });

    return markdown.trim(); // Trim any trailing whitespace at the very end
  }
  262.  
  /**
   * Collect the conversation turns currently rendered on the page.
   *
   * User queries and assistant answers are fetched with ONE combined selector:
   * querySelectorAll returns matches in document order, so the turns come out
   * in the order they appear on the page instead of "all questions, then all
   * answers". If the primary selectors match nothing, a looser fallback pair
   * is tried.
   *
   * @param {string} citationStyle - One of CITATION_STYLES, forwarded to htmlToMarkdown.
   * @returns {Array<{role: string, content: string}>} Turns with role "User"
   *   (trimmed text) or "Assistant" (markdown); empty when nothing matched.
   */
  function extractConversation(citationStyle) {
    const selectorSets = [
      {
        notice: "Using updated selectors for Perplexity",
        user: ".whitespace-pre-line.text-pretty.break-words",
        assistant: ".prose.text-pretty.dark\\:prose-invert",
      },
      {
        // More generic selectors that might match Perplexity's structure
        notice: "Attempting to use fallback selectors",
        user: "[class*='whitespace-pre-line'][class*='break-words']",
        assistant: "[class*='prose'][class*='prose-invert']",
      },
    ];

    for (const { notice, user, assistant } of selectorSets) {
      console.log(notice);
      const conversation = [];

      document.querySelectorAll(`${user}, ${assistant}`).forEach((node) => {
        if (node.matches(assistant)) {
          conversation.push({
            role: "Assistant",
            content: htmlToMarkdown(node.innerHTML, citationStyle),
          });
        } else {
          conversation.push({
            role: "User",
            content: node.textContent.trim(),
          });
        }
      });

      if (conversation.length > 0) {
        return conversation;
      }
    }

    return [];
  }
  310.  
  /**
   * Offer `content` to the user as a file download named `filename`.
   *
   * A temporary <a download> pointing at a Blob object URL is clicked
   * programmatically. The link is always removed again, and the object URL is
   * revoked on a later task rather than synchronously, so the browser has
   * started reading the blob before the URL becomes invalid.
   *
   * @param {string} content - File contents.
   * @param {string} filename - Suggested file name for the download.
   */
  function downloadMarkdown(content, filename) {
    const blob = new Blob([content], { type: "text/markdown" });
    const url = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = url;
    link.download = filename;
    document.body.appendChild(link);
    try {
      link.click();
    } finally {
      // Clean up even if click() throws, so no stray link or blob URL leaks.
      link.remove();
      setTimeout(() => URL.revokeObjectURL(url), 1000);
    }
  }
  323.  
  /**
   * Create the floating "Export as Markdown" button and attach it to the page,
   * replacing any button left over from a previous call.
   *
   * Clicking the button extracts the conversation with the stored preferences
   * and downloads it as "<sanitised page title>.md".
   */
  function addExportButton() {
    document.getElementById("perplexity-export-btn")?.remove();

    const button = document.createElement("button");
    button.id = "perplexity-export-btn";
    button.textContent = "Export as Markdown";
    button.style.cssText = `
      position: fixed;
      bottom: 20px;
      right: 80px;
      padding: 8px 16px;
      background-color: #6366f1;
      color: white;
      border: none;
      border-radius: 8px;
      cursor: pointer;
      font-size: 14px;
      z-index: 99999;
      font-family: system-ui, -apple-system, sans-serif;
      transition: background-color 0.2s;
      box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    `;

    // Darken on hover.
    button.addEventListener("mouseenter", () => {
      button.style.backgroundColor = "#4f46e5";
    });

    button.addEventListener("mouseleave", () => {
      button.style.backgroundColor = "#6366f1";
    });

    button.addEventListener("click", () => {
      const prefs = getPreferences();
      const conversation = extractConversation(prefs.citationStyle);
      if (conversation.length === 0) {
        alert("No conversation content found to export.");
        return;
      }

      const title = document.title.replace(" | Perplexity", "").trim();
      // Non-alphanumeric runs become single spaces, so the ends must be
      // trimmed of spaces (not dashes); otherwise "What is X?" would yield
      // "what is x .md".
      const safeTitle = title
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, " ")
        .trim();
      // Titles with no ASCII letters or digits sanitise to nothing; fall back
      // to a fixed name instead of producing ".md".
      const filename = `${safeTitle || "perplexity-export"}.md`;

      const markdown = formatMarkdown(conversation);
      downloadMarkdown(markdown, filename);
    });

    document.body.appendChild(button);
  }
  380.  
  /**
   * Start the script: add the export button as soon as an answer is present
   * on the page, and re-add it whenever later DOM changes leave an answer
   * without a button.
   */
  function init() {
    const PRIMARY_ANSWER = ".prose.text-pretty.dark\\:prose-invert";
    const FALLBACK_ANSWER = "[class*='prose'][class*='prose-invert']";

    // True when at least one answer container (primary or fallback selector) exists.
    const hasAnswer = () =>
      Boolean(document.querySelector(PRIMARY_ANSWER) || document.querySelector(FALLBACK_ANSWER));

    const onMutation = () => {
      if (!hasAnswer()) {
        return;
      }
      if (document.getElementById("perplexity-export-btn")) {
        return;
      }
      addExportButton();
    };

    const observer = new MutationObserver(onMutation);
    observer.observe(document.body, { childList: true, subtree: true });

    // Handle answers that were already rendered before the observer started.
    if (hasAnswer()) {
      addExportButton();
    }
  }
  401.  
  // Run init() right away when the document is already parsed; otherwise wait
  // for DOMContentLoaded.
  const domStillLoading = document.readyState === "loading";
  if (!domStillLoading) {
    init();
  } else {
    document.addEventListener("DOMContentLoaded", init);
  }
  407. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址