extractsentence

提取选中词所在句子

此脚本不应直接安装。它是供其他脚本使用的外部库。要使用该库,请在脚本中加入元指令 // @require https://update.gf.qytechs.cn/scripts/534393/1605572/extractsentence.js

  1. // altered from https://github.com/ninja33/ODH/blob/master/src/fg/js/text.js
  2. ;const {
  3. getSentence, calSentence, cutSentence, escapeRegExp
  4. } = (() => {
  // Maps each HTML-significant character to the entity that replaces it.
  // '&' must become '&amp;'; mapping it to itself would leave ampersands
  // in page text unescaped in the generated HTML.
  const HtmlTagsToReplace = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;'
  };
  10.  
  /**
   * Look up the replacement for a single character in HtmlTagsToReplace.
   * @param {string} tag - Character to look up.
   * @returns {string} The mapped value, or `tag` itself when the lookup is falsy.
   */
  function replaceHtmlTag(tag) {
    const entity = HtmlTagsToReplace[tag];
    return entity ? entity : tag;
  }
  14.  
  /**
   * Run every '&', '<' and '>' in a string through replaceHtmlTag.
   * @param {string} string - Text to process.
   * @returns {string} The text with each matched character replaced.
   */
  function escapeHtmlTag(string) {
    const htmlSpecials = /[&<>]/g;
    return string.replace(htmlSpecials, replaceHtmlTag);
  }
  18.  
  /**
   * Backslash-escape every regular-expression metacharacter in a string so
   * the result can be embedded in a RegExp source as literal text.
   * @param {string} string - Text to escape.
   * @returns {string} The escaped text.
   */
  function escapeRegExp(string) {
    const metaChars = /[.*+?^${}()|[\]\\]/g;
    // `$&` inserts the matched character itself after the added backslash.
    const escapedForm = '\\$&';
    return string.replace(metaChars, escapedForm);
  }
  22.  
  // Deliberately not named `replaceAll`: per the original author's note, using
  // that name would also change the page's (unsafeWindow) String replaceAll.
  /**
   * Replace occurrences of a literal search string.
   * @param {string} search - Literal text to find (regex metacharacters are escaped).
   * @param {string|Function} replacement - Passed straight to String.prototype.replace.
   * @param {string} [flag='g'] - RegExp flags used for the search.
   * @returns {string} The string with replacements applied.
   */
  String.prototype.replaceAllX = function (search, replacement, flag = 'g') {
    const literalPattern = new RegExp(escapeRegExp(search), flag);
    return this.replace(literalPattern, replacement);
  };
  29.  
  /**
   * Find every case-insensitive occurrence of a literal search string.
   * @param {string} search - Literal text to find (regex metacharacters are escaped).
   * @returns {number[]} Start index of each match, in ascending order.
   */
  String.prototype.searchAll = function (search) {
    const literalPattern = new RegExp(escapeRegExp(search), 'gi');
    // matchAll advances past zero-length matches, so an empty search string
    // terminates instead of looping forever as a manual exec() loop would.
    return Array.from(String(this).matchAll(literalPattern), (match) => match.index);
  };
  41.  
  /**
   * Check whether the document contains an element matching
   * `div#viewer.pdfViewer` or `pdf-viewer#viewer`.
   * @returns {boolean} True when at least one such element exists.
   */
  function isPDFJSPage() {
    const viewers = document.querySelectorAll('div#viewer.pdfViewer,pdf-viewer#viewer');
    return viewers.length > 0;
  }
  45.  
  /**
   * Tell whether a value is falsy ('' / null / undefined / 0 / NaN / false).
   * @param {*} word - Value to check.
   * @returns {boolean} True when `word` is falsy.
   */
  function isEmpty(word) {
    return word ? false : true;
  }
  49.  
  /**
   * Tell whether a word is shorter than three characters or contains a digit.
   * @param {string} word - Word to check.
   * @returns {boolean} True when either condition holds.
   */
  function isShortandNum(word) {
    if (word.length < 3) {
      return true;
    }
    return /\d/.test(word);
  }
  54.  
  /**
   * Tell whether a word contains at least one character in the range
   * U+4E00..U+9FA5.
   * @param {string} word - Word to check.
   * @returns {boolean} True when such a character is present.
   */
  function isChinese(word) {
    // A fresh regex per call, so the `g` flag's lastIndex state never carries over.
    const hanRun = /[\u4e00-\u9fa5]+/gi;
    return hanRun.exec(word) !== null;
  }
  59.  
  /**
   * Decide whether a word should be rejected.
   *
   * Empty values are invalid, words containing Chinese characters are valid,
   * and anything else is invalid when it is shorter than three characters or
   * contains a digit.
   * @param {string} word - Word to check.
   * @returns {boolean} True when the word is invalid.
   */
  function isInvalid(word) {
    // Check emptiness first so null/undefined never reach `word.length`.
    if (isEmpty(word)) {
      return true;
    }
    if (isChinese(word)) {
      return false;
    }
    return isShortandNum(word);
  }
  64.  
  /**
   * Cut the sentence(s) around a word out of a text and highlight the word.
   *
   * @param {string} word - Text to highlight; a falsy value yields ''.
   * @param {number} offset - Index in `sentence` of the occurrence of `word`
   *   that was selected; used to pick the right sentence when the word occurs
   *   more than once.
   * @param {string} sentence - Text to cut from.
   * @param {number} sentenceNum - How many sentences to keep. When not greater
   *   than 0 the whole text is kept and only the first occurrence of `word`
   *   is highlighted.
   * @param {string} [wordFormat='<b>{$bold}</b>'] - Template applied to each
   *   whitespace-separated token of the word; `{$bold}` marks the token.
   * @param {string} [sentenceFormat='{$sentence}'] - Template for the result;
   *   `{$sentence}` marks the cut sentence.
   * @returns {string} The formatted sentence.
   */
  function cutSentence(word, offset, sentence, sentenceNum, wordFormat, sentenceFormat) {
    if (!word) {
      return '';
    }
    const text = sentence == null ? '' : String(sentence);
    // `$&` makes String.replace insert the matched token into the template.
    const tokenPattern = (wordFormat || '<b>{$bold}</b>').split('{$bold}').join('$&');
    const template = sentenceFormat || '{$sentence}';
    // Used as a replacer *function* so that `$` sequences occurring in the
    // page text are inserted literally, and so that each occurrence keeps its
    // own casing instead of being overwritten with the selected word's casing.
    const highlight = (matched) => matched.replace(/\S+/g, tokenPattern);

    let excerpt;
    if (sentenceNum > 0) {
      let parts = text.match(/((?![.!?;:。!?]['"’”]?\s).|\n)*[.!?;:。!?]['"’”]?(\s|.*$)/g);
      if (parts && parts.length > 1) {
        // Walk right-to-left and glue a piece ending in a short dotted
        // abbreviation (e.g. "e.g. ") onto the piece that follows it.
        parts = parts
          .reduceRight((merged, piece) => {
            if (piece.search(/\.\w{0,3}\.\s$/g) !== -1) {
              merged[0] = piece + merged[0];
            } else {
              merged.unshift(piece);
            }
            return merged;
          }, [''])
          .filter((piece) => piece.length);
      } else {
        parts = [text];
      }

      // Try to locate the exact occurrence: `remaining` is the offset
      // relative to the start of the piece currently being examined.
      let remaining = offset;
      let index = parts.findIndex((piece) => {
        if (piece.includes(word) && piece.searchAll(word).includes(remaining)) {
          return true;
        }
        remaining -= piece.length;
        return false;
      });
      if (index === -1) {
        // Fall back to the first piece that contains the word at all.
        index = parts.findIndex((piece) => piece.includes(word));
      }

      // Centre a window of `sentenceNum` pieces on the match, then clamp it
      // to the available pieces.
      const left = Math.ceil((sentenceNum - 1) / 2);
      let start = index - left;
      let end = index + ((sentenceNum - 1) - left);
      if (start < 0) {
        start = 0;
        end = sentenceNum - 1;
      } else if (end > parts.length - 1) {
        end = parts.length - 1;
        start = Math.max(0, end - (sentenceNum - 1));
      }

      excerpt = parts.slice(start, end + 1).join('').replaceAllX(word, highlight, 'gi');
    } else {
      excerpt = text.replace(word, highlight);
    }
    // Replacer function again: the excerpt must not be parsed for `$` patterns.
    return template.replaceAll('{$sentence}', () => excerpt);
  }
  125.  
  /**
   * Measure where the current selection starts and ends inside a node.
   *
   * A clone of the selection's first range is re-anchored to the contents of
   * `node`; its end is then moved to the selection's start and end in turn,
   * and the length of the range's text gives each offset.
   * @param {Node} node - Node the offsets are measured from.
   * @returns {{start: number, end: number}} Text lengths up to the selection
   *   start and end.
   */
  function getSelectionOffset(node) {
    const selected = window.getSelection().getRangeAt(0);
    const probe = selected.cloneRange();
    probe.selectNodeContents(node);

    const lengthUpTo = (container, position) => {
      probe.setEnd(container, position);
      return probe.toString().length;
    };

    const start = lengthUpTo(selected.startContainer, selected.startOffset);
    const end = lengthUpTo(selected.endContainer, selected.endOffset);
    return { start, end };
  }
  137.  
  /**
   * Build the text surrounding the current selection from sibling nodes.
   *
   * Starting from the parent of `node`, previous and next siblings are
   * collected until one of them contains sentence-ending punctuation (that
   * sibling is included). The siblings' text is then joined: a node whose text
   * ends in '-' is joined to the next one without the hyphen, a node that is
   * exactly '-' removes the last character collected so far, and every other
   * node is followed by a space.
   * @param {Node} node - Node the selection lies in.
   * @returns {{sentence: string, offset: number}} The joined text and the
   *   position of the selection start within it.
   */
  function getPDFNode(node) {
    const sentenceEnd = /[.!?;:。!?]['"’”]?(\s|.*$)/g;

    // Step up to the parent. The loop only continues while `name` is truthy,
    // which it is not on ordinary nodes, so this normally climbs one level.
    // NOTE(review): `name` may have been meant to be `nodeName` - confirm.
    let container = node;
    do {
      container = container.parentNode;
    } while (container.name && container.nodeName.toUpperCase() !== 'SPAN' && container.nodeName.toUpperCase() !== 'DIV');
    const currentSpan = container;

    const sentenceNodes = [currentSpan];
    // Number of siblings placed in front of currentSpan, i.e. its final index.
    let backwardIndex = 0;
    for (let previous = currentSpan.previousSibling; previous; previous = previous.previousSibling) {
      sentenceNodes.unshift(previous);
      backwardIndex += 1;
      if (previous.textContent.search(sentenceEnd) !== -1) {
        break;
      }
    }
    for (let next = currentSpan.nextSibling; next; next = next.nextSibling) {
      sentenceNodes.push(next);
      if (next.textContent.search(sentenceEnd) !== -1) {
        break;
      }
    }

    // No filtering of the collected nodes: the previous filter's predicate
    // was always true, and dropping nodes here would invalidate backwardIndex.
    let sentence = '';
    let offset = 0;
    sentenceNodes.forEach((item, position) => {
      if (position === backwardIndex) {
        // Text collected so far plus the selection's offset in its container.
        offset = sentence.length + window.getSelection().getRangeAt(0).startOffset;
      }
      const nodeText = item.textContent;
      if (nodeText === '-') {
        sentence = sentence.slice(0, -1);
      } else if (nodeText.endsWith('-')) {
        sentence += nodeText.slice(0, -1);
      } else {
        sentence += nodeText + ' ';
      }
    });

    return { sentence, offset };
  }
  183.  
  /**
   * Collect the selected word together with the text around it.
   *
   * Returns an empty sentence when there is no selection range or when the
   * selection's common ancestor is an INPUT or TEXTAREA. On PDF.js pages the
   * text comes from getPDFNode; otherwise it is the text content of the node
   * chosen by getWebNode (searching at most four levels up).
   * @returns {{sentence: string, offset: number, word: string}} The
   *   HTML-escaped surrounding text, the index of the selection start within
   *   that escaped text, and the trimmed selected text.
   */
  function calSentence() {
    const upNum = 4;

    const selection = window.getSelection();
    const word = (selection.toString() || '').trim();
    const empty = { sentence: '', offset: 0, word };
    if (selection.rangeCount < 1) {
      return empty;
    }

    const anchor = selection.getRangeAt(0).commonAncestorContainer;
    if (['INPUT', 'TEXTAREA'].includes(anchor.tagName)) {
      return empty;
    }

    let rawSentence = '';
    let rawOffset = 0;
    if (isPDFJSPage()) {
      ({ sentence: rawSentence, offset: rawOffset } = getPDFNode(anchor));
    } else {
      const container = getWebNode(anchor, upNum);
      if (container !== document) {
        rawSentence = container.textContent;
        rawOffset = getSelectionOffset(container).start;
      }
    }

    // The offset is measured on the raw text, but the sentence is returned
    // escaped. Escape the text before the selection as well so the offset
    // indexes into the returned string.
    // (assumes the raw offset is an index into rawSentence - TODO confirm for
    // nodes whose range text differs from textContent)
    return {
      sentence: escapeHtmlTag(rawSentence),
      offset: escapeHtmlTag(rawSentence.slice(0, rawOffset)).length,
      word,
    };
  }
  215.  
  /**
   * Return the formatted sentence(s) around the current selection.
   * @param {number} sentenceNum - Forwarded to cutSentence.
   * @param {string} [wordFormat] - Forwarded to cutSentence.
   * @param {string} [sentenceFormat] - Forwarded to cutSentence.
   * @returns {string} The formatted sentence, or '' when nothing is selected.
   */
  function getSentence(sentenceNum, wordFormat, sentenceFormat) {
    const { word, offset, sentence } = calSentence();
    if (word === '') {
      return '';
    }
    // calSentence returns the sentence HTML-escaped but the word raw, so the
    // word is escaped the same way; otherwise a word containing '<', '>' or
    // '&' could never be found in the sentence.
    return cutSentence(escapeHtmlTag(word), offset, sentence, sentenceNum, wordFormat, sentenceFormat);
  }
  223.  
  /**
   * Walk up from a node to the nearest block-level container.
   *
   * The walk stops at the first node whose name is LI, P, DIV, BODY, PRE or
   * CODE, after `deep` steps, or at a node that has no parent element,
   * whichever comes first.
   * @param {Node} node - Node to start from.
   * @param {number} deep - Maximum number of steps upwards.
   * @returns {Node} The node the walk stopped at.
   */
  function getWebNode(node, deep) {
    const blockTags = ['LI', 'P', 'DIV', 'BODY', 'PRE', 'CODE'];
    let current = node;
    let remaining = deep;
    while (
      remaining !== 0 &&
      !blockTags.includes(current.nodeName.toUpperCase()) &&
      // Stop at the top of the tree instead of dereferencing a null parent.
      current.parentElement
    ) {
      current = current.parentElement;
      remaining -= 1;
    }
    return current;
  }
  233.  
  /**
   * Return the text of the current window selection with surrounding
   * whitespace removed.
   * @returns {string} The trimmed selection text, or '' when it is empty.
   */
  function selectedText() {
    const rawText = window.getSelection().toString();
    return (rawText || '').trim();
  }
  238.  
  /**
   * Tell whether the focused element is something other than an INPUT or a
   * TEXTAREA. Elements with a `contenteditable` attribute are not excluded
   * (that check is disabled).
   * @returns {boolean} False when the active element is an INPUT or TEXTAREA.
   */
  function isValidElement() {
    const activeTag = document.activeElement.nodeName.toUpperCase();
    return activeTag !== 'INPUT' && activeTag !== 'TEXTAREA';
  }
  247.  
  248. return {
  249. getSentence, calSentence, cutSentence, escapeRegExp
  250. }
  251. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址