您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
获取大众点评网页评论,解决动态字体加密
// ==UserScript==
// @name 大众点评评论
// @namespace http://tampermonkey.net/
// @version 0.11
// @description 获取大众点评网页评论,解决动态字体加密
// @author You
// @match http://www.dianping.com/shop*
// @match https://www.dianping.com/shop*
// @icon https://www.google.com/s2/favicons?domain=dianping.com
// @require https://gf.qytechs.cn/scripts/435146-html2canvas-132/code/html2canvas132.js?version=986217
// @require https://unpkg.com/[email protected]/dist/tesseract.min.js
// @grant GM.xmlHttpRequest
// ==/UserScript==

// NOTE(review): the second @require URL contains "[email protected]", which looks like
// an e-mail-obfuscation artifact in this copy of the script (presumably it was
// "tesseract.js@<version>"). Restore the real URL from the upstream script.

/* global html2canvas Tesseract */

const moreBtnClass = '.fold';
const lessBtnClass = '.unfold';
const commentClass = '.review-words';
const nextBtnClass = '.NextPage';
const ppocrUrl = 'https://www.paddlepaddle.org.cn/paddlehub-api/image_classification/chinese_ocr_db_crnn_mobile';

// Width of one glyph cell in the sprite: the x offset is divided by this value
// to obtain the column inside a row of SVG text.
const GLYPH_WIDTH_PX = 14;

// Id of the floating result panel, so repeated runs replace it instead of stacking.
const RESULT_PANEL_ID = 'dp-comment-result-panel';

/**
 * Extract the sprite offsets declared by rules of the form
 * `.name{background:-Xpx -Ypx;}`.
 *
 * The class name is restricted to word characters and `-`, so a match can never
 * start inside a preceding `[class^="..."]{...}` rule and swallow the class rule
 * that follows it.
 *
 * @param {string} cssContent - Raw text of one stylesheet.
 * @returns {Map<string, number[]>} Class name -> `[x, y]`, each rounded to an integer.
 */
function parseCssOffsets(cssContent) {
    const offsets = new Map();
    const rule = /\.([\w-]+)\{background:-([\d.]+)px -([\d.]+)px;\}/g;
    for (const [, name, x, y] of cssContent.matchAll(rule)) {
        offsets.set(name, [Math.round(Number(x)), Math.round(Number(y))]);
    }
    return offsets;
}

/**
 * Find the `[class^="prefix"]` rules that reference a sprite on s3plus.meituan.net.
 *
 * @param {string} cssContent - Raw text of one stylesheet.
 * @returns {{prefix: string, url: string}[]} One entry per rule, in document order;
 *   `url` is made absolute by prepending `https:` to the protocol-relative match.
 */
function parseSvgRefs(cssContent) {
    const rule = /\[class\^=\"(.*?)\"\].*?url\((\/\/s3plus.meituan.net\/v1\/.*?)\)/g;
    return [...cssContent.matchAll(rule)].map(([, prefix, path]) => ({
        prefix,
        url: `https:${path}`,
    }));
}

/**
 * Turn one sprite SVG into a lookup table keyed by the `y` attribute of its
 * `<text>` rows.
 *
 * @param {string} svgContent - Raw SVG text (an empty string yields an empty table).
 * @returns {{yOffset: number, rowByY: Map<string, string>}} `rowByY` maps the literal
 *   `y` attribute to that row's characters. `yOffset` is added to a CSS y offset
 *   before the lookup: 15 when the SVG mentions `#666`, otherwise 23 when it
 *   mentions `#333`, otherwise 0 (constants kept from the original script;
 *   presumably they correspond to two font sizes - TODO confirm).
 */
function parseSvgGlyphTable(svgContent) {
    const rowByY = new Map();
    const rowPattern = /<text x=".*?" y="(.*?)">(.*?)<\/text>/g;
    for (const [, y, text] of svgContent.matchAll(rowPattern)) {
        rowByY.set(y, text);
    }
    let yOffset = 0;
    if (svgContent.includes('#666')) {
        yOffset = 15;
    } else if (svgContent.includes('#333')) {
        yOffset = 23;
    }
    return { yOffset, rowByY };
}

/**
 * Resolve one sprite offset to the character it shows.
 *
 * @param {number[]} offset - `[x, y]` as produced by `parseCssOffsets`.
 * @param {{yOffset: number, rowByY: Map<string, string>}|undefined} table - Glyph
 *   table from `parseSvgGlyphTable`.
 * @returns {string} The character, or `''` when the table, row or column is missing.
 */
function decodeGlyph([x, y], table) {
    if (!table) return '';
    const row = table.rowByY.get(String(y + table.yOffset));
    if (row === undefined) return '';
    return [...row][Math.floor(x / GLYPH_WIDTH_PX)] ?? '';
}

/**
 * Combine CSS offsets and glyph tables into a class -> character map.
 *
 * A table applies to a class when the class name starts with the table's prefix,
 * mirroring the `[class^="prefix"]` selector the prefix was taken from.
 *
 * @param {Map<string, number[]>} offsets - Output of `parseCssOffsets`.
 * @param {Map<string, object>} tablesByPrefix - Prefix -> glyph table.
 * @returns {Map<string, string>} Every class in `offsets`, mapped to its character
 *   or to `''` when it cannot be resolved.
 */
function buildGlyphMap(offsets, tablesByPrefix) {
    const prefixes = [...tablesByPrefix.keys()];
    const glyphs = new Map();
    for (const [className, offset] of offsets) {
        const prefix = prefixes.find((p) => className.startsWith(p));
        const table = prefix === undefined ? undefined : tablesByPrefix.get(prefix);
        glyphs.set(className, decodeGlyph(offset, table));
    }
    return glyphs;
}

/**
 * Rebuild the readable text of one comment from its child nodes.
 *
 * Nodes without a class name contribute their `textContent`; nodes with a class
 * name contribute the mapped glyph, or nothing when the class is unknown.
 *
 * @param {Iterable<{className?: *, textContent?: string}>} nodes - Child nodes.
 * @param {Map<string, string>} glyphByClass - Output of `buildGlyphMap`.
 * @returns {string} The trimmed text.
 */
function decodeCommentText(nodes, glyphByClass) {
    let text = '';
    for (const node of nodes) {
        if (node.className) {
            text += glyphByClass.get(node.className) ?? '';
        } else {
            text += node.textContent ?? '';
        }
    }
    return text.trim();
}

(function () {
    'use strict';

    // Nothing to do outside a page; this also lets the helpers above be loaded
    // in a non-browser environment.
    if (typeof document === 'undefined') return;

    const $ = document.querySelectorAll.bind(document);

    /**
     * Remove every <img> below `root`. `img.remove()` is used because the images
     * returned by querySelectorAll are descendants, not necessarily direct
     * children, and `root.removeChild(img)` throws for nested ones.
     */
    const stripImages = (root) => {
        root.querySelectorAll('img').forEach((img) => img.remove());
    };

    /**
     * Render one comment element to a canvas and send it to the OCR endpoint.
     *
     * @param {Element} elm - Comment element to render.
     * @returns {Promise<string>} Recognised text; rejects on render, network or
     *   response-shape failure.
     */
    const renderCmt = (elm) =>
        html2canvas(elm, {
            allowTaint: true,
            scale: 1,
            useCORS: true,
            width: elm.offsetWidth * 1.2,
            height: elm.offsetHeight * 1.2,
            x: -elm.offsetWidth * 0.1,
            y: -elm.offsetHeight * 0.16,
        }).then((canvas) => new Promise((resolve, reject) => {
            const data = canvas.toDataURL().split(',')[1];
            GM.xmlHttpRequest({
                method: 'POST',
                url: ppocrUrl,
                responseType: 'json',
                headers: { 'Content-Type': 'application/json' },
                data: JSON.stringify({ image: data }),
                onload: (response) => {
                    try {
                        resolve(response.response.result[0].data.map((r) => r.text).join(''));
                    } catch (err) {
                        reject(new Error('Unexpected OCR response', { cause: err }));
                    }
                },
                onerror: () => reject(new Error('OCR request failed')),
                ontimeout: () => reject(new Error('OCR request timed out')),
                onabort: () => reject(new Error('OCR request aborted')),
            });
        }));

    /**
     * OCR-based alternative to `getResult`; not wired to any button at present.
     *
     * @returns {Promise<string[]>} One recognised string per comment element.
     */
    const getAllCommentCanvas = async () => {
        const tasks = [...$(commentClass)].map((cmt) => {
            stripImages(cmt);
            return renderCmt(cmt);
        });
        return Promise.all(tasks);
    };

    /**
     * Download a text resource.
     *
     * @param {string} url - Absolute URL.
     * @returns {Promise<string>} The body on HTTP 200, otherwise `''` (best effort:
     *   a missing file only leaves some glyphs unresolved).
     */
    const getFileViaUrl = (url) => new Promise((resolve) => {
        GM.xmlHttpRequest({
            method: 'GET',
            url,
            responseType: 'text',
            headers: { 'Content-Type': 'text/css' },
            onload: (response) => resolve(response.status === 200 ? response.response : ''),
            onerror: () => resolve(''),
            ontimeout: () => resolve(''),
            onabort: () => resolve(''),
        });
    });

    /**
     * Decode every comment on the page by mapping obfuscated glyph classes back
     * to characters via the page's stylesheets and the SVG sprites they reference.
     *
     * @returns {Promise<string[]>} One decoded string per comment element.
     */
    const getResult = async () => {
        const documentTxt = new XMLSerializer().serializeToString(document);
        const cssUrls = [...documentTxt.matchAll(/href=\"(\/\/s3plus\.meituan\.net\/v1\/.*?)\"/g)]
            .map((m) => `https:${m[1]}`);
        const cssFiles = await Promise.all(cssUrls.map((url) => getFileViaUrl(url)));

        const offsets = new Map();
        let svgRefs = [];
        for (const css of cssFiles) {
            for (const [name, xy] of parseCssOffsets(css)) offsets.set(name, xy);
            // Later stylesheets go first, as in the original processing order.
            svgRefs = [...parseSvgRefs(css), ...svgRefs];
        }

        const svgFiles = await Promise.all(svgRefs.map((ref) => getFileViaUrl(ref.url)));
        const tablesByPrefix = new Map();
        svgRefs.forEach((ref, i) => {
            tablesByPrefix.set(ref.prefix, parseSvgGlyphTable(svgFiles[i]));
        });

        const glyphByClass = buildGlyphMap(offsets, tablesByPrefix);
        return [...$(commentClass)].map((cmt) => {
            stripImages(cmt);
            return decodeCommentText([...cmt.childNodes], glyphByClass);
        });
    };

    /**
     * Show the decoded comments in a fixed panel at the bottom-left of the page.
     * Text is assigned through `textContent`, so scraped content is never parsed
     * as HTML.
     *
     * @param {string[]} pics - Strings to display, one block each.
     */
    const showResult = (pics) => {
        document.getElementById(RESULT_PANEL_ID)?.remove();

        const panel = document.createElement('div');
        panel.id = RESULT_PANEL_ID;
        Object.assign(panel.style, {
            position: 'fixed',
            width: '600px',
            height: '600px',
            left: '10px',
            bottom: '20px',
            padding: '20px',
            background: '#61ffff',
            overflow: 'auto',
        });
        for (const text of pics) {
            const item = document.createElement('div');
            item.style.marginTop = '20px';
            item.textContent = text;
            panel.appendChild(item);
        }
        document.body.appendChild(panel);
    };

    /** Create one of the fixed control buttons on the right edge of the page. */
    const makeButton = (label, bottom) => {
        const button = document.createElement('button');
        button.textContent = label;
        button.style.position = 'fixed';
        button.style.right = '20px';
        button.style.bottom = bottom;
        document.body.appendChild(button);
        return button;
    };

    const btn = makeButton('开始采集', '80px');
    const next = makeButton('下一页', '120px');

    btn.onclick = async () => {
        // Expand folded comments, then hide both toggle links.
        $(moreBtnClass).forEach((b) => {
            b.click();
            b.style.opacity = '0';
        });
        $(lessBtnClass).forEach((l) => {
            l.style.opacity = '0';
        });
        try {
            showResult(await getResult());
        } catch (err) {
            console.error(err);
        }
    };

    next.onclick = () => {
        const nextBtn = $(nextBtnClass)[0];
        if (nextBtn) nextBtn.click();
    };
})();
QingJ © 2025
镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址