douyin-user-data-download

下载抖音用户主页数据!

当前为 2024-06-13 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name douyin-user-data-download
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.3.5
  5. // @description 下载抖音用户主页数据!
  6. // @author xxmdmst
  7. // @match https://www.douyin.com/*
  8. // @icon https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
  9. // @grant none
  10. // @require https://cdnjs.cloudflare.com/ajax/libs/jszip/3.6.0/jszip.min.js
  11. // @license MIT
  12. // ==/UserScript==
  13.  
  14. (function () {
  // Lookup from UTF-16 code unit to packed two-byte GBK code; 0xFFFF marks
  // "no mapping". Built on first use by initGbkTable().
  let table;

  /**
   * Builds `table` by enumerating every two-byte code in the ranges below,
   * decoding them in one pass with TextDecoder('gbk'), and recording for each
   * decoded code unit the packed code that produced it.
   */
  function initGbkTable() {
    // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
    // Each row: [firstByteMin, firstByteMax, secondByteMin, secondByteMax].
    const byteRanges = [
      [0xA1, 0xA9, 0xA1, 0xFE],
      [0xB0, 0xF7, 0xA1, 0xFE],
      [0x81, 0xA0, 0x40, 0xFE],
      [0xAA, 0xFE, 0x40, 0xA0],
      [0xA8, 0xA9, 0x40, 0xA0],
      [0xAA, 0xAF, 0xA1, 0xFE],
      [0xF8, 0xFE, 0xA1, 0xFE],
      [0xA1, 0xA7, 0x40, 0xA0],
    ];
    const packed = new Uint16Array(23940);
    let count = 0;

    byteRanges.forEach(([leadMin, leadMax, trailMin, trailMax]) => {
      for (let trail = trailMin; trail <= trailMax; trail += 1) {
        // 0x7F is skipped as a second byte.
        if (trail === 0x7F) {
          continue;
        }
        for (let lead = leadMin; lead <= leadMax; lead += 1) {
          // First byte goes in the low half of the 16-bit value.
          // NOTE(review): the decode below reads the array's raw bytes, so the
          // lead/trail order seen by the decoder depends on platform byte order.
          packed[count] = (trail << 8) | lead;
          count += 1;
        }
      }
    });

    // Publish the (all-unmapped) table before decoding, then fill it in.
    table = new Uint16Array(65536).fill(0xFFFF);
    const decoded = new TextDecoder('gbk').decode(packed);
    for (let pos = 0; pos < decoded.length; pos += 1) {
      table[decoded.charCodeAt(pos)] = packed[pos];
    }
  }
  48.  
  /**
   * Encodes a string as GBK bytes using the lazily built `table`.
   *
   * @param {string} str - Text to encode.
   * @param {{onAlloc?: function(number): *, onError?: function(number, string): number}} [opt]
   *   onAlloc(len) supplies the output buffer (defaults to Buffer.allocUnsafe
   *   when a global Buffer exists, otherwise a Uint8Array). onError(index, str)
   *   is asked for a replacement when a code unit has no mapping: -1 stops
   *   encoding, a value above 0xFF is written as two bytes (low byte first),
   *   anything else as one byte. The default replacement is 63 ('?').
   * @returns {*} `buf.subarray(0, n)` over the bytes actually written.
   */
  function str2gbk(str, opt = {}) {
    if (!table) {
      initGbkTable();
    }
    const nodeAlloc = typeof Buffer === 'function' && Buffer.allocUnsafe;
    const fallbackAlloc = nodeAlloc
      ? (size) => nodeAlloc(size)
      : (size) => new Uint8Array(size);
    const onAlloc = opt.onAlloc || fallbackAlloc;
    const onError = opt.onError || (() => 63);

    // Two bytes per UTF-16 code unit is the most any branch below writes.
    const out = onAlloc(str.length * 2);
    let used = 0;
    const put = (value) => {
      out[used] = value;
      used += 1;
    };

    for (let pos = 0; pos < str.length; pos += 1) {
      const unit = str.charCodeAt(pos);
      if (unit < 0x80) {
        put(unit);
        continue;
      }

      const mapped = table[unit];
      if (mapped !== 0xFFFF) {
        put(mapped);
        put(mapped >> 8);
        continue;
      }

      // U+20AC (euro sign) is emitted as the single byte 0x80.
      if (unit === 8364) {
        put(0x80);
        continue;
      }

      const replacement = onError(pos, str);
      if (replacement === -1) {
        break;
      }
      put(replacement);
      if (replacement > 0xFF) {
        put(replacement >> 8);
      }
    }
    return out.subarray(0, used);
  }
  92.  
  // Posts collected so far from intercepted post-list responses.
  const aweme_list = [];

  // Column labels for the profile values, in the same order as userData.
  const userKey = [
    "昵称",
    "关注",
    "粉丝",
    "获赞",
    "抖音号",
    "IP属地",
    "性别",
    "位置",
    "签名",
    "作品数",
    "主页",
  ];

  // Profile values pushed by the profile-response interceptor.
  const userData = [];

  // Handle of the pending "decorate post cards" timeout scheduled by flush().
  let timer;
  101.  
  /**
   * Copies the captured profile as "label:value" lines to the clipboard and
   * reports the outcome in `node`, clearing that text after two seconds.
   *
   * @param {{textContent: string}} node - Element used for the status message.
   */
  function copyUserData(node) {
    if (!userData[0]) {
      alert("未抓取到用户数据!");
      return;
    }

    const lines = userKey.map((label, idx) => `${label}:${userData[idx]}`);
    const report = (message) => () => {
      node.textContent = message;
    };

    navigator.clipboard
      .writeText(lines.join("\n"))
      .then(report("复制成功"))
      .catch(report("复制失败"));

    setTimeout(() => {
      node.textContent = '';
    }, 2000);
  }
  120.  
  /**
   * Creates a half-transparent button pinned to the right edge of its
   * positioned parent.
   *
   * @param {string} text - Button caption.
   * @param {string} top - CSS `top` offset, e.g. "21px".
   * @param {function(Event): void} func - Click handler.
   * @returns {HTMLButtonElement} The button (not yet attached to the page).
   */
  function createVideoButton(text, top, func) {
    const btn = document.createElement("button");
    btn.textContent = text;
    Object.assign(btn.style, {
      position: "absolute",
      right: "0px",
      top,
      opacity: "0.5",
    });
    btn.addEventListener("click", func);
    return btn;
  }
  131.  
  /**
   * Opens `url` in a new tab by clicking a temporary anchor that is attached
   * to the body only for the duration of the click.
   *
   * @param {string} url - Address to open.
   */
  function openLink(url) {
    const anchor = Object.assign(document.createElement('a'), {
      href: url,
      target: "_blank",
    });
    const host = document.body;
    host.appendChild(anchor);
    anchor.click();
    host.removeChild(anchor);
  }
  140.  
  /**
   * Adds "open source", "download video" and (for image posts) "download
   * images as zip" buttons to each post card in the profile grid.
   *
   * Card i is paired with aweme_list[i]. Cards that have no captured entry
   * yet are left untouched and unmarked, so a later call can decorate them
   * once their data arrives; previously such a card threw part-way through
   * and received duplicate buttons on the next call.
   */
  function createEachButton() {
    // File-name stem: first 20 characters of the description with characters
    // that are unsafe in file names removed; falls back to the post id when
    // the description is empty or sanitises to nothing.
    const baseName = (item) => {
      const cleaned = item.desc ? item.desc.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") : "";
      return cleaned || item.awemeId;
    };

    // Triggers a browser download for `blob`, then releases the object URL.
    const saveBlob = (blob, filename) => {
      const objectUrl = URL.createObjectURL(blob);
      const a = document.createElement('a');
      a.href = objectUrl;
      a.download = filename;
      a.click();
      // Deferred so the browser has started the download before the URL goes away.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
    };

    const swallowClick = (event) => {
      event.preventDefault();
      event.stopPropagation();
    };

    const targetNodes = document.querySelectorAll("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] > li a");
    for (let i = 0; i < targetNodes.length; i++) {
      const targetNode = targetNodes[i];
      if (targetNode.dataset.added) {
        continue;
      }
      const item = aweme_list[i];
      if (!item) {
        // No data for this card, hence none for any later card either.
        break;
      }

      targetNode.appendChild(createVideoButton("打开视频源", "0px", (event) => {
        swallowClick(event);
        openLink(item.url);
      }));

      const downloadVideoButton = createVideoButton("下载视频", "21px", (event) => {
        swallowClick(event);
        const xhr = new XMLHttpRequest();
        xhr.open('GET', item.url.replace("http://", "https://"), true);
        xhr.responseType = 'blob';
        xhr.onload = () => {
          if (xhr.status < 200 || xhr.status >= 300) {
            // Do not save an error page as a media file.
            downloadVideoButton.textContent = "下载失败";
            return;
          }
          saveBlob(xhr.response, baseName(item) + (item.images ? ".mp3" : ".mp4"));
        };
        xhr.onerror = () => {
          downloadVideoButton.textContent = "下载失败";
        };
        xhr.onprogress = (progress) => {
          if (progress.lengthComputable) {
            downloadVideoButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
          }
        };
        xhr.send();
      });
      targetNode.appendChild(downloadVideoButton);

      if (item.images) {
        targetNode.appendChild(createVideoButton("图片打包下载", "42px", (event) => {
          swallowClick(event);
          const zip = new JSZip();
          // Progress for the zip is shown on the video button, as before.
          downloadVideoButton.textContent = "下载并打包中...";
          const downloads = item.images.map((link, index) => fetch(link)
            .then((response) => {
              if (!response.ok) {
                throw new Error("HTTP " + response.status + " for " + link);
              }
              return response.arrayBuffer();
            })
            .then((buffer) => {
              zip.file(`image_${index + 1}.jpg`, buffer);
            }));
          Promise.all(downloads)
            .then(() => zip.generateAsync({type: "blob"}))
            .then((content) => {
              saveBlob(content, baseName(item) + ".zip");
              downloadVideoButton.textContent = "图片打包完成";
            })
            .catch((err) => {
              console.error(err);
              downloadVideoButton.textContent = "图片打包失败";
            });
        }));
      }
      targetNode.dataset.added = true;
    }
  }
  203.  
  /**
   * Refreshes the counters and status text after new posts were captured and
   * (re)schedules createEachButton half a second from now, replacing any
   * run that is still pending.
   */
  function flush() {
    if (timer !== undefined) {
      clearTimeout(timer);
      timer = undefined;
    }
    timer = setTimeout(createEachButton, 500);

    const total = aweme_list.length;
    const img_num = aweme_list.reduce((count, post) => (post.images ? count + 1 : count), 0);

    data_button.p2.textContent = `${total}`;
    dimg_button.p2.textContent = `${img_num}`;
    msg_pre.textContent = `已加载${total}个作品,${img_num}个图文\n激活上方头像可展开下载按钮`;
  }
  215.  
  // Set when posts were captured before the header buttons existed; the
  // start-up code calls flush() once the buttons have been created.
  let flag = false;

  /**
   * Wraps XMLHttpRequest.prototype.send so that finished responses of the
   * post-list and profile endpoints are parsed into aweme_list / userData.
   *
   * Changes from the previous version:
   *  - the profile "位置" value used a malformed template literal (a syntax
   *    error); it is now city followed by district;
   *  - a listener is added instead of overwriting `onreadystatechange`, so
   *    a handler installed by the page keeps working;
   *  - the request URL falls back to the standard `responseURL` when no
   *    `_url` property is present on the request (this script never sets it);
   *  - unparsable or unexpectedly shaped responses are logged, not thrown.
   */
  function interceptResponse() {
    const originalSend = XMLHttpRequest.prototype.send;
    // Requests that already carry our listener (an XHR object can be reused).
    const hooked = new WeakSet();

    // Returns the response as an object, or null when it is not JSON.
    const readJson = (xhr) => {
      if (xhr.responseType === "json") {
        return xhr.response;
      }
      if (xhr.responseType === "" || xhr.responseType === "text") {
        return JSON.parse(xhr.responseText);
      }
      return null;
    };

    // Reduces one API post item to the fields the script uses.
    const toPost = (item) => ({
      "awemeId": item.aweme_id,
      // Keep ASCII, CJK ideographs and full-width forms; everything else becomes a space.
      "desc": (item.desc || "").replace(/[^\x00-\x7F\u4E00-\u9FFF\uFF00-\uFFEF]+/g, " ").trim(),
      "diggCount": item.statistics.digg_count,
      "commentCount": item.statistics.comment_count,
      "collectCount": item.statistics.collect_count,
      "shareCount": item.statistics.share_count,
      "date": new Date(item.create_time * 1000).toLocaleString(),
      "url": item.video.play_addr.url_list[0],
      // Last entry of each image's URL list (read without mutating the response).
      "images": item.images ? item.images.map(row => row.url_list[row.url_list.length - 1]) : null
    });

    const handlePosts = (json) => {
      if (!json || !Array.isArray(json.aweme_list)) {
        return;
      }
      aweme_list.push(...json.aweme_list.map(toPost));
      if (domLoadedTimer === null) {
        // Header buttons exist: update the counters now.
        flush();
      } else {
        flag = true;
      }
    };

    const handleProfile = (json) => {
      const userInfo = json && json.user;
      if (!userInfo) {
        return;
      }
      const city = userInfo.city ? userInfo.city : '';
      const district = userInfo.district ? userInfo.district : '';
      userData.push(
        userInfo.nickname, userInfo.following_count, userInfo.mplatform_followers_count,
        // NOTE(review): the leading tab presumably keeps spreadsheet apps from
        // reformatting the id as a number - confirm.
        userInfo.total_favorited, '\t' + (userInfo.unique_id ? userInfo.unique_id : userInfo.short_id), userInfo.ip_location, userInfo.gender === 2 ? "女" : "男",
        `${city}${district}`, '"' + (userInfo.signature ? userInfo.signature : '') + '"', userInfo.aweme_count, "https://www.douyin.com/user/" + userInfo.sec_uid
      );
    };

    const onDone = (xhr) => {
      const url = String(xhr._url || xhr.responseURL || "");
      try {
        if (url.indexOf("/aweme/v1/web/aweme/post") > -1) {
          handlePosts(readJson(xhr));
        } else if (url.indexOf("/aweme/v1/web/user/profile/other") > -1) {
          handleProfile(readJson(xhr));
        }
      } catch (err) {
        console.warn("douyin-user-data-download: failed to read response of " + url, err);
      }
    };

    XMLHttpRequest.prototype.send = function (...args) {
      if (!hooked.has(this)) {
        hooked.add(this);
        this.addEventListener("readystatechange", () => {
          if (this.readyState === 4) {
            onDone(this);
          }
        });
      }
      return originalSend.apply(this, args);
    };
  }
  266.  
  /**
   * Offers `txt` to the user as a downloadable file.
   *
   * The name is sanitised (characters invalid in file names and whitespace
   * are removed) and its stem is limited to 20 characters. The extension is
   * kept intact; previously the limit was applied to the whole name, so a
   * long stem lost its ".csv" suffix.
   *
   * @param {string|Uint8Array} txt - File content; passed straight to Blob.
   * @param {string} filename - Desired file name, optionally with extension.
   */
  function txt2file(txt, filename) {
    const sanitize = (part) => part.replace(/[\/:*?"<>|\s]/g, "");
    // A dot at index 0 is part of the stem, not the start of an extension.
    const dot = filename.lastIndexOf('.');
    const stem = dot > 0 ? filename.slice(0, dot) : filename;
    const extension = dot > 0 ? filename.slice(dot) : '';

    const blob = new Blob([txt], {type: 'text/plain'});
    const url = URL.createObjectURL(blob);
    const link = document.createElement('a');
    link.href = url;
    link.download = sanitize(stem.slice(0, 20)) + sanitize(extension);
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    URL.revokeObjectURL(url);
  }
  278.  
  /**
   * Builds a CSV with the captured profile (header + one row), a blank line,
   * and one row per captured post, then hands it to txt2file.
   *
   * Fields are now CSV-escaped: embedded double quotes are doubled and any
   * value containing a comma, quote or line break is wrapped in quotes.
   * Without this, quotes in descriptions and commas in locale-formatted
   * dates or nicknames shifted the columns. Values free of those characters
   * are written exactly as before.
   *
   * @param {string} encoding - "gbk" converts the text with str2gbk; any
   *   other value leaves the text as a JavaScript string.
   */
  function downloadData(encoding) {
    // Array.prototype.join renders null/undefined as "", keep that behaviour.
    const toText = (value) => (value === undefined || value === null ? "" : String(value));
    const quoted = (value) => '"' + toText(value).replace(/"/g, '""') + '"';
    const field = (value) => {
      const s = toText(value);
      return /[",\r\n]/.test(s) ? quoted(s) : s;
    };
    // The signature is stored already wrapped in double quotes; unwrap it
    // before escaping so it is not quoted twice.
    const profileField = (value) => {
      const s = toText(value);
      const preQuoted = s.length >= 2 && s.charAt(0) === '"' && s.charAt(s.length - 1) === '"';
      return preQuoted ? quoted(s.slice(1, -1)) : field(s);
    };

    let text = userKey.join(",") + "\n" + userData.map(profileField).join(",") + "\n\n";
    text += "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接\n";
    aweme_list.forEach(item => {
      text += [
        // The description is always quoted, as before.
        quoted(item.desc),
        field(item.diggCount),
        field(item.commentCount),
        field(item.collectCount),
        field(item.shareCount),
        field(item.date),
        field(item.url)
      ].join(",") + "\n";
    });
    if (encoding === "gbk") {
      text = str2gbk(text);
    }
    txt2file(text, userData[0] + ".csv");
  }
  291.  
  // Header-menu entries created by createAllButton().
  let dimg_button;
  let data_button;
  let scroll_button;
  // Floating status line created by createMsgBox().
  let msg_pre;

  /**
   * Creates the semi-transparent status <pre> fixed near the top-right
   * corner of the page and stores it in msg_pre.
   */
  function createMsgBox() {
    msg_pre = document.createElement('pre');
    msg_pre.textContent = '等待上方头像加载完毕';
    Object.assign(msg_pre.style, {
      color: 'white',
      position: 'fixed',
      right: '5px',
      top: '60px',
      zIndex: '90000',
      opacity: "0.5",
    });
    document.body.appendChild(msg_pre);
  }
  306.  
  /**
   * Adds four entries to the header menu by cloning its last link:
   * "图文打包下载", "下载已加载的数据" (with a gbk checkbox),
   * "开启自动下拉到底" and "复制作者信息".
   *
   * Each clone is inserted directly after the template link, so the entry
   * created last ends up closest to it. Every entry gets `p1` / `p2`
   * properties pointing at its first and second <p>.
   *
   * The template's <svg> is now removed with `remove()`; the previous
   * `baseNode.removeChild(svg)` throws when the svg is not a direct child
   * of the link.
   */
  function createAllButton() {
    const dom = document.querySelector("#douyin-header-menuCt pace-island > div > div:nth-last-child(1) ul a:nth-last-child(1)");
    const baseNode = dom.cloneNode(true);
    // Strip the navigation behaviour of the cloned link.
    ["target", "rel", "href"].forEach((attr) => baseNode.removeAttribute(attr));
    const svgChild = baseNode.querySelector("svg");
    if (svgChild) svgChild.remove();

    // Clones the template, labels it and inserts it after the template link.
    const addButton = (title, subtitle) => {
      const button = baseNode.cloneNode(true);
      button.p1 = button.querySelector("p:nth-child(1)");
      button.p2 = button.querySelector("p:nth-child(2)");
      button.p1.textContent = title;
      button.p2.textContent = subtitle;
      dom.after(button);
      return button;
    };

    dimg_button = addButton("图文打包下载", "0");
    dimg_button.addEventListener('click', downloadImg);

    data_button = addButton("下载已加载的数据", "0");
    const label = document.createElement('label');
    label.setAttribute('for', 'gbk');
    label.innerText = 'gbk';
    data_button.p1.after(label);
    const checkbox = document.createElement('input');
    checkbox.setAttribute('type', 'checkbox');
    checkbox.setAttribute('id', 'gbk');
    // Inserted after p1 as well, so the checkbox sits before its label.
    data_button.p1.after(checkbox);
    // Toggling the checkbox must not start a download.
    const stopPropagation = (event) => event.stopPropagation();
    label.addEventListener('click', stopPropagation);
    checkbox.addEventListener('click', stopPropagation);
    data_button.addEventListener('click', () => downloadData(checkbox.checked ? "gbk" : "utf-8"));

    // The click handler for this entry is attached by scrollPageToBottom().
    scroll_button = addButton("开启自动下拉到底", "");

    const copyUserData_button = addButton("复制作者信息", "");
    copyUserData_button.addEventListener('click', () => copyUserData(copyUserData_button.p2));
  }
  358.  
  /**
   * Wires the auto-scroll menu entry: a click starts scrolling to the page
   * bottom every 1.2 s, another click stops it.
   *
   * Scrolling also stops by itself once the number of captured posts reaches
   * the profile's post count (userData[9]) or the element following the post
   * list shows some text. A missing element is now treated as "no text";
   * previously it raised a TypeError on every tick.
   */
  function scrollPageToBottom() {
    let scrollInterval;

    function stop(label) {
      clearInterval(scrollInterval);
      scrollInterval = null;
      scroll_button.p1.textContent = label;
    }

    function scrollLoop() {
      const endNode = document.querySelector("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] + div div");
      const endText = endNode ? endNode.innerText : "";
      if (aweme_list.length < userData[9] && !endText) {
        scrollTo(0, document.body.scrollHeight);
      } else {
        stop("已加载全部!");
      }
    }

    scroll_button.addEventListener('click', () => {
      if (!scrollInterval) {
        scrollInterval = setInterval(scrollLoop, 1200);
        scroll_button.p1.textContent = "停止自动下拉";
      } else {
        stop("开启自动下拉");
      }
    });
  }
  384.  
  /**
   * Downloads the images of every captured image post into one zip with a
   * numbered folder per post, and offers the zip as a download named after
   * the profile's nickname.
   *
   * Fixes over the previous version:
   *  - a failed or non-2xx image request is reported in the status line
   *    instead of ending as an unhandled rejection or a bogus image file;
   *  - the archive can be saved when no profile was captured (the name falls
   *    back to "douyin"); before, that threw after all images were fetched;
   *  - the object URL created for the download is released afterwards.
   *
   * @returns {Promise<void>} Settles once the download has been triggered or
   *   the failure has been reported; it does not reject.
   */
  async function downloadImg() {
    const imagePosts = aweme_list.filter(a => a.images);
    if (imagePosts.length === 0) {
      alert("当前页面未发现图文链接");
      return;
    }

    const stripUnsafe = (name) => name.replace(/[\/:*?"<>|\s]/g, "");

    try {
      const zip = new JSZip();
      for (const [index, aweme] of imagePosts.entries()) {
        const desc = aweme.desc || "";
        msg_pre.textContent = `${index + 1}.${desc.slice(0, 20)}...`;
        // Trailing dots/digits are trimmed from the folder name.
        const folderName = desc ? stripUnsafe(desc).slice(0, 20).replace(/[.\d]+$/g, "") : aweme.awemeId;
        const folder = zip.folder((index + 1) + "." + folderName);
        // Images of one post are fetched in parallel; posts one after another.
        await Promise.all(aweme.images.map((link, imageIndex) => fetch(link)
          .then((res) => {
            if (res.ok === false) {
              throw new Error("HTTP " + res.status + " for " + link);
            }
            return res.arrayBuffer();
          })
          .then((buffer) => {
            folder.file(`image_${imageIndex + 1}.jpg`, buffer);
          })));
      }

      msg_pre.textContent = "图片打包中...";
      const content = await zip.generateAsync({type: "blob"});
      const objectUrl = URL.createObjectURL(content);
      const link = document.createElement("a");
      link.href = objectUrl;
      link.download = stripUnsafe(String(userData[0] || "douyin").slice(0, 20)) + ".zip";
      link.click();
      // Deferred so the browser has started the download before the URL goes away.
      setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
      msg_pre.textContent = "图片打包完成";
    } catch (err) {
      console.error(err);
      msg_pre.textContent = "图片打包失败";
    }
  }
  414.  
  /**
   * Every 500 ms, adds a "下载" entry next to the clarity selector of the
   * currently playing video; its hover menu offers "打开视频源", which opens
   * the video's source address in a new window.
   *
   * Fixes over the previous version: missing nodes (a setting without a
   * `.btn`, no `xg-icon`, no `.virtual` menu, a video without a string src)
   * no longer raise a TypeError on every tick, and the menu entries are
   * created as elements instead of via an interpolated innerHTML string.
   */
  function douyinVideoDownloader() {
    function run() {
      const downloadOption = [{name: '打开视频源', id: 'toLink'}];
      const videoElements = document.querySelectorAll('video');
      if (videoElements.length === 0) return;

      // Keep only the <video> tags that carry an autoplay attribute.
      const playVideoElements = [];
      videoElements.forEach(function (element) {
        if (element.getAttribute('autoplay') !== null) {
          playVideoElements.push(element);
        }
      });
      // With a modal open (URL contains modal_id) use the first such video,
      // otherwise the last one.
      const videoContainer = location.href.indexOf('modal_id') !== -1
        ? playVideoElements[0]
        : playVideoElements[playVideoElements.length - 1];
      if (!videoContainer) return;

      // Playback address: the first child's src if present, else the video's own.
      const url = videoContainer.children.length > 0 && videoContainer.children[0].src
        ? videoContainer.children[0].src
        : videoContainer.src;
      if (typeof url !== 'string') return;

      // Video id used to build unique element ids.
      let videoId;
      const resp = url.match(/^(https:)?\/\/.+\.com\/([a-zA-Z0-9]+)\/[a-zA-Z0-9]+\/video/);
      const res = url.match(/blob:https:\/\/www.douyin.com\/(.*)/);
      if (resp && resp[2]) {
        videoId = resp[2];
      } else if (res && res[1]) {
        videoId = res[1];
      } else {
        videoId = videoContainer.getAttribute('data-xgplayerid');
      }

      const playContainer = videoContainer.parentNode.parentNode.querySelector('.xg-right-grid');
      if (!playContainer) return;

      // While browsing a profile several buttons can pile up; once the one for
      // the current video exists, drop the other "下载" entries and stop.
      const ownId = 'scriptVideoDownload' + videoId;
      const videoDownloadDom = playContainer.querySelector('#' + ownId);
      if (videoDownloadDom) {
        playContainer.querySelectorAll('.xgplayer-playclarity-setting').forEach(function (d) {
          const btn = d.querySelector('.btn');
          if (d.id !== ownId && btn && btn.innerText === '下载') {
            d.parentNode.removeChild(d);
          }
        });
        return;
      }

      const playClarityDom = playContainer.querySelector('.xgplayer-playclarity-setting');
      if (!playClarityDom) return;

      const palyClarityBtn = playClarityDom.querySelector('.btn');
      if (!palyClarityBtn) return;

      // The new entry is a restyled clone of the clarity selector.
      const downloadDom = playClarityDom.cloneNode(true);
      downloadDom.setAttribute('id', ownId);

      const onSearchPage = () => location.href.indexOf('search') !== -1;
      if (!onSearchPage()) {
        downloadDom.style = 'margin-top:-68px;padding-top:100px;padding-left:20px;padding-right:20px;';
      } else {
        downloadDom.style = 'margin-top:0px;padding-top:100px;';
      }

      const downloadText = downloadDom.querySelector('.btn');
      downloadText.innerText = '下载';
      downloadText.style = 'font-size:14px;font-weight:600;';
      downloadText.setAttribute('id', 'zhmDouyinDownload' + videoId);

      // Page link stored on the button: href of the first xg-icon's first
      // child if there is one, else the current address.
      const iconNode = playContainer.querySelector('xg-icon:nth-of-type(1)');
      const detail = iconNode ? iconNode.children[0] : null;
      const detailHref = detail ? detail.getAttribute('href') : null;
      let linkUrl = detailHref ? detailHref : location.href;
      if (linkUrl.indexOf('www.douyin.com') === -1) {
        linkUrl = '//www.douyin.com' + linkUrl;
      }

      downloadText.setAttribute('data-url', linkUrl);
      downloadText.removeAttribute('target');
      downloadText.setAttribute('href', 'javascript:void(0);');

      // Hover menu of the cloned entry.
      const virtualDom = downloadDom.querySelector('.virtual');
      downloadDom.onmouseover = function () {
        if (!virtualDom) return;
        if (!onSearchPage()) {
          virtualDom.style = 'display:block !important';
        } else {
          virtualDom.style = 'display:block !important;margin-bottom:37px;';
        }
      };
      downloadDom.onmouseout = function () {
        if (!virtualDom) return;
        virtualDom.style = 'display:none !important';
      };

      if (virtualDom) {
        // Replace the cloned clarity options with our own entries.
        virtualDom.textContent = '';
        downloadOption.forEach(function (item) {
          if (item.id !== "toLink") return;
          const entry = document.createElement('div');
          entry.setAttribute('style', 'text-align:center;');
          entry.className = `item ${item.id}`;
          entry.id = `${item.id}${videoId}`;
          entry.textContent = item.name;
          // Open the source directly; blob: addresses cannot be opened this way.
          entry.addEventListener('click', function () {
            if (url.match(/^blob/)) {
              alert("加密视频地址,无法直接打开");
            } else {
              window.open(url);
            }
          });
          virtualDom.appendChild(entry);
        });
      }
      playClarityDom.after(downloadDom);
    }

    setInterval(run, 500);
  }
  531.  
  // Handle of the interval that waits for the header menu. It is undefined
  // until polling starts and null once the menu was found; the response
  // interceptor compares it with null to decide whether it can call flush().
  let domLoadedTimer;

  // Polls for the header menu link; once present, stops polling, creates the
  // menu entries and publishes any posts captured in the meantime.
  const checkElementLoaded = () => {
    const element = document.querySelector('#douyin-header-menuCt pace-island > div > div:nth-last-child(1) ul a');
    if (element) {
      console.log('检测到头像加载完毕.');
      msg_pre.textContent = "头像加载完成\n若需要下载用户数据,需进入目标用户主页";
      clearInterval(domLoadedTimer);
      domLoadedTimer = null;
      createAllButton();
      scrollPageToBottom();
      if (flag) flush();
    }
  };

  // Nothing is set up on the captcha interstitial page.
  if (document.title !== "验证码中间页") {
    createMsgBox();
    interceptResponse();
    douyinVideoDownloader();

    const startPolling = () => {
      domLoadedTimer = setInterval(checkElementLoaded, 500);
    };
    if (document.readyState === "complete") {
      // The load event has already fired (the script was injected late);
      // waiting for it would never start the polling.
      startPolling();
    } else {
      // addEventListener leaves a window.onload handler set by the page intact.
      window.addEventListener("load", startPolling, {once: true});
    }
  }
  554. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址