Douyin UserData Exporter Debug_v4.0

capture statistics of an author

  1. // ==UserScript==
  2. // @name Douyin UserData Exporter Debug_v4.0
  3. // @namespace http://tampermonkey.net/
  4. // @version v4.0_20240107
  5. // @description capture statistics of an author
  6. // @author qmcc
  7. // @match https://www.douyin.com/user/*
  8. // @icon https://www.google.com/s2/favicons?sz=64&domain=douyin.com
  9. // @grant none
  10. // @license MIT
  11.  
  12. // ==/UserScript==
  13.  
  14. (function() {
  15. 'use strict';
  16.  
  // Whether the latest post-list response reported more pages; gates auto-scrolling.
  let hasMore = true;
  // Post records collected by parsePost, in the order they were captured.
  const posts = [];
  // Profile fields of the page's author, filled in by parseAuthor.
  const author = {};
  20.  
  /**
   * Scroll the window vertically to the current height of the document body.
   */
  function scrollToBottom() {
    const { scrollHeight } = document.body;
    window.scrollTo(0, scrollHeight);
  }
  24.  
  // Every 100 ms, scroll to the bottom for as long as hasMore is true.
  setInterval(() => {
    if (!hasMore) {
      return;
    }
    console.log('Scrolling to bottom');
    scrollToBottom();
  }, 100);
  32.  
  /**
   * Record whether the response reports more posts.
   *
   * @param {object} response - Parsed post-list response; only `has_more` is read.
   *   `hasMore` becomes true only when `has_more` is exactly the number 1.
   */
  function parseHasMore(response) {
    hasMore = response.has_more === 1;
  }
  40.  
  /**
   * Extract the statistics of one video and append them to `posts`.
   *
   * @param {object} aweme - One entry of a post-list response. Reads `desc`,
   *   `create_time`, `duration` and the counters under `statistics`.
   */
  function parsePost(aweme) {
    const { statistics } = aweme;
    posts.push({
      // Video title.
      title: aweme.desc,
      // Publish date; create_time is multiplied by 1000 before building the Date.
      createDatetime: new Date(aweme.create_time * 1000),
      // Like count.
      likeCount: statistics.digg_count,
      // Share count.
      shareCount: statistics.share_count,
      // Comment count.
      commentCount: statistics.comment_count,
      // Collect (favourite) count.
      collectCount: statistics.collect_count,
      // Duration; the raw value is divided by 1000.
      duration: aweme.duration / 1000,
    });
  }
  60.  
  61.  
  62.  
  /**
   * Copy the profile fields this exporter reports into the shared `author` record.
   *
   * A response without a `user` object is ignored (with a console warning), so a
   * malformed payload neither throws in the caller nor touches data captured earlier.
   *
   * @param {object} profile - Parsed profile response; every field is read from `profile.user`.
   * @returns {void}
   */
  function parseAuthor(profile) {
    const user = profile && profile.user;
    if (!user) {
      console.warn('Profile response has no user object; author data left unchanged');
      return;
    }

    // URL of the profile page the data was captured on.
    author.url = window.location.href;

    // Nickname.
    author.nickname = user.nickname;
    // Douyin ID: unique_id when truthy, otherwise short_id.
    author.id = user.unique_id || user.short_id;
    // Total likes received.
    author.favoratedCount = user.total_favorited;
    // Follower count.
    author.followerCount = user.follower_count;
    // Following count.
    author.followingCount = user.following_count;
    // Number of items the user has liked.
    author.favoritingCount = user.favoriting_count;
    // Gender: 1 -> '男' (male), 2 -> '女' (female), anything else -> undefined.
    if (user.gender === 1) {
      author.gender = '男';
    } else if (user.gender === 2) {
      author.gender = '女';
    } else {
      author.gender = undefined;
    }
    // Age; non-positive or missing values are stored as null.
    author.age = user.user_age > 0 ? user.user_age : null;
    // IP location with its "IP属地" label removed. The label is accepted with
    // either an ASCII or a full-width colon.
    author.ipLocation = user.ip_location ? user.ip_location.replace(/IP属地[:：]/, '') : undefined;
    // Province.
    author.province = user.province;
    // City.
    author.city = user.city;
    // Number of published videos.
    author.postCount = user.aweme_count;
    // Whether the profile has a shop window.
    author.hasShop = user.with_fusion_shop_entry;
    // Live-commerce flag (the original author was unsure of its exact meaning).
    author.hasLiveCommerce = user.live_commerce;
    // Profile signature text.
    author.signature = user.signature;
    // Commerce-related flags, copied through unchanged.
    author.withCommerceEnterpriseTabEntry = user.with_commerce_enterprise_tab_entry;
    author.withCommerceEntry = user.with_commerce_entry;
    author.withNewGoods = user.with_new_goods;
    // Linked YouTube channel, copied through unchanged.
    author.youtubeChannelId = user.youtube_channel_id;
    author.youtubeChannelTitle = user.youtube_channel_title;
    // Whether the liked list is shown.
    author.showFavoriteList = user.show_favorite_list;
    // Whether the following list is shown.
    author.showSubscription = user.show_subscription;
    // Status and classification flags, copied through unchanged.
    author.isActivityUser = user.is_activity_user;
    author.isBan = user.is_ban;
    author.isBlock = user.is_block;
    author.isBlocked = user.is_blocked;
    author.isEffectArtist = user.is_effect_artist;
    author.isGovMediaVip = user.is_gov_media_vip;
    author.isMixUser = user.is_mix_user;
    author.isNotShow = user.is_not_show;
    author.isSeriesUser = user.is_series_user;
    author.isSharingProfileUser = user.is_sharing_profile_user;
    author.isStar = user.is_star;
    // ISO country code.
    author.isoCountryCode = user.iso_country_code;
    // Custom verification text.
    author.customVerify = user.custom_verify;
    // MCN flag: true when account_info_url contains 'mcn'.
    author.hasMcn = Boolean(user.account_info_url && user.account_info_url.includes('mcn'));
    // Group-chat count, parsed from the first card entry whose sub_title mentions '群聊'.
    author.groupChatCount = 0;
    if (user.card_entries) {
      const groupChatEntry = user.card_entries.find(
        (entry) => entry.sub_title && entry.sub_title.includes('群聊')
      );
      const match = groupChatEntry ? groupChatEntry.sub_title.match(/(\d+)个群聊/) : null;
      if (match) {
        author.groupChatCount = parseInt(match[1], 10);
      }
    }
  }
  152.  
  // Ordered list of {header, content} pairs that make up one export row.
  let dataFrame = [];

  /**
   * Discard all collected entries by replacing `dataFrame` with a new empty array.
   */
  function clearDataFrame() {
    dataFrame = [];
  }
  158.  
  /**
   * Append one column (header plus value) to `dataFrame`.
   *
   * String values have every carriage return, line feed and tab replaced by a
   * single space, so a value can never break out of its cell in the
   * tab-separated row. Non-string values are stored unchanged.
   *
   * @param {string} header - Column title.
   * @param {*} content - Cell value.
   */
  function addEntryToDataFrame(header, content) {
    const cell = typeof content === 'string' ? content.replace(/[\r\n\t]/g, ' ') : content;
    dataFrame.push({ header, content: cell });
  }
  167.  
  /**
   * Append every author column to the data frame, in export order.
   *
   * Each pair below is [column header, property name on `author`].
   */
  function addAuthorToDataFrame() {
    const authorColumns = [
      ['用户主页', 'url'],
      ['ID (抖音号)', 'id'],
      ['Nickname (昵称)', 'nickname'],
      ['Favorated (获赞数)', 'favoratedCount'],
      ['Follower (粉丝数)', 'followerCount'],
      ['Following (关注数)', 'followingCount'],
      ['Favoriting (喜欢数)', 'favoritingCount'],
      ['Gender (性别)', 'gender'],
      ['Age (年龄)', 'age'],
      ['IP Location (IP属地)', 'ipLocation'],
      ['Province (省份)', 'province'],
      ['City (城市)', 'city'],
      ['Post Count (发布视频数)', 'postCount'],
      ['Has Shop (是否有橱窗)', 'hasShop'],
      ['Has Live Commerce (是否有直播带货)', 'hasLiveCommerce'],
      ['Signature (个性签名)', 'signature'],
      ['With Commerce Enterprise Tab Entry', 'withCommerceEnterpriseTabEntry'],
      ['With Commerce Entry', 'withCommerceEntry'],
      ['With New Goods', 'withNewGoods'],
      ['Youtube Channel ID', 'youtubeChannelId'],
      ['Youtube Channel Title', 'youtubeChannelTitle'],
      ['Show Favorite List (是否展示喜欢列表)', 'showFavoriteList'],
      ['Show Subscription (是否展示关注列表)', 'showSubscription'],
      ['Is Activity User (是否活跃用户)', 'isActivityUser'],
      ['Is Ban (是否被封禁)', 'isBan'],
      ['Is Block (是否被拉黑)', 'isBlock'],
      ['Is Blocked', 'isBlocked'],
      ['Is Effect Artist (是否是特效艺术家)', 'isEffectArtist'],
      ['Is Gov Media VIP (是否是政府媒体VIP)', 'isGovMediaVip'],
      ['Is Mix User (是否是混合用户)', 'isMixUser'],
      ['Is Not Show (是否不展示)', 'isNotShow'],
      ['Is Series User (是否是系列用户)', 'isSeriesUser'],
      ['Is Sharing Profile User (是否是分享资料用户)', 'isSharingProfileUser'],
      ['Is Star (是否是明星)', 'isStar'],
      ['ISO Country Code (国家代码)', 'isoCountryCode'],
      ['Custom Verify (自定义认证)', 'customVerify'],
      ['Has MCN (是否有MCN机构)', 'hasMcn'],
      ['Group Chat Count (群聊数量)', 'groupChatCount'],
    ];

    for (const [header, key] of authorColumns) {
      addEntryToDataFrame(header, author[key]);
    }
  }
  208.  
  /**
   * Append the five exported columns of one post to the data frame.
   *
   * @param {string} title - Prefix placed in front of every column header (e.g. "Hot1").
   * @param {object} post - Post record; `createDatetime` must be a Date.
   */
  function addPostToDataFrame(title, post) {
    const columns = [
      ['-Datetime (发布日期)', post.createDatetime.toLocaleDateString()],
      ['-Like (点赞数)', post.likeCount],
      ['-Share (转发数)', post.shareCount],
      ['-Comment (评论数)', post.commentCount],
      ['-Collect (收藏数)', post.collectCount],
    ];
    columns.forEach(([suffix, value]) => {
      addEntryToDataFrame(title + suffix, value);
    });
  }
  216.  
  /**
   * Build the empty post record used to pad a top-three list.
   *
   * @returns {object} Post with blank title, epoch date and zeroed counters.
   */
  function makePlaceholderPost() {
    return {
      title: '',
      createDatetime: new Date(0),
      likeCount: 0,
      shareCount: 0,
      commentCount: 0,
      collectCount: 0,
      duration: 0,
    };
  }

  /**
   * Return the first three posts under `compare`, padded to exactly three entries.
   *
   * @param {function} compare - Sort comparator taking two post records.
   * @returns {object[]} Three posts; missing ones are placeholders.
   */
  function pickThreePosts(compare) {
    // Sort a copy so the capture order of `posts` is never disturbed.
    const picked = [...posts].sort(compare).slice(0, 3);
    while (picked.length < 3) {
      picked.push(makePlaceholderPost());
    }
    return picked;
  }

  /**
   * Median of an ascending numeric array.
   *
   * @param {number[]} sorted - Values in ascending order.
   * @returns {number|undefined} Middle value, the mean of the two middle values
   *   for an even count, or undefined for an empty array.
   */
  function medianOfSorted(sorted) {
    if (sorted.length === 0) {
      return undefined;
    }
    const middle = Math.floor(sorted.length / 2);
    if (sorted.length % 2 === 1) {
      return sorted[middle];
    }
    return (sorted[middle - 1] + sorted[middle]) / 2;
  }

  /**
   * Rebuild the data frame from `author` and `posts` and return it as two rows.
   *
   * Columns, in order: the author columns, four duration statistics, the number
   * of posts from the last seven days, then the three most-liked, three newest
   * and three oldest posts. When no posts have been captured the duration
   * statistics are undefined, which renders as an empty cell once joined.
   *
   * @returns {Array[]} `[headers, content]`, two parallel arrays.
   */
  function getStatistics() {
    clearDataFrame();
    // Author columns.
    addAuthorToDataFrame();

    // Mean duration; left undefined rather than NaN when there are no posts.
    const totalDuration = posts.reduce((acc, post) => acc + post.duration, 0);
    const averageDuration = posts.length > 0 ? totalDuration / posts.length : undefined;
    addEntryToDataFrame('Average Duration (平均时长)', averageDuration);

    // Median duration.
    const durations = posts.map((post) => post.duration).sort((a, b) => a - b);
    addEntryToDataFrame('Median Duration (中位数时长)', medianOfSorted(durations));

    // Duration found 90% of the way through the ascending list (long end).
    const percentile10Duration = durations[Math.floor(durations.length * 0.9)];
    addEntryToDataFrame('10% Duration (前10%时长)', percentile10Duration);

    // Duration found 10% of the way through the ascending list (short end).
    const percentile90Duration = durations[Math.floor(durations.length * 0.1)];
    addEntryToDataFrame('90% Duration (后10%时长)', percentile90Duration);

    // Number of posts published within the last seven days.
    const oneWeekAgo = new Date();
    oneWeekAgo.setDate(oneWeekAgo.getDate() - 7);
    const lastWeekPostsCount = posts.filter((post) => post.createDatetime > oneWeekAgo).length;
    addEntryToDataFrame('Last Week Posts Count (最近一周发布数)', lastWeekPostsCount);

    // 1) Three most-liked posts.
    pickThreePosts((a, b) => b.likeCount - a.likeCount).forEach((post, index) => {
      addPostToDataFrame(`Hot${index + 1}`, post);
    });

    // 2) Three newest posts.
    pickThreePosts((a, b) => b.createDatetime - a.createDatetime).forEach((post, index) => {
      addPostToDataFrame(`Latest${index + 1}`, post);
    });

    // 3) Three oldest posts.
    pickThreePosts((a, b) => a.createDatetime - b.createDatetime).forEach((post, index) => {
      addPostToDataFrame(`Oldest${index + 1}`, post);
    });

    // Split the frame into a header row and a content row.
    const headers = dataFrame.map((entry) => entry.header);
    const content = dataFrame.map((entry) => entry.content);
    return [headers, content];
  }
  313.  
  314.  
  315.  
  /**
   * Copy the header row to the clipboard as tab-separated text (pastes into Excel).
   *
   * The success alert is shown only after the clipboard write has completed;
   * a rejected write is logged and reported with a failure alert instead of
   * being left as an unhandled rejection.
   *
   * @returns {Promise<void>} Settles after the user has been notified.
   */
  function writeHeadersToClipboard() {
    const text = getStatistics()[0].join('\t');
    return navigator.clipboard.writeText(text).then(
      () => {
        alert('表头已复制到剪贴板');
      },
      (err) => {
        console.error('Failed to copy headers to clipboard:', err);
        alert('表头复制失败,请重试');
      }
    );
  }
  323.  
  /**
   * Copy the content row to the clipboard as tab-separated text (pastes into Excel).
   *
   * The alert is shown only after the clipboard write has completed and says
   * whether more posts remain to be loaded (`hasMore`). A rejected write is
   * logged and reported with a failure alert instead of being left as an
   * unhandled rejection.
   *
   * @returns {Promise<void>} Settles after the user has been notified.
   */
  function writeContentToClipboard() {
    const text = getStatistics()[1].join('\t');
    return navigator.clipboard.writeText(text).then(
      () => {
        if (hasMore) {
          alert('内容已复制到剪贴板,还有更多数据,请继续滚动页面');
        } else {
          alert('内容已复制到剪贴板,数据已全部加载完毕');
        }
      },
      (err) => {
        console.error('Failed to copy content to clipboard:', err);
        alert('内容复制失败,请重试');
      }
    );
  }
  335.  
  console.log('Douyin Crawler is running');

  /**
   * Find the first <div> in the document whose innerText equals `text` exactly.
   *
   * @param {string} text - Text to match against each div's innerText.
   * @returns {Element|undefined} The first matching div, or undefined when none matches.
   */
  function findDivByInnerText(text) {
    const divs = Array.from(document.querySelectorAll('div'));
    for (const div of divs) {
      if (div.innerText === text) {
        return div;
      }
    }
    return undefined;
  }
  341.  
  // Once per second, look for the page's "意见反馈" and "常见问题" divs and swap
  // each for a relabelled clone that triggers one of the clipboard exports.
  // The clone is appended to the end of the original's parent, then the
  // original is removed.
  setInterval(() => {
    const replacements = [
      { originalText: '意见反馈', label: '复制内容', handler: writeContentToClipboard },
      { originalText: '常见问题', label: '复制表头', handler: writeHeadersToClipboard },
    ];

    for (const { originalText, label, handler } of replacements) {
      const target = findDivByInnerText(originalText);
      if (!target) {
        continue;
      }
      const replacement = target.cloneNode(true);
      replacement.innerText = label;
      replacement.onclick = handler;
      target.parentNode.appendChild(replacement);
      target.remove();
    }
  }, 1000);
  360.  
  /**
   * Serialise the content row as a single CSV line terminated by "\n".
   *
   * Cells containing a comma, double quote, carriage return or line feed are
   * wrapped in double quotes with embedded quotes doubled, so such values stay
   * in their own column. null and undefined cells become empty fields.
   *
   * @param {Array} headers - Column titles.
   *   NOTE(review): headers are not written to the output; only the content
   *   row is emitted. This looks deliberate — confirm.
   * @param {Array} content - Cell values of the row.
   * @returns {string} The CSV line, including its trailing newline.
   */
  function convertToCSV(headers, content) {
    const cells = Array.from(content, (value) => {
      const text = value === null || value === undefined ? '' : String(value);
      return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    });
    return `${cells.join(',')}\n`;
  }
  366.  
  /**
   * Offer `csvContent` to the user as a file download.
   *
   * The content is wrapped in a Blob and exposed through an object URL, and a
   * temporary link pointing at that URL is clicked. The link is removed
   * straight away; the object URL is revoked on the next timer tick so the
   * blob can be released.
   *
   * @param {string} filename - Suggested name for the downloaded file.
   * @param {string} csvContent - Text to place in the file.
   */
  function downloadCSV(filename, csvContent) {
    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);

    const link = document.createElement('a');
    link.href = url;
    link.download = filename;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    // Deferred so the click is fully dispatched before the URL becomes invalid.
    setTimeout(() => {
      URL.revokeObjectURL(url);
    }, 0);
  }
  378.  
  /**
   * Compute the current statistics, convert them to CSV, start the download of
   * "douyin_user_data.csv" and then show a confirmation alert.
   */
  function exportDataToCSV() {
    const statistics = getStatistics();
    const csvText = convertToCSV(statistics[0], statistics[1]);
    downloadCSV('douyin_user_data.csv', csvText);
    alert('CSV 文件已生成并下载!');
  }
  385.  
  /**
   * Add a fixed-position button to the page that triggers the CSV export.
   */
  function createExportButton() {
    const exportButton = document.createElement('button');
    exportButton.textContent = '导出为 CSV';

    // Inline styles, applied in the listed order.
    Object.assign(exportButton.style, {
      position: 'fixed',
      bottom: '772px',
      right: '227px',
      padding: '10px',
      backgroundColor: '#4CAF50',
      color: 'white',
      border: 'none',
      borderRadius: '5px',
      cursor: 'pointer',
      zIndex: '10000',
    });

    exportButton.addEventListener('click', exportDataToCSV);
    document.body.appendChild(exportButton);
  }

  // Create the export button as soon as the script runs.
  createExportButton();
  406.  
  407.  
  408.  
  409.  
  // Hook XMLHttpRequest.prototype.open so that completed post-list and profile
  // responses requested by the page can be inspected.
  const originalOpen = XMLHttpRequest.prototype.open;

  /**
   * Pass one completed response to the parser that matches its URL.
   *
   * URLs containing '/post' are treated as post lists; otherwise URLs
   * containing '/profile/other' are treated as the author profile. Anything
   * else is ignored. Bodies that cannot be read or parsed as JSON are logged
   * and skipped.
   *
   * @param {string} url - Request URL as text.
   * @param {XMLHttpRequest} xhr - The finished request.
   */
  function handleCapturedResponse(url, xhr) {
    const isPostList = url.includes('/post');
    const isProfile = !isPostList && url.includes('/profile/other');
    if (!isPostList && !isProfile) {
      return;
    }

    let response;
    try {
      // Reading responseText can throw for non-text response types, and the
      // body is not guaranteed to be valid JSON.
      response = JSON.parse(xhr.responseText);
    } catch (err) {
      console.warn('Skipping response that could not be parsed:', url, err);
      return;
    }
    if (!response) {
      return;
    }

    if (isPostList) {
      parseHasMore(response);
      // aweme_list may be missing or null; treat that as an empty page.
      const awemeList = Array.isArray(response.aweme_list) ? response.aweme_list : [];
      awemeList.forEach((aweme) => {
        parsePost(aweme);
      });
      console.log('Posts:', posts);
    } else {
      parseAuthor(response);
      console.log('Author:', author);
    }
  }

  XMLHttpRequest.prototype.open = function(...args) {
    // The URL argument may be a string or a URL object; compare as text.
    const url = String(args[1]);
    this.addEventListener('readystatechange', () => {
      if (this.readyState === 4) {
        handleCapturedResponse(url, this);
      }
    }, false);
    // Forward exactly the arguments the caller supplied. Passing an explicit
    // `undefined` for the third (async) argument would make the request
    // synchronous, so omitted arguments must stay omitted.
    return originalOpen.apply(this, args);
  };
  430.  
  431.  
  432. })();

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址