douyin-user-data-download

下载抖音用户主页数据!

目前為 2024-06-13 提交的版本,檢視 最新版本

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey、Violentmonkey 之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey、Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey、Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         douyin-user-data-download
// @namespace    http://tampermonkey.net/
// @version      0.3.4
// @description  下载抖音用户主页数据!
// @author       xxmdmst
// @match        https://www.douyin.com/*
// @icon         https://xxmdmst.oss-cn-beijing.aliyuncs.com/imgs/favicon.ico
// @grant        none
// @require      https://cdnjs.cloudflare.com/ajax/libs/jszip/3.6.0/jszip.min.js
// @license MIT
// ==/UserScript==

(function () {
    // Lazily built lookup table indexed by UTF-16 code unit; each slot holds the packed
    // GBK code for that character, or 0xFFFF when there is no mapping. Filled by initGbkTable().
    let table;

    /**
     * Build the shared `table` lookup used by str2gbk().
     *
     * Every two-byte GBK code in the ranges below is packed into a Uint16Array
     * (second byte in the high 8 bits, first byte in the low 8 bits), the whole
     * array is decoded with a 'gbk' TextDecoder, and each decoded UTF-16 code
     * unit is mapped back to the packed code it came from.
     * NOTE(review): this presumably relies on the typed array being laid out
     * low-byte-first so the decoder sees the first byte before the second.
     */
    function initGbkTable() {
        // Each row: [first-byte start, first-byte end, second-byte start, second-byte end]
        // https://en.wikipedia.org/wiki/GBK_(character_encoding)#Encoding
        const byteRanges = [
            [0xA1, 0xA9, 0xA1, 0xFE],
            [0xB0, 0xF7, 0xA1, 0xFE],
            [0x81, 0xA0, 0x40, 0xFE],
            [0xAA, 0xFE, 0x40, 0xA0],
            [0xA8, 0xA9, 0x40, 0xA0],
            [0xAA, 0xAF, 0xA1, 0xFE],
            [0xF8, 0xFE, 0xA1, 0xFE],
            [0xA1, 0xA7, 0x40, 0xA0],
        ];
        const packedCodes = new Uint16Array(23940);
        let count = 0;

        byteRanges.forEach(([leadFrom, leadTo, trailFrom, trailTo]) => {
            for (let trail = trailFrom; trail <= trailTo; trail++) {
                // 0x7F is skipped as a second byte.
                if (trail === 0x7F) {
                    continue;
                }
                for (let lead = leadFrom; lead <= leadTo; lead++) {
                    packedCodes[count++] = (trail << 8) | lead;
                }
            }
        });

        // The table is published (all slots "unmapped") before decoding, as in the original order.
        table = new Uint16Array(65536);
        table.fill(0xFFFF);
        const decoded = new TextDecoder('gbk').decode(packedCodes);
        for (let pos = 0; pos < decoded.length; pos++) {
            table[decoded.charCodeAt(pos)] = packedCodes[pos];
        }
    }

    /**
     * Encode a JavaScript string as GBK bytes.
     *
     * @param {string} str - text to encode.
     * @param {Object} [opt]
     * @param {function(number): Uint8Array} [opt.onAlloc] - allocates the output buffer
     *        (defaults to Buffer.allocUnsafe when available, otherwise a Uint8Array).
     * @param {function(number, string): number} [opt.onError] - called with (index, str) for a
     *        code unit that has no mapping; return -1 to stop encoding, a value above 0xFF to emit
     *        two bytes (low byte first), or any other value to emit one byte. Defaults to 63 ('?').
     * @returns {Uint8Array} a view over the bytes actually written.
     */
    function str2gbk(str, opt = {}) {
        if (!table) {
            initGbkTable();
        }
        const nodeAlloc = typeof Buffer === 'function' && Buffer.allocUnsafe;
        const fallbackAlloc = nodeAlloc
            ? (size) => nodeAlloc(size)
            : (size) => new Uint8Array(size);
        const allocate = opt.onAlloc || fallbackAlloc;
        const handleUnmapped = opt.onError || (() => 63);

        // Two bytes per code unit is the worst case.
        const out = allocate(str.length * 2);
        let written = 0;
        const emitByte = (value) => {
            out[written++] = value;
        };
        const emitPair = (value) => {
            emitByte(value);
            emitByte(value >> 8);
        };

        for (let pos = 0; pos < str.length; pos++) {
            const unit = str.charCodeAt(pos);
            if (unit < 0x80) {
                emitByte(unit);
                continue;
            }
            const mapped = table[unit];
            if (mapped !== 0xFFFF) {
                emitPair(mapped);
                continue;
            }
            if (unit === 8364) {
                // Euro sign without a table entry is emitted as the single byte 0x80.
                emitByte(0x80);
                continue;
            }
            const replacement = handleUnmapped(pos, str);
            if (replacement === -1) {
                break;
            }
            if (replacement > 0xFF) {
                emitPair(replacement);
            } else {
                emitByte(replacement);
            }
        }
        return out.subarray(0, written);
    }

    // Posts captured from intercepted post-list responses; interceptResponse() appends to it.
    let aweme_list = [];
    // Labels for the profile fields, in the same order as the values pushed into userData.
    let userKey = [
        "昵称", "关注", "粉丝", "获赞",
        "抖音号", "IP属地", "性别",
        "位置", "签名", "作品数", "主页"
    ];
    // Profile values captured from the intercepted profile response (index-aligned with userKey).
    let userData = [];
    // Handle of the pending createEachButton timeout scheduled by flush().
    let timer;

    /**
     * Copy the captured profile fields to the clipboard as "label:value" lines.
     *
     * @param {Node} node - element whose textContent shows the success/failure message;
     *        the message is cleared again after two seconds.
     */
    function copyUserData(node) {
        // userData is always an array, so emptiness (not falsiness) means nothing was captured.
        if (!userData || userData.length === 0) {
            alert("未抓取到用户数据!");
            return;
        }
        const lines = userKey.map((key, index) => key + ":" + userData[index]);
        navigator.clipboard.writeText(lines.join("\n")).then(() => {
            node.textContent = "复制成功";
        }).catch(() => {
            node.textContent = "复制失败";
        });
        setTimeout(() => node.textContent = '', 2000);
    }

    /**
     * Create a half-transparent button pinned to the right edge of its positioned parent.
     *
     * @param {string} text - button caption.
     * @param {string} top - CSS `top` offset (e.g. "21px").
     * @param {function(Event): void} func - click handler.
     * @returns {HTMLButtonElement} the new, not yet attached, button.
     */
    function createVideoButton(text, top, func) {
        const btn = document.createElement("button");
        btn.textContent = text;
        Object.assign(btn.style, {
            position: "absolute",
            right: "0px",
            top: top,
            opacity: "0.5",
        });
        btn.addEventListener("click", func);
        return btn;
    }

    /**
     * Open `url` in a new tab by clicking a temporary anchor element.
     *
     * @param {string} url - address to open.
     */
    function openLink(url) {
        const anchor = Object.assign(document.createElement('a'), {
            href: url,
            target: "_blank",
        });
        const {body} = document;
        // The anchor is only attached for the duration of the synthetic click.
        body.appendChild(anchor);
        anchor.click();
        body.removeChild(anchor);
    }

    /**
     * Attach per-post buttons ("open source", "download video" and, for image posts,
     * "download images as zip") to every post anchor in the profile's post list.
     *
     * Anchors are matched to captured posts by position: the i-th anchor uses aweme_list[i].
     * An anchor whose post has not been captured yet is skipped WITHOUT being marked, so a
     * later call (flush() schedules one after each intercepted page) can decorate it.
     */
    function createEachButton() {
        // File name stem: first 20 chars of the description with unsafe characters removed,
        // falling back to the post id when nothing usable is left.
        const fileStem = (aweme) => {
            const cleaned = aweme.desc ? aweme.desc.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") : "";
            return cleaned || aweme.awemeId;
        };
        const swallowClick = (event) => {
            event.preventDefault();
            event.stopPropagation();
        };

        const targetNodes = document.querySelectorAll("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] > li a");
        for (let i = 0; i < targetNodes.length; i++) {
            const targetNode = targetNodes[i];
            if (targetNode.dataset.added) {
                continue;
            }
            const aweme = aweme_list[i];
            if (!aweme) {
                // No captured data for this anchor yet; previously this threw a TypeError
                // half-way through and left the node partially decorated.
                continue;
            }

            targetNode.appendChild(createVideoButton("打开视频源", "0px", (event) => {
                swallowClick(event);
                openLink(aweme.url);
            }));

            const downloadVideoButton = createVideoButton("下载视频", "21px", (event) => {
                swallowClick(event);
                const xhr = new XMLHttpRequest();
                xhr.open('GET', aweme.url.replace("http://", "https://"), true);
                xhr.responseType = 'blob';
                xhr.onload = () => {
                    const a = document.createElement('a');
                    a.href = window.URL.createObjectURL(xhr.response);
                    // For image posts the play address is saved with an .mp3 extension.
                    a.download = fileStem(aweme) + (aweme.images ? ".mp3" : ".mp4");
                    a.click();
                };
                xhr.onprogress = (progress) => {
                    if (progress.lengthComputable) {
                        downloadVideoButton.textContent = "下载" + (progress.loaded * 100 / progress.total).toFixed(1) + '%';
                    }
                };
                xhr.onerror = () => {
                    downloadVideoButton.textContent = "下载失败";
                };
                xhr.send();
            });
            targetNode.appendChild(downloadVideoButton);

            if (aweme.images) {
                targetNode.appendChild(createVideoButton("图片打包下载", "42px", (event) => {
                    swallowClick(event);
                    const zip = new JSZip();
                    // NOTE(review): progress is shown on the video-download button, as before.
                    downloadVideoButton.textContent = "下载并打包中...";
                    const downloads = aweme.images.map((link, index) => fetch(link)
                        .then((response) => response.arrayBuffer())
                        .then((buffer) => {
                            zip.file(`image_${index + 1}.jpg`, buffer);
                        }));
                    Promise.all(downloads)
                        .then(() => zip.generateAsync({type: "blob"}))
                        .then((content) => {
                            const link = document.createElement("a");
                            link.href = URL.createObjectURL(content);
                            link.download = fileStem(aweme) + ".zip";
                            link.click();
                            downloadVideoButton.textContent = "图片打包完成";
                        })
                        .catch((error) => {
                            // Surface fetch/zip failures instead of leaving the status stuck.
                            console.error("douyin-user-data-download: image packaging failed", error);
                            downloadVideoButton.textContent = "图片打包失败";
                        });
                }));
            }
            targetNode.dataset.added = true;
        }
    }

    /**
     * Refresh the counters/status text and (re)schedule createEachButton().
     * Any createEachButton timeout that is still pending is cancelled first, so the
     * per-post buttons are built 500 ms after the most recent call.
     */
    function flush() {
        if (timer !== undefined) {
            clearTimeout(timer);
        }
        timer = setTimeout(createEachButton, 500);

        const postCount = aweme_list.length;
        data_button.p2.textContent = String(postCount);
        // Count the posts that carry an image list.
        const imagePostCount = aweme_list.reduce((sum, post) => (post.images ? sum + 1 : sum), 0);
        dimg_button.p2.textContent = String(imagePostCount);
        msg_pre.textContent = `已加载${postCount}个作品,${imagePostCount}个图文\n激活上方头像可展开下载按钮`;
    }

    // Set to true by interceptResponse() when posts arrive while domLoadedTimer is not yet null;
    // checkElementLoaded() then calls flush() once the header buttons exist.
    let flag = false;

    /**
     * Patch XMLHttpRequest.prototype.send so that finished post-list and profile
     * responses are copied into aweme_list / userData.
     *
     * Differences from a plain `onreadystatechange = ...` hook:
     *  - a listener is added instead of assigning the handler property, so the page's
     *    own onreadystatechange handler is left intact;
     *  - the request URL is read from `_url` when present, else from the standard `responseURL`;
     *  - a malformed or unexpected payload is logged instead of throwing inside the event handler;
     *  - a new profile response replaces the previous profile values instead of appending to them.
     */
    function interceptResponse() {
        const originalSend = XMLHttpRequest.prototype.send;
        // Instances that already carry our listener (an XHR object may be sent more than once).
        const hooked = new WeakSet();

        // Return the response as an object whether it was delivered as text or pre-parsed.
        function readJson(xhr) {
            const body = xhr.response;
            return typeof body === "string" ? JSON.parse(body) : body;
        }

        // Reduce one raw post item to the fields the rest of the script uses.
        function toPost(item) {
            const stats = item.statistics || {};
            const playAddr = item.video && item.video.play_addr;
            const urls = playAddr && playAddr.url_list;
            return {
                "awemeId": item.aweme_id,
                // Keep ASCII, CJK unified ideographs and full-width forms; collapse everything else to a space.
                "desc": (item.desc || "").replace(/[^\x00-\x7F\u4E00-\u9FFF\uFF00-\uFFEF]+/g, " ").trim(),
                "diggCount": stats.digg_count,
                "commentCount": stats.comment_count,
                "collectCount": stats.collect_count,
                "shareCount": stats.share_count,
                "date": new Date(item.create_time * 1000).toLocaleString(),
                "url": urls && urls.length > 0 ? urls[0] : "",
                // Take the last URL of each image without mutating the response payload.
                "images": item.images ? item.images.map(row => row.url_list[row.url_list.length - 1]) : null
            };
        }

        // Append a page of posts and refresh the UI (or defer until the header buttons exist).
        function handlePosts(json) {
            if (!json || !Array.isArray(json.aweme_list)) {
                return;
            }
            aweme_list.push(...json.aweme_list.map(toPost));
            if (domLoadedTimer === null) {
                flush();
            } else {
                flag = true;
            }
        }

        // Replace userData with the values of the latest profile response (order matches userKey).
        function handleProfile(json) {
            const userInfo = json && json.user;
            if (!userInfo) {
                return;
            }
            const city = userInfo.city ? userInfo.city : '';
            const district = userInfo.district ? userInfo.district : '';
            userData.length = 0;
            userData.push(
                userInfo.nickname, userInfo.following_count, userInfo.mplatform_followers_count,
                userInfo.total_favorited, '\t' + (userInfo.unique_id ? userInfo.unique_id : userInfo.short_id), userInfo.ip_location, userInfo.gender === 2 ? "女" : "男",
                `${city}·${district}`, '"' + (userInfo.signature ? userInfo.signature : '') + '"', userInfo.aweme_count, "https://www.douyin.com/user/" + userInfo.sec_uid
            );
        }

        XMLHttpRequest.prototype.send = function () {
            const self = this;
            if (!hooked.has(self)) {
                hooked.add(self);
                self.addEventListener("readystatechange", function () {
                    if (self.readyState !== 4) {
                        return;
                    }
                    // `_url` is not set anywhere in this script (presumably by the page's own
                    // XHR wrapper), so fall back to the standard responseURL.
                    const url = String(self._url || self.responseURL || "");
                    if (!url) {
                        return;
                    }
                    try {
                        if (url.indexOf("/aweme/v1/web/aweme/post") > -1) {
                            handlePosts(readJson(self));
                        } else if (url.indexOf("/aweme/v1/web/user/profile/other") > -1) {
                            handleProfile(readJson(self));
                        }
                    } catch (error) {
                        console.error("douyin-user-data-download: failed to process response", url, error);
                    }
                });
            }
            return originalSend.apply(this, arguments);
        };
    }

    /**
     * Offer `txt` to the user as a file download.
     *
     * @param {string|Uint8Array} txt - file content (text, or already-encoded bytes).
     * @param {string} filename - suggested name. The stem is limited to 20 characters and
     *        stripped of characters that are unsafe in file names; a trailing extension such
     *        as ".csv" is preserved (it used to be cut off for long names).
     */
    function txt2file(txt, filename) {
        const blob = new Blob([txt], {type: 'text/plain'});
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.href = url;

        const fullName = String(filename);
        // Only a short alphanumeric suffix counts as an extension, so "Mr. Smith" is not split.
        const extMatch = /\.[A-Za-z0-9]{1,8}$/.exec(fullName);
        const extension = extMatch ? extMatch[0] : "";
        const stem = fullName.slice(0, fullName.length - extension.length);
        link.download = stem.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") + extension;

        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
        URL.revokeObjectURL(url);
    }

    /**
     * Export the captured profile and post data as a CSV download.
     *
     * Layout: profile header row, profile value row, blank line, post header row, one row per post.
     * Post descriptions are always quoted with embedded quotes doubled; any other post field is
     * quoted only when it contains a comma, quote or line break (e.g. a locale date such as
     * "6/13/2024, 10:00:00 AM"), so such values no longer split into extra columns.
     *
     * @param {string} encoding - "gbk" to encode the text with str2gbk(); anything else keeps the string as is.
     */
    function downloadData(encoding) {
        const quote = (value) => '"' + String(value).replace(/"/g, '""') + '"';
        const field = (value) => (/[",\r\n]/.test(String(value)) ? quote(value) : value);

        let text = userKey.join(",") + "\n" + userData.join(",") + "\n\n";
        text += "作品描述,点赞数,评论数,收藏数,分享数,发布时间,下载链接\n";
        aweme_list.forEach(item => {
            text += [quote(item.desc), field(item.diggCount), field(item.commentCount),
                field(item.collectCount), field(item.shareCount), field(item.date), field(item.url)].join(",") + "\n";
        });
        if (encoding === "gbk") {
            text = str2gbk(text);
        }
        // Avoid "undefined.csv" when no profile response was captured.
        const owner = userData.length > 0 && userData[0] ? userData[0] : "douyin";
        txt2file(text, owner + ".csv");
    }

    // UI handles: the three header buttons are assigned in createAllButton(), msg_pre in createMsgBox().
    let dimg_button, data_button, scroll_button, msg_pre;

    /**
     * Create the fixed, half-transparent status line in the top-right corner,
     * store it in `msg_pre` and attach it to the document body.
     */
    function createMsgBox() {
        const box = document.createElement('pre');
        msg_pre = box;
        box.textContent = '等待上方头像加载完毕';
        Object.assign(box.style, {
            color: 'white',
            position: 'fixed',
            right: '5px',
            top: '60px',
            zIndex: '90000',
            opacity: "0.5",
        });
        document.body.appendChild(box);
    }

    /**
     * Add the script's four entries to the page header menu by cloning the menu's last link:
     * "copy author info", "auto scroll", "download loaded data" (with a gbk checkbox) and
     * "download image posts as zip". Each entry is inserted directly after the cloned link,
     * so the entry inserted last ends up first.
     *
     * Assigns the shared dimg_button, data_button and scroll_button handles; every button gets
     * `p1`/`p2` properties pointing at its first and second <p> (caption and counter/status).
     *
     * @throws {Error} when the header menu link to clone cannot be found.
     */
    function createAllButton() {
        const dom = document.querySelector("#douyin-header-menuCt pace-island > div > div:nth-last-child(1) ul a:nth-last-child(1)");
        if (!dom) {
            throw new Error("douyin-user-data-download: header menu link not found");
        }
        const baseNode = dom.cloneNode(true);
        baseNode.removeAttribute("target");
        baseNode.removeAttribute("rel");
        baseNode.removeAttribute("href");
        // querySelector also matches nested descendants, for which removeChild() on the
        // clone root would throw; remove() detaches the icon wherever it sits.
        const svgChild = baseNode.querySelector("svg");
        if (svgChild) svgChild.remove();

        // Clone the template and fill in its caption (first <p>) and counter/status (second <p>).
        const makeButton = (caption, status) => {
            const button = baseNode.cloneNode(true);
            button.p1 = button.querySelector("p:nth-child(1)");
            button.p2 = button.querySelector("p:nth-child(2)");
            button.p1.textContent = caption;
            button.p2.textContent = status;
            return button;
        };

        dimg_button = makeButton("图文打包下载", "0");
        dom.after(dimg_button);
        dimg_button.addEventListener('click', downloadImg);

        data_button = makeButton("下载已加载的数据", "0");
        const label = document.createElement('label');
        label.setAttribute('for', 'gbk');
        label.innerText = 'gbk';
        data_button.p1.after(label);
        const checkbox = document.createElement('input');
        checkbox.setAttribute('type', 'checkbox');
        checkbox.setAttribute('id', 'gbk');
        // Inserted after the caption too, so the checkbox ends up before its label.
        data_button.p1.after(checkbox);
        dom.after(data_button);
        // Toggling the checkbox must not trigger the download handler on the button itself.
        const stopPropagation = (event) => event.stopPropagation();
        label.addEventListener('click', stopPropagation);
        checkbox.addEventListener('click', stopPropagation);
        data_button.addEventListener('click', () => downloadData(checkbox.checked ? "gbk" : "utf-8"));

        scroll_button = makeButton("开启自动下拉到底", "");
        dom.after(scroll_button);

        const copyUserData_button = makeButton("复制作者信息", "");
        dom.after(copyUserData_button);
        copyUserData_button.addEventListener('click', () => copyUserData(copyUserData_button.p2));
    }

    /**
     * Wire up the auto-scroll button: clicking it toggles a 1200 ms interval that scrolls
     * the page to the bottom so further posts get loaded.
     *
     * Scrolling continues while fewer posts are captured than userData[9] (the value stored
     * under the "作品数" label) and the element following the post list shows no text; otherwise
     * the interval stops and the button reports that everything is loaded.
     */
    function scrollPageToBottom() {
        let scrollInterval;

        // Stop the interval (if any) and show `caption` on the button.
        function stopScrolling(caption) {
            clearInterval(scrollInterval);
            scrollInterval = null;
            scroll_button.p1.textContent = caption;
        }

        function scrollLoop() {
            // The end-of-list node may be absent; treat that as "no end text yet"
            // instead of throwing on every tick.
            const endNode = document.querySelector("div[data-e2e='user-post-list'] > ul[data-e2e='scroll-list'] + div div");
            const endText = endNode ? endNode.innerText : "";
            if (aweme_list.length < userData[9] && !endText) {
                scrollTo(0, document.body.scrollHeight);
            } else {
                stopScrolling("已加载全部!");
            }
        }

        scroll_button.addEventListener('click', () => {
            if (!scrollInterval) {
                scrollInterval = setInterval(scrollLoop, 1200);
                scroll_button.p1.textContent = "停止自动下拉";
            } else {
                stopScrolling("开启自动下拉");
            }
        });
    }

    /**
     * Download the images of every captured image post and offer them as a single zip,
     * one numbered folder per post.
     *
     * Progress and the final outcome are written to msg_pre. Failures while fetching or
     * zipping are logged and reported there instead of surfacing as unhandled rejections,
     * and a missing profile name no longer aborts the export after all images were fetched.
     *
     * @returns {Promise<void>} settles once the zip has been offered (or the attempt failed).
     */
    async function downloadImg() {
        const imagePosts = aweme_list.filter(a => a.images);
        if (imagePosts.length === 0) {
            alert("当前页面未发现图文链接");
            return;
        }
        const zip = new JSZip();
        try {
            for (const [index, aweme] of imagePosts.entries()) {
                const desc = aweme.desc || "";
                msg_pre.textContent = `${index + 1}.${desc.slice(0, 20)}...`;
                // Folder name: sanitized description without trailing dots/digits; fall back to the post id
                // when nothing is left after sanitizing.
                const cleaned = desc.replace(/[\/:*?"<>|\s]/g, "").slice(0, 20).replace(/[.\d]+$/g, "");
                const folder = zip.folder((index + 1) + "." + (cleaned || aweme.awemeId));
                // Images of one post are fetched in parallel; posts are processed one after another.
                await Promise.all(aweme.images.map((link, imageIndex) => {
                    return fetch(link)
                        .then((res) => res.arrayBuffer())
                        .then((buffer) => {
                            folder.file(`image_${imageIndex + 1}.jpg`, buffer);
                        });
                }));
            }
            msg_pre.textContent = "图片打包中...";
            const content = await zip.generateAsync({type: "blob"});
            const link = document.createElement("a");
            link.href = URL.createObjectURL(content);
            const owner = userData[0] ? String(userData[0]) : "douyin";
            link.download = owner.slice(0, 20).replace(/[\/:*?"<>|\s]/g, "") + ".zip";
            link.click();
            msg_pre.textContent = "图片打包完成";
        } catch (error) {
            console.error("douyin-user-data-download: image export failed", error);
            msg_pre.textContent = "图片打包失败";
        }
    }

    /**
     * Add a "下载" entry next to the player's clarity setting for the video that is
     * currently playing. The inner run() is executed every 500 ms; each pass locates the active
     * <video>, derives an id for it, and either cleans up stale entries or inserts a new one
     * whose single option opens the video source in a new window.
     */
    function douyinVideoDownloader() {
        function run() {
            // Options rendered inside the hover menu; only 'toLink' is handled below.
            let downloadOption = [{name: '打开视频源', id: 'toLink'}];
            let videoElements = document.querySelectorAll('video');
            if (videoElements.length === 0) return;
            // Pick out the <video> elements that carry an autoplay attribute.
            let playVideoElements = [];
            videoElements.forEach(function (element) {
                let autoplay = element.getAttribute('autoplay');
                if (autoplay !== null) {
                    playVideoElements.push(element);
                }
            })
            // With 'modal_id' in the page URL the first autoplay video is used, otherwise the last one.
            let videoContainer = location.href.indexOf('modal_id') !== -1
                ? playVideoElements[0]
                : playVideoElements[playVideoElements.length - 1];
            if (!videoContainer) return;
            // Resolve the playback URL: the first child's src when it has one, else the video's own src.
            let url = videoContainer && videoContainer.children.length > 0 && videoContainer.children[0].src
                ? videoContainer.children[0].src
                : videoContainer.src;
            // Derive a video id; it is used to build the custom element ids below.
            // Tried in order: a path segment of an http(s) URL, the tail of a blob: URL,
            // and finally the element's data-xgplayerid attribute.
            let videoId;
            let resp = url.match(/^(https:)?\/\/.+\.com\/([a-zA-Z0-9]+)\/[a-zA-Z0-9]+\/video/);
            let res = url.match(/blob:https:\/\/www.douyin.com\/(.*)/);
            if (resp && resp[2]) {
                videoId = resp[2];
            } else if (res && res[1]) {
                videoId = res[1]
            } else {
                videoId = videoContainer.getAttribute('data-xgplayerid')
            }
            // The control bar is looked up two levels above the <video>.
            let playContainer = videoContainer.parentNode.parentNode.querySelector('.xg-right-grid');
            if (!playContainer) return;
            // When browsing videos from a profile page several download buttons can appear;
            // remove the ones that are not needed and keep only the one for the current video.
            let videoDownloadDom = playContainer.querySelector('#scriptVideoDownload' + videoId);
            if (videoDownloadDom) {
                let dom = playContainer.querySelectorAll('.xgplayer-playclarity-setting');
                dom.forEach(function (d) {
                    let btn = d.querySelector('.btn');
                    if (d.id !== 'scriptVideoDownload' + videoId && btn.innerText === '下载') {
                        d.parentNode.removeChild(d);
                    }
                });
                return;
            }
            if (videoContainer && playContainer) {
                // The clarity-setting control serves as the template for the download control.
                let playClarityDom = playContainer.querySelector('.xgplayer-playclarity-setting');
                if (!playClarityDom) return;

                let palyClarityBtn = playClarityDom.querySelector('.btn');
                if (!palyClarityBtn) return;

                let downloadDom = playClarityDom.cloneNode(true);
                downloadDom.setAttribute('id', 'scriptVideoDownload' + videoId);

                // Different spacing is applied when the page URL contains 'search'.
                if (location.href.indexOf('search') === -1) {
                    downloadDom.style = 'margin-top:-68px;padding-top:100px;padding-left:20px;padding-right:20px;';
                } else {
                    downloadDom.style = 'margin-top:0px;padding-top:100px;';
                }

                let downloadText = downloadDom.querySelector('.btn');
                downloadText.innerText = '下载';
                downloadText.style = 'font-size:14px;font-weight:600;';
                downloadText.setAttribute('id', 'zhmDouyinDownload' + videoId);
                // Link target: the href of the first xg-icon's first child, else the current page URL.
                let detail = playContainer.querySelector('xg-icon:nth-of-type(1)').children[0];
                let linkUrl = detail.getAttribute('href') ? detail.getAttribute('href') : location.href;

                // Prefix the host when the link does not already contain it.
                if (linkUrl.indexOf('www.douyin.com') === -1) {
                    linkUrl = '//www.douyin.com' + linkUrl;
                }

                downloadText.setAttribute('data-url', linkUrl);
                downloadText.removeAttribute('target');
                downloadText.setAttribute('href', 'javascript:void(0);');

                // Show the option list while hovering and hide it again on mouse-out.
                let virtualDom = downloadDom.querySelector('.virtual');
                downloadDom.onmouseover = function () {
                    if (location.href.indexOf('search') === -1) {
                        virtualDom.style = 'display:block !important';
                    } else {
                        virtualDom.style = 'display:block !important;margin-bottom:37px;';
                    }
                }

                downloadDom.onmouseout = function () {
                    virtualDom.style = 'display:none !important';
                }

                // Build the option markup; each option's element id is the option id followed by videoId.
                let downloadHtml = '';
                downloadOption.forEach(function (item) {
                    if (item.id === "toLink") {
                        downloadHtml += `<div style="text-align:center;" class="item ${item.id}" id="${item.id}${videoId}">${item.name}</div>`;
                    }
                })
                if (downloadDom.querySelector('.virtual')) {
                    downloadDom.querySelector('.virtual').innerHTML = downloadHtml;
                }
                playClarityDom.after(downloadDom);
                // Open the source directly; blob: URLs cannot be opened this way, so alert instead.
                let toLinkDom = playContainer.querySelector('#toLink' + videoId);
                if (toLinkDom) {
                    toLinkDom.addEventListener('click', function () {
                        if (url.match(/^blob/)) {
                            alert("加密视频地址,无法直接打开");
                        } else {
                            window.open(url);
                        }
                    })
                }
            }
        }

        // Re-run twice a second.
        setInterval(run, 500);
    }

    if (document.title === "验证码中间页") {
        return
    }
    createMsgBox();
    interceptResponse();
    let domLoadedTimer;
    const checkElementLoaded = () => {
        const element = document.querySelector('#douyin-header-menuCt pace-island > div > div:nth-last-child(1) ul a');
        if (element) {
            console.log('Node has been loaded.');
            clearInterval(domLoadedTimer);
            domLoadedTimer = null;
            createAllButton();
            douyinVideoDownloader();
            scrollPageToBottom();
            if (flag) flush();
        }
    };
    window.onload = () => {
        domLoadedTimer = setInterval(checkElementLoaded, 500);
    }
})();