您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
微博数据下载备份
当前为
// ==UserScript==
// @name         微博备份
// @namespace    https://github.com/Shapooo/
// @version      0.6
// @description  微博数据下载备份
// @author       Shapooo
// @homepageURL  https://github.com/Shapooo/WeiBack
// @match        *://*.weibo.com/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=weibo.com
// @require      https://cdn.bootcdn.net/ajax/libs/jszip/3.10.1/jszip.min.js
// @require      https://cdn.bootcdn.net/ajax/libs/FileSaver.js/2.0.5/FileSaver.min.js
// @grant        none
// @license      GPL
// @note         2023-05-12 0.1 完成最基本的功能,能够拉取原始json数据文件进行备份
// @note         2023-05-15 0.2 完成html格式保存功能
// @note         2023-05-20 0.3 完成分段下载功能
// @note         2023-05-20 0.3 优化链接显示标题
// @note         2023-05-20 0.3 增加对图片、视频混合型微博支持
// @note         2023-05-20 0.3 增加对 www.weibo.com 的支持
// @note         2023-05-22 0.4 本地图片缓存添加 lru 算法
// @note         2023-05-23 0.4 将图片获取变为串行,减小下载连接被 reset 的可能性
// @note         2023-05-23 0.4 为图片下载增加了重试机制
// @note         2023-05-23 0.4 增加了备份失败时的提示,方便用户反馈问题
// @note         2023-05-23 0.4 修复若干问题
// @note         2023-05-28 0.5 修复若干问题
// @note         2023-05-28 0.5 增加分段下载设置功能
// @note         2023-05-28 0.5 增加图片大小设置功能
// @note         2023-06-11 0.6 修复无法下载全部微博的问题
// ==/UserScript==

'use strict'

// Retry / pacing settings for the post-list requests.
const PAGE_FETCH_ATTEMPTS = 10
const PAGE_RETRY_DELAY_MS = 8 * 1000
const PAGE_INTERVAL_MS = 10 * 1000

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
function sleep (ms) {
  return new Promise((resolve) => { setTimeout(resolve, ms) })
}

/**
 * Fixed-capacity least-recently-used cache.
 *
 * Entries live in parallel arrays (`K`, `V`) and are chained into a doubly
 * linked list through the `next` / `prev` index arrays; `items` maps a key to
 * its slot index. The index arrays are `Uint16Array`s, so slot indices must
 * fit in 16 bits.
 */
class LRUCache {
  /**
   * @param {number} capacity maximum number of entries kept
   */
  constructor (capacity) {
    this.capacity = capacity
    this.next = new Uint16Array(capacity)
    this.prev = new Uint16Array(capacity)
    this.K = new Array(capacity)
    this.V = new Array(capacity)
    this.size = 0
    this.head = 0
    this.tail = 0
    this.items = new Map()
  }

  /**
   * Move the slot `pointer` to the head (most recently used end) of the list.
   * @param {number} pointer slot index
   * @returns {LRUCache} this
   */
  splayOnTop (pointer) {
    const oldHead = this.head
    if (this.head === pointer) {
      return this
    }
    const previous = this.prev[pointer]
    const next = this.next[pointer]
    if (this.tail === pointer) {
      this.tail = previous
    } else {
      this.prev[next] = previous
    }
    this.next[previous] = next
    this.prev[oldHead] = pointer
    this.head = pointer
    this.next[pointer] = oldHead
    return this
  }

  /**
   * Insert or update `key`. When the cache is full the tail (least recently
   * used) slot is reused and its old key is dropped.
   * @param {*} key
   * @param {*} value
   */
  set (key, value) {
    let pointer = this.items.get(key)
    if (typeof pointer !== 'undefined') {
      this.splayOnTop(pointer)
      this.V[pointer] = value
      return
    }
    if (this.size < this.capacity) {
      pointer = this.size++
    } else {
      pointer = this.tail
      this.tail = this.prev[pointer]
      this.items.delete(this.K[pointer])
    }
    this.items.set(key, pointer)
    this.K[pointer] = key
    this.V[pointer] = value
    this.next[pointer] = this.head
    this.prev[this.head] = pointer
    this.head = pointer
  }

  /**
   * Look up `key` and mark it as most recently used.
   * @param {*} key
   * @returns {*} the stored value, or undefined when the key is absent
   */
  get (key) {
    const pointer = this.items.get(key)
    if (typeof pointer === 'undefined') {
      return
    }
    this.splayOnTop(pointer)
    return this.V[pointer]
  }
}

/**
 * Request one page of post metadata.
 * @param {number|string} uid user id sent as the `uid` query parameter
 * @param {number} page page number
 * @param {string} type 'myblog' (default), 'fav' or 'like'
 * @returns {Promise<object>} the parsed JSON response
 */
async function fetchPostMeta (uid = 0, page = 1, type = 'myblog') {
  let api = `${STATUSES_MY_MICRO_BLOG_API}?uid=${uid}&page=${page}&feature=0`
  if (type === 'fav') {
    api = `${FAVORITES_ALL_FAV_API}?uid=${uid}&page=${page}`
  }
  if (type === 'like') {
    api = `${STATUSES_LIKE_LIST_API}?uid=${uid}&page=${page}`
  }
  console.log(`request ${api}`)
  const req = await fetch(api)
  const data = await req.json()
  return data
}

/**
 * Fetch the post list of one page, retrying up to PAGE_FETCH_ATTEMPTS times.
 * Both rejected requests and responses without `ok` / without a post array
 * count as a failed attempt.
 * @param {number|string} uid
 * @param {number} page
 * @param {string} type
 * @returns {Promise<Array>} the posts of that page (possibly empty)
 * @throws {Error} when every attempt failed
 */
async function fetchPageWithRetry (uid, page, type) {
  let lastFailure = ''
  for (let attempt = 0; attempt < PAGE_FETCH_ATTEMPTS; attempt++) {
    try {
      const pageData = await fetchPostMeta(uid, page, type)
      if (pageData && pageData.ok) {
        const list = type === 'fav' ? pageData.data : pageData.data.list
        if (Array.isArray(list)) {
          return list
        }
      }
      lastFailure = JSON.stringify(pageData)
    } catch (err) {
      lastFailure = String(err)
    }
    console.log('retry', attempt)
    showTip(`[重试]备份第 ${page} 页,错误内容: ${lastFailure}`)
    if (attempt < PAGE_FETCH_ATTEMPTS - 1) {
      await sleep(PAGE_RETRY_DELAY_MS)
    }
  }
  throw new Error(`failed to fetch page ${page} of ${type}: ${lastFailure}`)
}

/**
 * Download every picture queued in `storage.picUrls` into `resources`,
 * reusing blobs from `storage.cache`. Pictures that cannot be downloaded are
 * skipped so that one broken image does not invalidate the archive. The queue
 * is emptied afterwards.
 * @param {object} storage per-task state created by fetchAllPosts
 * @param {Map<string, Blob>} resources url -> blob for the current segment
 */
async function collectResources (storage, resources) {
  // Sequential on purpose: the changelog notes that serial downloads reduce
  // connection resets.
  for (const url of storage.picUrls) {
    if (resources.has(url)) {
      continue
    }
    let blob = storage.cache.get(url)
    if (!blob) {
      blob = await fetchPic(url)
      if (!blob) {
        console.warn(`skip resource ${url}`)
        continue
      }
      storage.cache.set(url, blob)
    }
    resources.set(url, blob)
  }
  storage.picUrls.clear()
}

/**
 * Write one segment (an HTML file plus its `_files` folder) into the zip and
 * empty `resources`.
 * @param {object} rootFolder JSZip folder of the task
 * @param {string} segmentName base name of the segment
 * @param {string[]} pagesHTML generated HTML of the pages in this segment
 * @param {Map<string, Blob>} resources url -> blob used by this segment
 */
function writeSegment (rootFolder, segmentName, pagesHTML, resources) {
  const doc = new DOMParser().parseFromString(HTML_GEN_TEMP, 'text/html')
  doc.body.innerHTML = pagesHTML.join('')
  rootFolder.file(segmentName + '.html', doc.documentElement.outerHTML)
  const resourcesFolder = rootFolder.folder(segmentName + '_files')
  resources.forEach((blob, url) => {
    resourcesFolder.file(getFilename(url), blob, { base64: true })
  })
  resources.clear()
}

/**
 * Back up the posts of `type` for the given page range and save them as a zip.
 *
 * Pages are fetched one by one (with a pause between requests), rendered to
 * HTML, and flushed into the archive every `downloadPerid` pages.
 * @param {string} type 'myblog', 'fav' or 'like'
 * @param {number[]} range `[firstPage, lastPage]`; `lastPage` may be Infinity,
 *   in which case the loop stops at the first empty page
 * @throws {Error} when a page cannot be fetched or the zip cannot be generated
 */
async function fetchAllPosts (type = 'myblog', range) {
  console.log(`fetching ${type} post ${range}`)
  const storage = {
    cache: new LRUCache(200),
    taskName: `WeiBack-${type}-${Date.now()}`,
    picUrls: new Set(),
    index: 1
  }
  await fetchEmoticon()
  const uid = globalConfig.uid
  downloadPerid = downloadPerid || 10
  let allPageData = []
  let noMore = false
  let index = 0
  const zip = new JSZip()
  const name = storage.taskName
  const rootFolder = zip.folder(name)
  const resources = new Map()
  for (let page = range[0]; page <= range[1]; page++) {
    console.log('scan', 'page', page)
    showTip(`正在备份第 ${page} 页<br>因微博速率限制,过程可能较长,先干点别的吧`)
    const data = await fetchPageWithRetry(uid, page, type)
    if (data.length === 0) {
      noMore = true
    }
    allPageData.push(await generateHTMLPage(data, storage))
    await collectResources(storage, resources)
    index++
    const isLastPage = noMore || page === range[1]
    if (index % downloadPerid === 0 || isLastPage) {
      // url2path() embedded storage.index into the paths of the pages
      // rendered so far, so the counter may only advance after the flush.
      writeSegment(rootFolder, `${name}-${storage.index}`, allPageData, resources)
      storage.index++
      allPageData = []
    }
    if (isLastPage) {
      break
    }
    await sleep(PAGE_INTERVAL_MS)
  }
  showTip('数据拉取完成,等待下载到本地')
  // Awaited so that the "done" tip is only shown once the archive was handed
  // to saveAs, and so that zip failures reach the caller.
  const content = await zip.generateAsync({ type: 'blob' })
  saveAs(content, name + '.zip')
  console.log('all done')
  showTip('完成,可以进行其它操作')
}

/**
 * Download `url` as a Blob, retrying up to five times with a growing delay.
 * A non-2xx response counts as a failure.
 * @param {string} url
 * @returns {Promise<Blob|undefined>} undefined when every attempt failed
 */
async function fetchPic (url) {
  for (let i = 0; i < 5; ++i) {
    try {
      const res = await fetch(url)
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`)
      }
      return (await res.blob())
    } catch (err) {
      console.log('fic fetch occurs: ', err)
      await sleep(i * 300)
    }
  }
  console.log(`pic ${url} download failed`)
}

// Skeleton (with embedded stylesheet) of every generated backup HTML file.
const HTML_GEN_TEMP = '<html lang="zh-CN"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><style>body{background-color:#f1f2f5}.bk-post-wrapper{border-radius:4px;background:#fff;width:700px;margin:8px auto;padding:10px 0}.bk-poster{display:flex;align-items:center;vertical-align:middle;height:60px}.bk-poster-avatar,.bk-retweeter-avatar{height:60px;width:60px;border-radius:50%;margin:auto 8px}.bk-retweeter-avatar{height:40px;width:40px}.bk-post-text{background:#fff}.bk-content{margin:8px 24px 8px 76px;font-size:15px;line-height:24px}.bk-retweeter{display:flex;align-items:center;vertical-align:middle;height:40px;margin:8px 24px 8px 76px}.bk-retweet{background:#f9f9f9;padding:2px 0}.bk-pic{max-width:600px;max-height:400px;margin:auto}.bk-poster-name,.bk-retweeter-name{color:#000;font-weight:700;text-decoration:none;font-family:Arial,Helvetica,sans-serif}.bk-poster-name:hover,.bk-retweeter-name:hover{color:#eb7350}.bk-icon-link{height:20px;filter:sepia(100%) saturate(3800%) contrast(75%);vertical-align:middle;margin-bottom:4px}.bk-link,.bk-user{color:#eb7350;text-decoration:none}.bk-link:hover,.bk-user:hover{text-decoration:underline}.bk-emoji{height:20px;vertical-align:middle;margin-bottom:4px}.bk-create-detail{font-size:10px;color:#939393}</style><title>微博备份</title></head><body></body></html>'

/**
 * Render all posts of one page to a single HTML string.
 * @param {Array<object>} posts
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {Promise<string>}
 */
async function generateHTMLPage (posts, storage) {
  const rendered = await Promise.all(posts.map((post) => generateHTMLPost(post, storage)))
  return rendered.join('')
}

/**
 * Build the DOM for one post (or, with `isRetweet`, for the quoted post).
 * @param {object} postMeta result of parsePost
 * @param {boolean} isRetweet switches the class prefix to 'bk-retweet'
 * @returns {HTMLDivElement}
 */
function composePost (postMeta, isRetweet = false) {
  const prefix = isRetweet ? 'bk-retweet' : 'bk-post'
  const postDiv = document.createElement('div')
  postDiv.className = prefix

  if (postMeta.posterName) {
    const posterDiv = document.createElement('div')
    posterDiv.className = prefix + 'er'

    const avatar = document.createElement('img')
    avatar.className = prefix + 'er-avatar'
    avatar.alt = '头像'
    avatar.src = postMeta.posterAvatar
    posterDiv.appendChild(avatar)

    const name = document.createElement('a')
    name.className = prefix + 'er-name'
    name.href = postMeta.posterUrl
    name.innerText = postMeta.posterName
    posterDiv.appendChild(name)

    const createDetail = document.createElement('p')
    createDetail.className = 'bk-create-detail'
    createDetail.innerHTML = `   ${postMeta.createdAt} ${postMeta.regionName}`
    posterDiv.appendChild(createDetail)

    postDiv.appendChild(posterDiv)
  }

  // NOTE(review): postMeta.text is HTML assembled by transText() from the raw
  // post text without escaping; markup inside a post ends up in the backup.
  const textDiv = document.createElement('div')
  textDiv.className = prefix + '-text bk-content'
  textDiv.innerHTML = postMeta.text
  postDiv.appendChild(textDiv)

  if (postMeta.medium) {
    const mediaDiv = document.createElement('div')
    mediaDiv.className = prefix + '-media bk-content'
    mediaDiv.innerHTML = postMeta.medium.join('')
    postDiv.appendChild(mediaDiv)
  }

  if (postMeta.postUrl) {
    const postUrl = document.createElement('a')
    postUrl.innerHTML = '[原贴链接]'
    postUrl.href = postMeta.postUrl
    postUrl.className = 'bk-link bk-content'
    postDiv.appendChild(postUrl)
  }
  return postDiv
}

/**
 * Render one post, followed by its retweeted post when present.
 * @param {object} post raw post object from the API
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {Promise<string>} outer HTML of the post wrapper
 */
async function generateHTMLPost (post, storage) {
  const wrapper = document.createElement('div')
  wrapper.className = 'bk-post-wrapper'
  wrapper.appendChild(composePost(await parsePost(post, storage)))
  if (post.retweeted_status) {
    wrapper.appendChild(composePost(await parsePost(post.retweeted_status, storage), true))
  }
  return wrapper.outerHTML
}

/**
 * Choose the picture variant matching the global `imageDefinition` setting
 * (3: largest available, 2: large, otherwise: middle / thumbnail).
 * @param {object} [variants] map of variant name -> `{ url }`
 * @returns {object|undefined} the chosen variant, if any exists
 */
function pickPicVariant (variants) {
  if (!variants) {
    return undefined
  }
  if (imageDefinition === 3) {
    return variants.mw2000 || variants.largest || variants.original || variants.large || variants.bmiddle
  }
  if (imageDefinition === 2) {
    return variants.large || variants.bmiddle
  }
  return variants.bmiddle || variants.thumbnail
}

/**
 * Create an `<img class="bk-pic">` element.
 * @param {string} src
 * @param {string} alt
 * @returns {HTMLImageElement}
 */
function createPicElement (src, alt) {
  const pic = document.createElement('img')
  pic.className = 'bk-pic'
  pic.alt = alt
  pic.src = src
  return pic
}

/**
 * Render the media attachments of a post.
 *
 * Every picture goes through url2path(), which both rewrites the source to
 * the local `_files` folder and queues the remote URL for download.
 * @param {object} post raw post object from the API
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {string[]} HTML fragments, one per attachment
 */
function getMedium (post, storage) {
  if (post.mix_media_info) {
    return post.mix_media_info.items.map((item) => {
      const data = item.data
      if (item.type === 'pic') {
        const picObj = pickPicVariant(data)
        if (!picObj || !picObj.url) {
          return ''
        }
        return createPicElement(url2path(picObj.url, storage), '[图片]').outerHTML
      }
      if (item.type === 'video') {
        const video = document.createElement('a')
        video.className = 'bk-link'
        video.innerHTML = data.page_title
        const h5Url = data.media_info && data.media_info.h5_url
        if (h5Url) {
          video.href = h5Url
        }
        if (data.page_pic) {
          video.appendChild(createPicElement(url2path(data.page_pic, storage), '[视频]'))
        }
        return video.outerHTML
      }
      return ''
    })
  }
  if (post.pic_ids && post.pic_infos) {
    return post.pic_ids
      .map((id) => pickPicVariant(post.pic_infos[id]))
      .filter((picObj) => picObj && picObj.url)
      .map((picObj) => createPicElement(url2path(picObj.url, storage), '[图片]').outerHTML)
  }
  return []
}

/**
 * Convert a raw API post into the flat structure consumed by composePost.
 * Posts without a `user` object yield undefined poster fields instead of
 * throwing.
 * @param {object} post raw post object from the API
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {Promise<object>}
 */
async function parsePost (post, storage) {
  let text = post.isLongText ? await fetchLongText(post.mblogid) : post.text_raw
  text = await transText(text || post.text_raw, post.topic_struct, post.url_struct, storage)

  const user = post.user
  // imageDefinition 1 prefers the smaller avatar; fall back to the other
  // size when the preferred one is missing.
  const avatarUrl = user && (imageDefinition === 1
    ? user.avatar_large || user.avatar_hd
    : user.avatar_hd || user.avatar_large)
  const posterAvatar = avatarUrl && url2path(avatarUrl, storage)

  return {
    posterName: user && user.screen_name,
    posterUrl: user && 'https://weibo.com' + user.profile_url,
    posterAvatar,
    text,
    postUrl: user && user.avatar_large && `https://weibo.com/${user.idstr}/${post.mblogid}`,
    mblogid: post.mblogid,
    createdAt: post.created_at,
    regionName: post.region_name ? post.region_name : '',
    medium: getMedium(post, storage)
  }
}

/**
 * Fetch the full text of a long post.
 * @param {string} mblogid
 * @returns {Promise<string|undefined>} undefined when the request fails, so
 *   that parsePost can fall back to `text_raw`
 */
async function fetchLongText (mblogid) {
  const api = `${STATUSES_LONGTEXT_API}?id=${mblogid}`
  try {
    const res = await fetch(api)
    const longText = (await res.json()).data.longTextContent
    return longText
  } catch (err) {
    console.log(`long text of ${mblogid} unavailable: `, err)
    return undefined
  }
}

/**
 * Turn raw post text into HTML: line breaks, links, @mentions, #topics# and
 * [emoji] phrases are replaced by markup.
 * @param {string} text raw text
 * @param {Array<object>} [topicStruct] `topic_struct` of the post
 * @param {Array<object>} [urlStruct] `url_struct` of the post
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {Promise<string>} HTML, or the falsy input unchanged
 */
async function transText (text, topicStruct, urlStruct, storage) {
  const o = {
    url: '(http|https)://[a-zA-Z0-9$%&~_#/.\\-:=,?]{5,280}',
    stock: '\\$([^\\$]+)\\$',
    br: '\\n'
  }
  const atExpr = /@[\u4e00-\u9fa5|\uE7C7-\uE7F3|\w_\-·]+/g // at id
  const emojiExpr = /(\[.*?\])(?!#)/g // emoji
  const r = new RegExp(Object.values(o).join('|'), 'g')
  const emailExpr = /[A-Za-z0-9]+([_.][A-Za-z0-9]+)*@([A-Za-z0-9-]+\.)+[A-Za-z]{2,6}/g // mail addr
  const topicExpr = /#([^#]+)#/g // topic

  if (!text) {
    return text
  }

  // "@domain" parts of mail addresses must not be turned into mention links;
  // remember them here and skip them in the mention pass below.
  const mailMentions = []
  text = text.replace(emailExpr, function (t) {
    const e = t.match(atExpr)
    if (e) {
      mailMentions.push(e[0])
    }
    return t
  })
  text = text.replace(r, function (e) {
    if (e === '\n') {
      return transBr()
    }
    return e.slice(0, 1) === 'h' || urlStruct ? transUrl(e, urlStruct) : e
  })
  text = text.replace(atExpr, function (e) {
    return mailMentions.includes(e) ? e : transUser(e)
  })
  text = text.replace(topicExpr, function (e) {
    return e.slice(0, 1) === '#' && isSuperTopic(e, urlStruct) !== e ? transTopic(e, topicStruct) : e
  })
  console.assert(globalConfig.emoticon)
  text = text.replace(emojiExpr, function (e) {
    return transEmoji(e, storage)
  })
  return text
}

/**
 * Look up the `url_struct` entry whose `short_url` equals `input`.
 * @param {string} [input]
 * @param {Array<object>} [urlStruct]
 * @returns {object|undefined|string|number} the entry, or a falsy value
 */
function isSuperTopic (input, urlStruct) {
  input = input !== undefined ? input : ''
  urlStruct = urlStruct !== undefined ? urlStruct : []
  return (input && urlStruct.length) && urlStruct.find(function (e) {
    return e.short_url === input
  })
}

/**
 * Render a URL found in post text as a link, using the title and long URL
 * from `urlStruct` when an entry for it exists.
 * @param {string} input matched text
 * @param {Array<object>} [urlStruct]
 * @returns {string} link HTML, or `input` unchanged when it is not a URL
 */
function transUrl (input, urlStruct) {
  const urlObj = urlStruct && urlStruct.find(function (e) {
    return e.short_url === input
  })
  const urlExpr = /(http|https):\/\/([\w.]+\/?)\S*/
  if (!urlExpr.test(input)) {
    return input
  }
  if (/^http:\/\/t\.cn/.test(input)) {
    input = input.replace(/http:/, 'https:')
  }
  let urlTitle = '网页链接'
  let url = input
  if (urlObj && urlObj.url_title) {
    urlTitle = urlObj.url_title
  }
  if (urlObj && urlObj.long_url) {
    url = urlObj.long_url
  }
  return `<a class="bk-link" target="_blank" href="${url}"><img class="bk-icon-link" src="https://h5.sinaimg.cn/upload/2015/09/25/3/timeline_card_small_web_default.png"/>${urlTitle}</a>`
}

/**
 * Render `#topic#` as a search link, unless `topicStruct` marks that topic
 * as invalid.
 * @param {string} t matched text including both '#'
 * @param {Array<object>} [topicStruct]
 * @returns {string}
 */
function transTopic (t, topicStruct) {
  const title = t && t.slice(1, -1)
  const invalid = topicStruct && topicStruct.find(function (topic) {
    return topic.topic_title === title && topic.is_invalid === 1
  })
  if (invalid) {
    return t
  }
  const href = 'https://s.weibo.com/weibo?q='.concat(encodeURIComponent(t))
  return '<a class ="bk-link" href="'.concat(href, '" target="').concat('_blank', '">').concat(t, '</a>')
}

/**
 * Render `@name` as a profile link.
 * @param {string} t matched text including the '@'
 * @returns {string}
 */
function transUser (t) {
  const e = t.slice(1)
  return e ? '<a class="bk-user" href=https://weibo.com/n/'.concat(e, '>').concat(t, '</a>') : t
}

/**
 * @returns {string} the markup used for a line break
 */
function transBr () {
  return '<br />'
}

/**
 * Render `[phrase]` as an emoji image when the phrase is a known emoticon.
 * @param {string} t matched text including the brackets
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {string} image HTML, or `t` unchanged
 */
function transEmoji (t, storage) {
  const location = getEmoji(t, storage)
  const o = t.slice(1, -1)
  return location
    ? '<img class="bk-emoji" alt="['.concat(o, ']" title="[').concat(o, ']" src="').concat(location, '" />')
    : t
}

/**
 * Load the emoticon table and store it, flattened to a phrase -> url Map,
 * in `globalConfig.emoticon`.
 */
async function fetchEmoticon () {
  const api = STATUSES_CONFIG_API
  const res = await fetch(api)
  const rawEmoticon = (await res.json()).data.emoticon
  console.assert(rawEmoticon !== undefined)
  const emoticon = new Map()
  // rawEmoticon: { lang: { setName: [{ phrase, url }, ...], ... }, ... }
  for (const emojiSets of Object.values(rawEmoticon || {})) {
    for (const emojiSet of Object.values(emojiSets)) {
      for (const obj of emojiSet) {
        emoticon.set(obj.phrase, obj.url)
      }
    }
  }
  globalConfig.emoticon = emoticon
}

/**
 * Resolve an emoticon phrase to its local path.
 * @param {string} e phrase including the brackets
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {string} local path, or '' when the phrase is unknown
 */
function getEmoji (e, storage) {
  const emoticon = globalConfig.emoticon
  if (emoticon === undefined) {
    return ''
  }
  const url = emoticon.get(e)
  return url ? url2path(url, storage) : ''
}

/**
 * @param {string} url
 * @returns {string} last path segment of `url` without its query string
 */
function getFilename (url) {
  return url.split('/').slice(-1)[0].split('?')[0]
}

/**
 * Map a remote resource URL to its path inside the current segment's
 * `_files` folder and queue the URL for download.
 * @param {string} url
 * @param {object} storage per-task state created by fetchAllPosts
 * @returns {string} relative path to use in the generated HTML
 */
function url2path (url, storage) {
  const fileName = getFilename(url)
  const filePath = `./${storage.taskName}-${storage.index}_files/` + fileName
  storage.picUrls.add(url)
  return filePath
}

// Stylesheet of the floating backup panel.
const bkBoxStyle = '.bkBox{position:fixed;border-radius:5px;background:#fff;top:80px;right:20px;z-index:100000;padding:10px 15px;text-align:center;border:1px solid #a7a6a6}.bkBox-title{font-size:20px;color:#ff8200;margin:8px 0;font-weight:400;border:1px solid transparent;border-bottom-color:#a7a6a6}.bkBox-button{border-radius:2.25rem;display:block;background:#ffd3a3;padding:5px 8px;color:#fff;text-align:center;cursor:pointer;margin:16px auto 7px;border:0;font:white}.bkBox-button:hover{background:#ff8200}.box,.nav{width:300px;margin:auto;display:flex}.nav{height:30px;background-color:#fff;border:1px gray}.box{overflow:hidden}.bk-naviItem{color:#000;height:30px;width:100px;margin:auto;border:8px #000;float:left}.bk-naviItem,.bk-naviItem:hover{text-decoration:none}.bk-navi-inner{border-radius:8px;height:30px}.bk-navi-inner:hover{background-color:#f2f0ef}.bk-pageNumIndicator{margin:16px}.bk-settingPage{border-radius:5px;width:300px;height:190px;background-color:#f2f0ef;flex-shrink:0}.bk-input{border-radius:1em;border:2px solid #ffd3a3;height:20px;width:50px;text-align:center}.bk-input:hover{border:2px solid #ff8200}.bkBox-tip{height:100px;width:300px;display:flex;align-items:center;justify-content:center}'
// Settings form; inputs in order: first page, last page, pages per segment,
// image quality.
const SETTINGS_TEMP = '<div class="bk-pageNumIndicator"></div><div>下载范围:<input type="number" value="1" min="1" max="22" class="bk-input"> 至 <input type="number" value="22" min="1" max="22" class="bk-input"> 页</div><div>分段下载:每 <input type="number" value="10" min="1" max="20" class="bk-input"> 页分段</div><div>图片清晰度:<input type="number" value="3" min="1" max="3" class="bk-input">(越大越清晰)</div><div><button class="bkBox-button">开始下载</button></div>'
const TITLE_PIC = '<img src="">'
const BKBOX_TEMP = `<div class='bkBox-title'>${TITLE_PIC}</div><nav><div class='navi'></div></nav><section><div class='box'></div></section>`

const domain = window.location.host
const STATUSES_CONFIG_API = `https://${domain}/ajax/statuses/config`
const STATUSES_MY_MICRO_BLOG_API = `https://${domain}/ajax/statuses/mymblog`
const STATUSES_LONGTEXT_API = `https://${domain}/ajax/statuses/longtext`
const STATUSES_LIKE_LIST_API = `https://${domain}/ajax/statuses/likelist`
const FAVORITES_ALL_FAV_API = `https://${domain}/ajax/favorites/all_fav`
const FAVORITES_TAGS_API = `https://${domain}/ajax/favorites/tags?page=1&is_show_total=1`
const PROFILE_INFO_API = `https://${domain}/ajax/profile/info`

// `$CONFIG` is a global read from the hosting page.
const globalConfig = {
  uid: $CONFIG.uid
}

/**
 * Describe one backup type together with its (asynchronously fetched) page
 * count.
 * @param {string} type
 * @param {string} name label shown in the panel
 * @returns {Promise<{type: string, name: string, pageAmount: *}>}
 */
const TT = async (type, name) => {
  const amount = await getPageAmount(type)
  console.log(`get amount of ${type} post: ${amount}`)
  return { type, name, pageAmount: amount }
}

const bkBox = document.createElement('div')
bkBox.className = 'bkBox'
bkBox.innerHTML = BKBOX_TEMP
const bkBoxStyleSheet = document.createElement('style')
bkBoxStyleSheet.innerText = bkBoxStyle
bkBox.appendChild(bkBoxStyleSheet)
const bkBoxTip = document.createElement('div')
bkBoxTip.className = 'bkBox-tip'
bkBox.appendChild(bkBoxTip)

// User settings, updated by the download button handler.
let downloadPerid = 10
let imageDefinition = 3

/**
 * Parse `value` as a base-10 integer and clamp it into `[min, max]`.
 * @param {string} value
 * @param {number} min
 * @param {number} max
 * @param {number} fallback returned when `value` is not a number
 * @returns {number}
 */
function clampInt (value, min, max, fallback) {
  const parsed = Number.parseInt(value, 10)
  if (Number.isNaN(parsed)) {
    return fallback
  }
  return Math.min(max, Math.max(min, parsed))
}

/**
 * Turn the two page inputs into the `[first, last]` range for fetchAllPosts.
 *
 * The last page becomes Infinity ("everything") when it is left empty or
 * reaches the known page count, because that count is only an estimate.
 * @param {string} startValue value of the first-page input
 * @param {string} endValue value of the last-page input
 * @param {*} pageAmount known page count; may be Infinity or a falsy value
 * @returns {number[]} `[first, last]` with `first >= 1` and `last >= first`
 */
function resolveDownloadRange (startValue, endValue, pageAmount) {
  const parsedStart = Number.parseInt(startValue, 10)
  const parsedEnd = Number.parseInt(endValue, 10)
  const start = Number.isNaN(parsedStart) || parsedStart < 1 ? 1 : parsedStart
  const reachesLastPage = typeof pageAmount === 'number' && pageAmount > 0 && parsedEnd >= pageAmount
  const end = Number.isNaN(parsedEnd) || reachesLastPage ? Infinity : Math.max(start, parsedEnd)
  return [start, end]
}

/**
 * Build the settings form of one backup type and wire up its download button.
 * @param {{type: string, name: string, pageAmount: *}} bkType
 * @returns {HTMLDivElement}
 */
function buildSettingsPage ({ type, name, pageAmount }) {
  const page = document.createElement('div')
  page.innerHTML = SETTINGS_TEMP

  const pageNumIndicator = page.getElementsByClassName('bk-pageNumIndicator')[0]
  if (type === 'like') {
    pageNumIndicator.innerHTML = '无法获知点赞总页数'
    pageNumIndicator.title = '微博的接口并不返回全部点赞的数量;同样每页大约二十条'
  } else {
    pageNumIndicator.innerHTML = name + '的总页数约为 ' + pageAmount
    pageNumIndicator.title = '微博每页返回数目不定中,每页至多二十条博文。如果设置最高页数,则默认下载完全部'
  }

  const inputs = page.querySelectorAll('input')
  const pageStartInput = inputs[0]
  const pageEndInput = inputs[1]
  if (Number.isFinite(pageAmount) && pageAmount > 0) {
    pageStartInput.max = pageAmount
    pageEndInput.max = pageAmount
    pageEndInput.value = pageAmount
  } else {
    // Unknown page count: lift the template's limit and leave the last page
    // empty, which resolveDownloadRange() treats as "everything".
    pageStartInput.removeAttribute('max')
    pageEndInput.removeAttribute('max')
    pageEndInput.value = ''
  }

  const btn = page.getElementsByClassName('bkBox-button')[0]
  btn.addEventListener('click', async () => {
    const dlRange = resolveDownloadRange(inputs[0].value, inputs[1].value, pageAmount)
    downloadPerid = clampInt(inputs[2].value, 1, 20, 10)
    imageDefinition = clampInt(inputs[3].value, 1, 3, 3)
    try {
      hideAllButton()
      await fetchAllPosts(type, dlRange)
    } catch (err) {
      showTip(`程序崩了,原因: ${err}`)
      console.error(err)
    } finally {
      // Restore the buttons after a failure too, so the user can retry.
      showAllButton()
    }
  })
  return page
}

const bkTypes = [TT('fav', '我的收藏'), TT('like', '我的点赞'), TT('myblog', '我的发布')]
Promise.all(bkTypes).then((values) => {
  for (const bkType of values) {
    settleDownBkTypes(bkType.type, bkType.name, buildSettingsPage(bkType))
    console.log(`settled post type ${bkType.type} to bkBox`)
  }
  document.body.appendChild(bkBox)
}).catch((err) => {
  console.error('WeiBack failed to initialise', err)
})

/**
 * Set the CSS `display` of every download button.
 * @param {string} display
 */
function setButtonsDisplay (display) {
  // getElementsByClassName returns an HTMLCollection, which has no forEach.
  for (const btn of document.getElementsByClassName('bkBox-button')) {
    btn.style.display = display
  }
}

/** Hide every download button while a backup is running. */
function hideAllButton () {
  setButtonsDisplay('none')
}

/** Show every download button again. */
function showAllButton () {
  setButtonsDisplay('block')
}

/**
 * Add one backup type to the panel: a navigation entry plus its settings page.
 * @param {string} type used as element id / link anchor
 * @param {string} name label of the navigation entry
 * @param {HTMLElement} settingsPage
 */
function settleDownBkTypes (type, name, settingsPage) {
  const nav = bkBox.getElementsByClassName('navi')[0]
  const settingBox = bkBox.getElementsByClassName('box')[0]

  const naviButton = document.createElement('a')
  naviButton.innerHTML = '<div class="bk-navi-inner">' + name + '</div>'
  naviButton.className = 'bk-naviItem'
  naviButton.href = '#' + type
  nav.appendChild(naviButton)

  const page = document.createElement('div')
  page.id = type
  page.className = 'bk-settingPage'
  page.appendChild(settingsPage)
  settingBox.appendChild(page)
}

/**
 * Estimate the number of pages (20 posts per page) for a backup type.
 * @param {string} type
 * @returns {Promise<*>} Infinity for 'like'; otherwise the page count, or a
 *   falsy value when the response is not `ok`
 */
async function getPageAmount (type) {
  if (type === 'like') {
    return Infinity
  }
  if (type === 'fav') {
    const req = await fetch(FAVORITES_TAGS_API)
    const json = await req.json()
    return json && json.ok && Math.ceil(json.fav_total_num / 20)
  }
  const req = await fetch(`${PROFILE_INFO_API}?uid=${globalConfig.uid}`)
  const json = await req.json()
  return json && json.ok && Math.ceil(json.data.user.statuses_count / 20)
}

/**
 * Show a status message (HTML) in the panel.
 * @param {string} msg
 */
function showTip (msg) {
  bkBoxTip.innerHTML = msg
}
QingJ © 2025
镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址