InstaGrab

下载 Instagram 上的图片和视频。

  1. // ==UserScript==
  2. // @name InstaGrab
  3. // @namespace http://tampermonkey.net/
  4. // @version 0.1.0-alpha
  5. // @description 下载 Instagram 上的图片和视频。
  6. // @author cheer <cheer_cheer@alumni.tongji.edu.cn>
  7. // @license MIT
  8. // @match https://www.instagram.com/*
  9. // @exclude https://www.instagram.com/p/*
  10. // @exclude https://www.instagram.com/reels/*
  11. // @exclude https://www.instagram.com/explore/
  12. // @icon https://static.cdninstagram.com/rsrc.php/v3/yt/r/30PrGfR3xhB.png
  13. // @require https://cdn.bootcdn.net/ajax/libs/jszip/3.9.1/jszip.min.js
  14. // @connect instagram.com
  15. // @connect cdninstagram.com
  16. // @connect fbcdn.net
  17. // @grant GM_registerMenuCommand
  18. // @grant GM_xmlhttpRequest
  19. // @grant unsafeWindow
  20. // ==/UserScript==
  21.  
  22. (function () {
  23. 'use strict'
  24.  
  /**
   * Tells whether a value is callable.
   *
   * `typeof` is used so that async functions, generator functions and classes
   * are recognised as well. Comparing the `Object.prototype.toString` tag with
   * '[object Function]' rejects them (an async function's tag is
   * '[object AsyncFunction]'), which would make an `async` callback look like
   * "no callback".
   *
   * @param {*} f - Value to inspect.
   * @returns {boolean} `true` when `f` can be called.
   */
  const isFunction = f => typeof f === 'function'
  /**
   * Tells whether a value is a string, either a primitive or a `String` object.
   *
   * @param {*} o - Value to inspect.
   * @returns {boolean} `true` when the value's built-in type tag is String.
   */
  const isString = (o) => {
    const tag = Object.prototype.toString.call(o)
    return tag === '[object String]'
  }
  27.  
  /**
   * Left-pads the textual form of `n` with zeros.
   *
   * @param {*} n - Value to render; it is converted by string concatenation.
   * @param {number} count - Minimum length of the result.
   * @returns {string} `n` as text, padded on the left with '0' up to `count` characters.
   */
  const zfill = (n, count) => {
    const text = '' + n
    return text.padStart(count, '0')
  }
  31.  
  /**
   * Shortens a string that is longer than `maxLength` UTF-16 units.
   *
   * A string that already fits is returned unchanged. Otherwise the first
   * `maxLength - 2` units are kept and '……' is appended. When the cut would
   * fall between the two halves of a surrogate pair, the low surrogate is
   * kept as well so the pair stays intact.
   *
   * @param {string} s - Text to shorten.
   * @param {number} maxLength - Length threshold, in UTF-16 code units.
   * @returns {string} `s` itself, or its truncated form followed by '……'.
   */
  const ellipsize = (s, maxLength) => {
    if (s.length <= maxLength) {
      return s
    }

    const cut = maxLength - 2
    const lastKept = s.charCodeAt(cut - 1)
    const firstDropped = s.charCodeAt(cut)

    const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff
    const dropsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff

    // Extend by one unit rather than leave a lone high surrogate at the end.
    const end = endsOnHighSurrogate && dropsLowSurrogate ? cut + 1 : cut
    return s.substring(0, end) + '……'
  }
  50.  
  /**
   * Extracts the file extension from the last path segment of a URL.
   *
   * The URL is resolved against the current page address, and only its path
   * is examined, so the query string and fragment are ignored.
   *
   * @param {string} url - Absolute or relative URL.
   * @returns {string} The extension including the leading dot, or '' when the
   *   last path segment contains no dot.
   */
  const getExtension = (url) => {
    const { pathname } = new URL(url, location.href)
    const lastSegment = pathname.slice(pathname.lastIndexOf('/') + 1)
    const dotAt = lastSegment.lastIndexOf('.')
    return dotAt < 0 ? '' : lastSegment.slice(dotAt)
  }
  66.  
  /**
   * Replaces characters that are not allowed in file names with '_'.
   *
   * The forbidden set is the control characters U+0000–U+001F plus
   * `" < > | : * ? \ /`. The input is walked by code point, so characters
   * outside the Basic Multilingual Plane pass through unchanged.
   *
   * @param {string} name - Proposed file name.
   * @returns {string} The name with every forbidden character replaced by '_'.
   */
  const safeFileName = (() => {
    const forbidden = new Set('"<>|:*?\\/')
    for (let code = 0x00; code <= 0x1f; code++) {
      forbidden.add(String.fromCharCode(code))
    }

    return (name) => [...name].reduce(
      (safe, ch) => safe + (forbidden.has(ch) ? '_' : ch),
      ''
    )
  })()
  84.  
  /**
   * Generates the archive as a Blob and reports progress on the console.
   *
   * @param {object} zip - Object exposing `generateAsync(options, onUpdate)`
   *   (a JSZip instance in this script).
   * @param {object} [options] - Extra generation options; `type` is always
   *   overridden with 'blob'.
   * @returns {*} Whatever `zip.generateAsync` returns.
   */
  const archive = (zip, options) => {
    const REPORT_INTERVAL_MS = 3000
    let lastReportAt = -1

    // Progress callback: prints at most one line every three seconds.
    const reportProgress = (metadata) => {
      const now = Date.now()
      if (now - lastReportAt < REPORT_INTERVAL_MS) {
        return
      }

      // Only the last path segment of the file being processed is shown.
      const baseName = (metadata.currentFile?.trim() || '').split('/').pop()
      const percent = metadata.percent.toFixed(2).padStart(6)

      if (baseName) {
        console.debug('归档进度:%c%s%c %% - %c%s%c',
          'color: yellow', percent, 'color: inherit',
          'color: yellow', baseName, 'color: inherit')
      } else {
        console.debug('归档进度:%c%s%c %%',
          'color: yellow', percent, 'color: inherit')
      }

      lastReportAt = now
    }

    const generateOptions = Object.assign({}, options, { type: 'blob' })
    return zip.generateAsync(generateOptions, reportProgress)
  }
  113.  
  /**
   * Saves a Blob by clicking a temporary anchor element.
   *
   * @param {Blob} blob - Content to save.
   * @param {string} [filename] - Value for the anchor's `download` attribute;
   *   `true` is assigned when it is missing or empty.
   */
  const downloadBlob = (blob, filename) => {
    const anchor = unsafeWindow.document.createElement('a')
    const objectUrl = window.URL.createObjectURL(blob)

    Object.assign(anchor, {
      href: objectUrl,
      download: filename || true
    })

    anchor.click()
    // The object URL is released right after the click is dispatched.
    window.URL.revokeObjectURL(anchor.href)
  }
  121.  
  /**
   * Parses a cookie string into a name → value object.
   *
   * Fixes over the naive `split(';')` / `split('=')` approach:
   * - whitespace around each pair is trimmed, so cookies after the first one
   *   ("a=1; b=2") are stored under 'b' rather than ' b';
   * - only the first '=' separates name and value, so values containing '='
   *   are kept whole;
   * - a pair without '=' gets an empty value instead of the text 'undefined';
   * - malformed percent-encoding is kept verbatim instead of throwing.
   *
   * @param {string} [str] - Cookie string; `document.cookie` is used when it
   *   is missing or empty.
   * @returns {Object<string, string>} Decoded cookies; pairs with an empty
   *   name are skipped.
   */
  const parseCookie = (str) => {
    const decode = (text) => {
      try {
        return decodeURIComponent(text)
      } catch {
        // Not valid percent-encoding: use the raw text.
        return text
      }
    }

    const cookies = {}
    for (const pair of (str || document.cookie).split(';')) {
      const eq = pair.indexOf('=')
      const name = (eq < 0 ? pair : pair.slice(0, eq)).trim()
      if (!name) {
        continue
      }
      const value = eq < 0 ? '' : pair.slice(eq + 1).trim()
      cookies[decode(name)] = decode(value)
    }
    return cookies
  }
  129.  
  /**
   * Serialises an object into a URL query string.
   *
   * Array values produce one `key=value` pair per element; every other value
   * is converted to text and produces a single pair.
   *
   * @param {object} [obj] - Parameters to serialise.
   * @returns {string} The encoded query string without a leading '?', or ''
   *   when `obj` is falsy.
   */
  const objToQueryString = (obj) => {
    if (!obj) {
      return ''
    }

    const search = new URLSearchParams()
    for (const [name, value] of Object.entries(obj)) {
      const values = Array.isArray(value) ? value : [value + '']
      for (const entry of values) {
        search.append(name, entry)
      }
    }
    return search.toString()
  }
  148.  
  /**
   * Fixed identifiers exposed through the `Instagram` class getters.
   *
   * - `queryHash`: exposed by the class but not used by any request in this script.
   * - `asbdId`: sent as the `x-asbd-id` request header.
   * - `appId`: sent as the `x-ig-app-id` request header.
   *
   * Frozen so that the shared table cannot be modified by accident.
   */
  const Constants = Object.freeze({
    queryHash: 'd4d88dc1500312af6f937f7b804c68c3',
    asbdId: '198387',
    appId: '936619743392459'
  })
  154.  
  /**
   * Reads the profile username from the current page path.
   *
   * The path must consist of a single segment (an optional trailing slash is
   * allowed). Periods are accepted in that segment: without them a profile
   * such as `/john.doe/` would not be recognised and the script would query
   * the feed of an empty username.
   *
   * @returns {string} The username, or '' when the path is not a single
   *   segment or is the reserved 'explore' page.
   */
  const getUsername = () => {
    const match = location.pathname.match(/^\/([a-z0-9._-]+)\/?$/i)
    if (!match) {
      return ''
    }

    const segment = match[1]
    if (segment === 'explore') {
      return ''
    }

    // The character class excludes '%', so the segment needs no URI decoding.
    return segment
  }
  166.  
  /**
   * Converts one feed response into a list of posts with downloadable media.
   *
   * For each entry of `posts.items` the result holds the caption text, the
   * post time and one media record per downloadable resource. A post with
   * `video_versions` yields a single video record; otherwise each
   * `carousel_media` child yields one record, and a post with neither yields
   * one record for the post itself.
   *
   * Carousel children are checked for `video_versions` too, so a video inside
   * a carousel is downloaded as a video rather than as its still image. A
   * node whose `video_versions` list is empty falls back to the image record.
   *
   * @param {object} posts - Feed response; `posts.items` may be missing.
   * @returns {Array<{caption: string, media: object[], takenAt: (Date|undefined)}>}
   *   One entry per post. Every media record carries `id`, `video`, `width`,
   *   `height`, `url` and the owning post's `code`.
   */
  const getPostItems = posts => {
    // Builds the record for one media node (a post or a carousel child),
    // using the first listed video rendition when there is one.
    const describeMedia = node => {
      const video = node.video_versions?.[0]
      if (!video) {
        return getMediaInfo(node)
      }
      return {
        id: video.id,
        video: true,
        width: video.width,
        height: video.height,
        url: video.url
      }
    }

    return (posts.items || []).map(item => {
      const nodes = item.video_versions?.length
        ? [item]
        : (item.carousel_media || [item])

      const media = []
      for (const node of nodes) {
        const info = describeMedia(node)
        if (info) {
          // The post shortcode is used later to build the post URL.
          info.code = item.code
          media.push(info)
        }
      }

      return {
        caption: item.caption?.text || '',
        media,
        // `taken_at` is multiplied by 1000 before being passed to Date.
        takenAt: item.taken_at ? new Date(item.taken_at * 1000) : undefined
      }
    })
  }
  218.  
  /**
   * Builds the image record for a media node.
   *
   * The candidate in `image_versions2.candidates` whose dimensions equal the
   * node's `original_width` / `original_height` is preferred; when none
   * matches (or the match has no URL) the first candidate is used.
   *
   * A node without any candidate yields `null` instead of throwing on
   * `candidates[0].url`.
   *
   * @param {object} m - Media node (a post or a carousel child).
   * @returns {?{id: *, video: boolean, width: number, height: number, url: string}}
   *   The image record, or `null` when no candidate URL is available.
   */
  const getMediaInfo = m => {
    const { id, original_width: width, original_height: height } = m
    const candidates = m.image_versions2?.candidates || []

    const exact = candidates.find(c => c.width === width && c.height === height)
    const url = exact?.url || candidates[0]?.url
    if (!url) {
      return null
    }

    return {
      id,
      video: false,
      width,
      height,
      url
    }
  }
  248.  
  /**
   * Minimal client for the feed endpoint used by this script.
   *
   * Requests go through `GM_xmlhttpRequest` and carry header values read
   * from the current page (cookie, session storage, DOM).
   */
  class Instagram {
    constructor() {
      this._username = getUsername()
    }

    /** Username taken from the page path when the instance was created. */
    get username() {
      return this._username
    }

    /** User id; unset until a caller assigns it. */
    get userId() {
      return this._userId
    }

    set userId(value) {
      this._userId = value
    }

    get queryHash() {
      return Constants.queryHash
    }

    get asbdId() {
      return Constants.asbdId
    }

    get appId() {
      return Constants.appId
    }

    /** Value of the `csrftoken` cookie, read once and cached. */
    get csrfToken() {
      if (this._csrfToken === undefined) {
        this._csrfToken = parseCookie().csrftoken
      }
      return this._csrfToken
    }

    /** `www-claim-v2` from session storage, or '0' when absent; cached. */
    get wwwClaim() {
      if (this._wwwClaim === undefined) {
        this._wwwClaim = sessionStorage.getItem('www-claim-v2') || '0'
      }
      return this._wwwClaim
    }

    /**
     * The `data-btmanifest` attribute of the first element whose value ends
     * with '_main', without that 5-character suffix; '' when there is no
     * such element. Cached after the first read.
     */
    get rolloutHash() {
      if (this._rolloutHash === undefined) {
        const el = document.querySelector('[data-btmanifest$=_main]')
        const value = el ? el.getAttribute('data-btmanifest') : ''
        this._rolloutHash = el ? value.substring(0, value.length - 5) : ''
      }
      return this._rolloutHash
    }

    /**
     * Performs one API call.
     *
     * @param {object} request
     * @param {string} [request.method='GET'] - HTTP method.
     * @param {string} request.url - Endpoint URL without query string.
     * @param {object} [request.params] - Query parameters.
     * @param {*} [request.data] - Request body; non-strings are JSON-encoded.
     * @param {object} [request.headers] - Headers overriding the defaults.
     * @returns {Promise<object>} The parsed response when its `status` is 'ok'.
     *   Rejects with an Error named `ApiStatusError` (other status),
     *   `UnexpectedApiResponseError` (response is not an object),
     *   `UnhandledApiError` (transport error), `ApiAbortedError` or
     *   `ApiTimeoutError`. The raw response, when available, is attached as
     *   `err.response`.
     */
    _executeApi({ method, url, params, data, headers }) {
      const qs = objToQueryString(params)
      const requestUrl = qs ? url + '?' + qs : url

      return new Promise((resolve, reject) => {
        const fail = (name, message, response) => {
          const err = new Error(message)
          err.name = name
          if (response !== undefined) {
            err.response = response
          }
          reject(err)
        }

        const options = {
          method: method || 'GET',
          url: requestUrl,
          headers: Object.assign({
            'origin': location.origin,
            // A fixed value: the previous `document.referer || …` read a
            // property that does not exist (the DOM one is `referrer`), so
            // this constant is what was always sent.
            'referer': 'https://www.instagram.com/',
            'x-asbd-id': this.asbdId,
            'x-csrftoken': this.csrfToken,
            'x-ig-app-id': this.appId,
            'x-ig-www-claim': this.wwwClaim,
            'x-instagram-ajax': this.rolloutHash,
            'x-requested-with': 'XMLHttpRequest'
          }, headers || {}),
          responseType: 'json',
          onload(r) {
            const resp = r.response
            if (Object.prototype.toString.call(resp) !== '[object Object]') {
              fail('UnexpectedApiResponseError', '接口返回的内容无法解析。', r)
              return
            }
            if (resp.status !== 'ok') {
              fail('ApiStatusError', '接口返回的状态不正确。', r)
              return
            }
            resolve(resp)
          },
          onerror(r) {
            console.error(r)
            fail('UnhandledApiError', '接口调用失败。', r)
          },
          // Without these two handlers an aborted or timed-out request
          // would leave the promise pending forever.
          onabort() {
            fail('ApiAbortedError', '接口调用已中止。')
          },
          ontimeout() {
            fail('ApiTimeoutError', '接口调用超时。')
          }
        }

        if (data !== undefined) {
          options.data = isString(data) ? data : JSON.stringify(data)
        }

        GM_xmlhttpRequest(options)
      })
    }

    /**
     * Fetches one page of the user's feed (12 posts per page).
     *
     * The user-id endpoint is used once `userId` is set; before that the
     * lookup goes through the username endpoint.
     *
     * @param {string} [maxId] - Paging cursor sent as `max_id`.
     * @returns {Promise<object>} The feed response.
     */
    getPosts(maxId) {
      const params = {
        count: 12
      }
      if (maxId) {
        params.max_id = maxId
      }

      // encodeURIComponent: the value is a single path segment.
      const url = this.userId
        ? `https://i.instagram.com/api/v1/feed/user/${encodeURIComponent(this.userId)}/`
        : `https://i.instagram.com/api/v1/feed/user/${encodeURIComponent(this.username)}/username/`

      return this._executeApi({
        url,
        params
      })
    }
  }
  386.  
  /**
   * Downloads one media resource as a Blob.
   *
   * Any status outside 200–299 is treated as a failure. The previous check
   * (`200 < r.status || r.status > 299`) had its first comparison inverted:
   * it rejected valid 2xx answers such as 206 and accepted statuses below 200.
   *
   * @param {{url: string, video: boolean}} media - Media record to fetch.
   * @param {object} item - Post the media belongs to; passed through unchanged.
   * @returns {Promise<{media: object, item: object, content: *}>} Resolves
   *   with the response body. Rejects with an Error named `HttpStatusError`
   *   (with `status` and `response`), `TimeoutError` (after 45 seconds) or
   *   `HttpError` (transport failure, with `response` and `error` when given).
   */
  const downloadMedia = (media, item) => {
    const accept = media.video
      ? '*/*'
      : 'image/jpg,image/apng,image/svg+xml,image/*,*/*;q=0.8'

    return new Promise((resolve, reject) => {
      GM_xmlhttpRequest({
        method: 'GET',
        url: media.url,
        responseType: 'blob',
        timeout: 45_000,
        headers: {
          accept
        },
        onload(r) {
          if (r.status < 200 || r.status > 299) {
            const err = new Error(`媒体资源下载失败,服务器返回 HTTP ${r.status}。`)
            err.name = 'HttpStatusError'
            err.status = r.status
            err.response = r
            reject(err)
            return
          }

          resolve({
            media,
            item,
            content: r.response
          })
        },
        ontimeout() {
          const err = new Error('媒体资源下载超时。')
          err.name = 'TimeoutError'
          reject(err)
        },
        onerror(r) {
          const err = new Error('媒体资源下载失败。')
          err.name = 'HttpError'

          if (r) {
            err.response = r
            if (r.error != null) {
              err.error = r.error
            }
          }

          reject(err)
        }
      })
    })
  }
  438.  
  /**
   * Walks the whole feed of the current profile and downloads every media
   * resource, reporting progress through optional callbacks.
   *
   * Pages are fetched one after another; media downloads are started as soon
   * as their page arrives and are awaited together at the end.
   *
   * Every callback is invoked only when it is a function (the user callback
   * used to be checked by truthiness only), and the options object may be
   * omitted altogether.
   *
   * @param {object} [callbacks]
   * @param {function(object)} [callbacks.onUserResolved] - Called with
   *   `posts.user` on each page fetched while the user id is still unset.
   * @param {function(object, Array)} [callbacks.onPostsFetched] - Called per
   *   page with the raw response and the extracted items.
   * @param {function(object)} [callbacks.onMediaDownloaded] - Called with the
   *   download result, extended with a 1-based `seq` number.
   * @param {function(Error)} [callbacks.onMediaDownloadFailed] - Called with
   *   the error of a failed download (or of a failing `onMediaDownloaded`).
   * @returns {Promise<void>} Settles after all downloads have settled.
   */
  const download = async ({
    onUserResolved,
    onPostsFetched,
    onMediaDownloaded,
    onMediaDownloadFailed,
  } = {}) => {
    const ins = new Instagram()

    // Invokes an optional callback and waits for it.
    const notify = async (callback, ...args) => {
      if (isFunction(callback)) {
        await callback(...args)
      }
    }

    let seq = 0
    const promises = []
    let nextMaxId = null
    while (true) {
      const posts = await ins.getPosts(nextMaxId)
      nextMaxId = posts.next_max_id

      if (!ins.userId) {
        // Remember the user id so later pages use the id-based endpoint.
        ins.userId = posts.user?.pk
        await notify(onUserResolved, posts.user)
      }

      const items = getPostItems(posts)

      for (const item of items) {
        for (const m of item.media) {
          const seqNo = ++seq
          const promise = downloadMedia(m, item)
            .then(async r => {
              r.seq = seqNo
              await notify(onMediaDownloaded, r)
              return r
            })
            .catch(async e => {
              await notify(onMediaDownloadFailed, e)
              throw e
            })

          // The outcome is collected by allSettled below; this no-op handler
          // only keeps a failure that happens while later pages are still
          // being fetched from being reported as an unhandled rejection.
          promise.catch(() => {})
          promises.push(promise)
        }
      }

      await notify(onPostsFetched, posts, items)

      if (!posts.more_available || !posts.next_max_id) {
        console.log('下载完成啦。')
        break
      }
    }

    const results = await Promise.allSettled(promises)
    console.log('Results:', results)
  }
  500.  
  /**
   * Menu command: downloads every post of the current profile into one zip
   * file and offers it for saving.
   *
   * Files are stored in a folder named after the user, as
   * `<4-digit sequence> - <caption or media id, shortened><extension>`.
   * Any error is logged and reported with an alert.
   *
   * A failed media download is counted and logged, and the remaining files
   * are still archived. The failure handler used to execute `i++` on an
   * undeclared variable, which threw a ReferenceError and aborted the run.
   *
   * @returns {Promise<void>}
   */
  const downloadAsZip = async () => {
    try {
      const zip = new JSZip()
      let user
      let userdir
      let failedCount = 0

      await download({
        onUserResolved(u) {
          user = u
          // Create the user's folder inside the zip file.
          userdir = zip.folder(safeFileName(u.username))
        },
        onPostsFetched(posts, items) {
          const mediaCount = items.reduce((total, x) => total + x.media.length, 0)
          console.log(`本次采集到 ${items.length} 个帖子,${mediaCount} 个图片/视频资源。`)
        },
        onMediaDownloaded(e) {
          // String(): the media id used as fallback may not be a string.
          const caption = String(e.item.caption || e.media.id)
          const ext = getExtension(e.media.url)
          const fileName = `${zfill(e.seq, 4)} - ${ellipsize(caption, 36)}${ext}`
          userdir.file(safeFileName(fileName), e.content, {
            date: e.item.takenAt || new Date(),
            comment: `${e.item.caption} https://www.instagram.com/p/${e.media.code}/`.trim()
          })
        },
        onMediaDownloadFailed(e) {
          failedCount++
          console.error('下载失败啦', e)
        }
      })

      if (failedCount > 0) {
        console.warn(`有 ${failedCount} 个图片/视频资源下载失败。`)
      }

      console.log('下载完成,正在归档文件。')
      const blob = await archive(zip, {
        comment: `Instagram: https://www.instagram.com/${encodeURIComponent(user.username)}/`
      })
      console.log('归档完成,准备下载。')
      downloadBlob(blob, safeFileName(user.full_name || user.username) + '.zip')
    } catch (e) {
      alert('下载失败。')
      console.error('下载失败。', e)
    }
  }
  545.  
  // Register the zip download as a userscript menu command.
  const MENU_COMMAND_LABEL = '下载 TA 的帖子'
  GM_registerMenuCommand(MENU_COMMAND_LABEL, downloadAsZip)
  547. })()

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址