InstaGrab

下载 Instagram 上的图片和视频。

您需要先安裝使用者腳本管理器擴展,如 Tampermonkey、Greasemonkey、Violentmonkey 之後才能安裝該腳本。

You will need to install an extension such as Tampermonkey to install this script.

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey、Violentmonkey 後才能安裝該腳本。

您需要先安裝使用者腳本管理器擴充功能,如 Tampermonkey、Userscripts 後才能安裝該腳本。

你需要先安裝一款使用者腳本管理器擴展,比如 Tampermonkey,才能安裝此腳本

您需要先安裝使用者腳本管理器擴充功能後才能安裝該腳本。

(我已經安裝了使用者腳本管理器,讓我安裝!)

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展,比如 Stylus,才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

你需要先安裝一款使用者樣式管理器擴展後才能安裝此樣式

(我已經安裝了使用者樣式管理器,讓我安裝!)

// ==UserScript==
// @name         InstaGrab
// @namespace    http://tampermonkey.net/
// @version      0.1.0-alpha
// @description  下载 Instagram 上的图片和视频。
// @author       cheer <[email protected]>
// @license      MIT
// @match        https://www.instagram.com/*
// @exclude      https://www.instagram.com/p/*
// @exclude      https://www.instagram.com/reels/*
// @exclude      https://www.instagram.com/explore/
// @icon         https://static.cdninstagram.com/rsrc.php/v3/yt/r/30PrGfR3xhB.png
// @require      https://cdn.bootcdn.net/ajax/libs/jszip/3.9.1/jszip.min.js
// @connect      instagram.com
// @connect      cdninstagram.com
// @connect      fbcdn.net
// @grant        GM_registerMenuCommand
// @grant        GM_xmlhttpRequest
// @grant        unsafeWindow
// ==/UserScript==

(function () {
  'use strict'

  /**
   * Tells whether a value can be called.
   *
   * `typeof` is used instead of the `Object.prototype.toString` tag because the
   * tag of async and generator functions is not "[object Function]", which made
   * such callbacks look like non-functions and get skipped by callers.
   *
   * @param {*} f - Value to test.
   * @returns {boolean} `true` when `f` is callable.
   */
  const isFunction = f => typeof f === 'function'

  /**
   * Tells whether a value is a string, either a primitive or a `String` object.
   *
   * @param {*} o - Value to test.
   * @returns {boolean} `true` when the value's type tag is "[object String]".
   */
  const isString = o => Object.prototype.toString.call(o) === '[object String]'

  /**
   * Left-pads the string form of `n` with zeros up to `count` characters.
   * A value that is already at least `count` characters long is returned as is.
   *
   * @param {*} n - Value to pad; it is converted to a string first.
   * @param {number} count - Minimum length of the result.
   * @returns {string} The zero-padded string.
   */
  const zfill = (n, count) => ('' + n).padStart(count, '0')

  /**
   * Shortens `s` so that it fits in roughly `maxLength` UTF-16 code units,
   * ending the shortened text with a two-character ellipsis.
   *
   * Strings that already fit are returned untouched. If the cut would land
   * between the two halves of a surrogate pair, the low half is kept as well,
   * so in that case the result is one code unit longer than `maxLength`.
   *
   * @param {string} s - Text to shorten.
   * @param {number} maxLength - Target maximum length, ellipsis included.
   * @returns {string} The original or the shortened text.
   */
  const ellipsize = (s, maxLength) => {
    if (s.length <= maxLength) {
      return s
    }

    const isHighSurrogate = code => code >= 0xd800 && code <= 0xdbff
    const isLowSurrogate = code => code >= 0xdc00 && code <= 0xdfff

    // Reserve two code units for the ellipsis appended below.
    const cut = maxLength - 2
    const head = s.substring(0, cut)

    // True when the last kept unit and the first dropped unit form one pair.
    const splitsPair = isHighSurrogate(head.charCodeAt(head.length - 1)) &&
      isLowSurrogate(s.charCodeAt(cut))

    return (splitsPair ? s.substring(0, cut + 1) : head) + '……'
  }

  /**
   * Extracts the file extension (leading dot included) from the last path
   * segment of a URL. Query string and fragment are ignored because only the
   * parsed pathname is inspected.
   *
   * @param {string} url - Absolute URL, or one relative to the current page.
   * @returns {string} The extension such as ".jpg", or "" when the last
   *   segment contains no dot.
   */
  const getExtension = (url) => {
    const { pathname } = new URL(url, location.href)

    // lastIndexOf yields -1 when there is no slash, so "+ 1" keeps the whole path.
    const baseName = pathname.slice(pathname.lastIndexOf('/') + 1)
    const dotAt = baseName.lastIndexOf('.')

    return dotAt === -1 ? '' : baseName.slice(dotAt)
  }

  /**
   * Replaces every character that is not allowed in a file name with "_".
   *
   * The forbidden characters are the control characters U+0000 to U+001F and
   * the punctuation characters " < > | : * ? \ /. The name is walked by code
   * point, so characters outside the BMP pass through intact.
   *
   * @param {string} name - Proposed file name.
   * @returns {string} The name with forbidden characters replaced.
   */
  const safeFileName = (() => {
    const forbidden = new Set('"<>|:*?\\/')
    for (let code = 0x00; code <= 0x1f; code++) {
      forbidden.add(String.fromCharCode(code))
    }

    return (name) => [...name]
      .map(ch => (forbidden.has(ch) ? '_' : ch))
      .join('')
  })();

  /**
   * Generates the archive as a Blob via `zip.generateAsync`, writing progress
   * to the console at most once every three seconds.
   *
   * @param {object} zip - Object exposing `generateAsync(options, onUpdate)`.
   * @param {object} [options] - Extra generation options; `type` is always
   *   overridden with "blob".
   * @returns {*} Whatever `zip.generateAsync` returns.
   */
  const archive = (zip, options) => {
    const REPORT_INTERVAL_MS = 3000
    let lastReportedAt = -1

    const reportProgress = (metadata) => {
      const now = Date.now()
      if (now - lastReportedAt < REPORT_INTERVAL_MS) {
        return
      }

      // Only the last path segment of the entry being written is shown.
      const currentFile = metadata.currentFile?.trim() || ''
      const baseName = currentFile.split('/').pop()
      const percent = metadata.percent.toFixed(2).padStart(6)

      if (baseName) {
        console.debug('归档进度:%c%s%c %% - %c%s%c',
          'color: yellow', percent, 'color: inherit',
          'color: yellow', baseName, 'color: inherit')
      } else {
        console.debug('归档进度:%c%s%c %%',
          'color: yellow', percent, 'color: inherit')
      }

      lastReportedAt = now
    }

    return zip.generateAsync({ ...options, type: 'blob' }, reportProgress)
  }

  /**
   * Offers a Blob to the user as a file download by clicking a temporary
   * anchor element that points at an object URL for the blob.
   *
   * @param {Blob} blob - Content to download.
   * @param {string} [filename] - Suggested file name; when falsy the anchor's
   *   `download` property is set to `true` instead.
   */
  const downloadBlob = (blob, filename) => {
    const anchor = unsafeWindow.document.createElement('a')
    const objectUrl = window.URL.createObjectURL(blob)

    anchor.href = objectUrl
    anchor.download = filename || true
    anchor.click()

    // The object URL is released right after the click has been dispatched.
    window.URL.revokeObjectURL(anchor.href)
  }

  /**
   * Parses a cookie string ("name=value; name2=value2") into a plain object.
   *
   * Fixes over the naive split-based version:
   * - whitespace around each pair is trimmed, so names that follow "; " no
   *   longer carry a leading space;
   * - only the first "=" separates name and value, so values containing "="
   *   are kept whole;
   * - a pair without "=" maps to an empty string instead of "undefined";
   * - empty pairs are skipped, and text that is not valid percent-encoding is
   *   kept verbatim instead of throwing.
   *
   * @param {string} [str] - Cookie string; `document.cookie` is used when falsy.
   * @returns {Object<string, string>} Decoded cookie values keyed by name.
   */
  const parseCookie = str => {
    const decode = text => {
      try {
        return decodeURIComponent(text)
      } catch {
        // Not valid percent-encoding: keep the raw text.
        return text
      }
    }

    const cookies = {}
    for (const pair of (str || document.cookie).split(';')) {
      const trimmed = pair.trim()
      if (!trimmed) {
        continue
      }

      const sep = trimmed.indexOf('=')
      const name = sep < 0 ? trimmed : trimmed.substring(0, sep).trim()
      const value = sep < 0 ? '' : trimmed.substring(sep + 1).trim()
      if (!name) {
        continue
      }

      cookies[decode(name)] = decode(value)
    }
    return cookies
  }

  /**
   * Serializes an object into a URL query string without the leading "?".
   * An array value produces one `key=item` pair per item; any other value is
   * converted to a string and produces a single pair.
   *
   * @param {?Object} obj - Parameters to serialize.
   * @returns {string} The encoded query string, or "" when `obj` is falsy.
   */
  const objToQueryString = obj => {
    if (!obj) {
      return ''
    }

    const query = new URLSearchParams()
    for (const [key, value] of Object.entries(obj)) {
      const values = Array.isArray(value) ? value : [value + '']
      for (const v of values) {
        query.append(key, v)
      }
    }
    return query.toString()
  }

  // Fixed identifiers exposed through the Instagram class getters.
  // NOTE(review): the values look like they were copied from Instagram's web
  // client at some point — confirm they are still current.
  const Constants = {
    // Exposed as Instagram#queryHash; no request in this file sends it.
    queryHash: 'd4d88dc1500312af6f937f7b804c68c3',
    // Sent as the "x-asbd-id" request header.
    asbdId: '198387',
    // Sent as the "x-ig-app-id" request header.
    appId: '936619743392459'
  }

  /**
   * Reads the profile user name from the current page path.
   *
   * The path must consist of exactly one segment (with an optional trailing
   * slash). Letters, digits, "_", "-" and "." are accepted; the period was
   * previously missing from the pattern, so profiles such as "/john.doe/"
   * resolved to an empty name.
   *
   * @returns {string} The user name, or "" when the path is not a single
   *   segment or that segment is "explore".
   */
  const getUsername = () => {
    const match = location.pathname.match(/^\/([a-z0-9._-]+)\/?$/i)
    if (!match) {
      return ''
    }

    const name = match[1]
    // "/explore/" is a site page, not a profile.
    return name === 'explore' ? '' : name
  }

  /**
   * Converts a feed response into a flat list of posts with their
   * downloadable media.
   *
   * For every entry of `posts.items`:
   * - when `video_versions` is present, its first entry (if any) becomes the
   *   post's only media, flagged as a video;
   * - otherwise each `carousel_media` entry, or the item itself when there is
   *   no carousel, is passed to `getMediaInfo`, and every non-null result is
   *   kept.
   * Each media object is tagged with the post's `code`.
   *
   * NOTE(review): carousel entries always go through `getMediaInfo`, which
   * only looks at image candidates — confirm whether carousel videos need
   * separate handling.
   *
   * @param {object} posts - Feed response; `items` may be missing.
   * @returns {Array<{caption: string, media: object[], takenAt: (Date|undefined)}>}
   */
  const getPostItems = posts => {
    const collectMedia = item => {
      if (item.video_versions) {
        const video = item.video_versions[0]
        if (!video) {
          return []
        }
        return [{
          id: video.id,
          code: item.code,
          video: true,
          width: video.width,
          height: video.height,
          url: video.url
        }]
      }

      const collected = []
      for (const source of item.carousel_media || [item]) {
        const info = getMediaInfo(source)
        if (info) {
          info.code = item.code
          collected.push(info)
        }
      }
      return collected
    }

    return (posts.items || []).map(item => ({
      caption: item.caption?.text || '',
      media: collectMedia(item),
      // taken_at is multiplied by 1000 to obtain the millisecond value Date expects.
      takenAt: item.taken_at ? new Date(item.taken_at * 1000) : undefined
    }))
  }

  /**
   * Picks the image URL to download for a media entry.
   *
   * The candidate whose dimensions equal the entry's original width and height
   * is preferred; when none matches (or the match has no URL) the first
   * candidate is used.
   *
   * An entry without any image candidate now yields `null`. Previously the
   * fallback dereferenced `candidates[0]` unconditionally and threw a
   * TypeError before the null guard could run.
   *
   * @param {object} m - Media entry with `id`, `original_width`,
   *   `original_height` and optionally `image_versions2.candidates`.
   * @returns {?{id: *, video: boolean, width: *, height: *, url: string}}
   *   Image descriptor, or `null` when no URL is available.
   */
  const getMediaInfo = m => {
    const originalWidth = m.original_width
    const originalHeight = m.original_height
    const candidates = m.image_versions2?.candidates || []

    const exact = candidates.find(c => c.width === originalWidth && c.height === originalHeight)
    const url = exact?.url || candidates[0]?.url
    if (!url) {
      return null
    }

    return {
      id: m.id,
      video: false,
      width: originalWidth,
      height: originalHeight,
      url
    }
  }

  /**
   * Small client for the Instagram feed endpoint used by this script.
   *
   * The user name is taken from the current page when the instance is created;
   * the numeric user id is unknown at first and is expected to be assigned by
   * the caller once a response reveals it.
   */
  class Instagram {
    constructor() {
      this._username = getUsername()
    }

    /** User name read from the page path at construction time. */
    get username() {
      return this._username
    }

    /** User id assigned by the caller; `undefined` until it is set. */
    get userId() {
      return this._userId
    }

    set userId(value) {
      this._userId = value
    }

    get queryHash() {
      return Constants.queryHash
    }

    get asbdId() {
      return Constants.asbdId
    }

    get appId() {
      return Constants.appId
    }

    /** Value of the "csrftoken" cookie, read once and then cached. */
    get csrfToken() {
      if (this._csrfToken === undefined) {
        this._csrfToken = parseCookie().csrftoken
      }
      return this._csrfToken
    }

    /** sessionStorage item "www-claim-v2", or "0" when absent; cached. */
    get wwwClaim() {
      if (this._wwwClaim === undefined) {
        this._wwwClaim = sessionStorage.getItem('www-claim-v2') || '0'
      }
      return this._wwwClaim
    }

    /**
     * Value of the first `data-btmanifest` attribute ending in "_main", with
     * that five-character suffix removed; "" when no such element exists.
     * Cached after the first read.
     */
    get rolloutHash() {
      if (this._rolloutHash === undefined) {
        const el = document.querySelector('[data-btmanifest$=_main]')
        if (el) {
          const value = el.getAttribute('data-btmanifest')
          this._rolloutHash = value.substring(0, value.length - '_main'.length)
        } else {
          this._rolloutHash = ''
        }
      }
      return this._rolloutHash
    }

    /**
     * Sends an API request through GM_xmlhttpRequest.
     *
     * @param {object} request
     * @param {string} [request.method] - HTTP method, "GET" by default.
     * @param {string} request.url - Endpoint URL without query string.
     * @param {object} [request.params] - Query parameters appended to the URL.
     * @param {*} [request.data] - Request body; non-strings are JSON-encoded.
     * @param {object} [request.headers] - Headers overriding the defaults.
     * @returns {Promise<object>} Resolves with the parsed JSON body when its
     *   `status` is "ok". Rejects with an Error named "ApiStatusError" (other
     *   status), "UnexpectedApiResponseError" (body is not a JSON object) or
     *   "UnhandledApiError" (request failed); `response` holds the raw
     *   response when one is available.
     */
    _executeApi({ method, url, params, data, headers }) {
      const qs = objToQueryString(params)
      const requestUrl = qs ? url + '?' + qs : url

      const fail = (reject, name, message, response) => {
        const err = new Error(message)
        err.name = name
        if (response !== undefined) {
          err.response = response
        }
        reject(err)
      }

      return new Promise((resolve, reject) => {
        const options = {
          method: method || 'GET',
          url: requestUrl,
          headers: Object.assign({
            'origin': location.origin,
            // The property is spelled "referrer"; the misspelled "referer"
            // never exists on document, so the fallback was always sent.
            'referer': document.referrer || 'https://www.instagram.com/',
            'x-asbd-id': this.asbdId,
            'x-csrftoken': this.csrfToken,
            'x-ig-app-id': this.appId,
            'x-ig-www-claim': this.wwwClaim,
            'x-instagram-ajax': this.rolloutHash,
            'x-requested-with': 'XMLHttpRequest'
          }, headers || {}),
          responseType: 'json',
          onload(r) {
            const resp = r.response
            if (Object.prototype.toString.call(resp) !== '[object Object]') {
              fail(reject, 'UnexpectedApiResponseError', '接口返回的内容无法解析。', r)
              return
            }
            if (resp.status !== 'ok') {
              fail(reject, 'ApiStatusError', '接口返回的状态不正确。', r)
              return
            }
            resolve(resp)
          },
          onerror(r) {
            console.error(r)
            fail(reject, 'UnhandledApiError', '接口调用失败。', r)
          }
        }

        if (data !== undefined) {
          options.data = isString(data) ? data : JSON.stringify(data)
        }

        GM_xmlhttpRequest(options)
      })
    }

    /**
     * Fetches one page (12 posts) of the user's feed.
     *
     * The feed is addressed by user id when one has been assigned, otherwise
     * by user name.
     *
     * @param {string} [maxId] - Pagination cursor taken from a previous page.
     * @returns {Promise<object>} The parsed feed response.
     */
    getPosts(maxId) {
      const params = {
        count: 12
      }
      if (maxId) {
        params.max_id = maxId
      }

      // encodeURIComponent is the right escaping for a single path segment.
      const url = this.userId
        ? `https://i.instagram.com/api/v1/feed/user/${encodeURIComponent(this.userId)}/`
        : `https://i.instagram.com/api/v1/feed/user/${encodeURIComponent(this.username)}/username/`

      return this._executeApi({
        url,
        params
      })
    }
  }

  /**
   * Downloads one media resource as a Blob through GM_xmlhttpRequest.
   *
   * Any 2xx status counts as success. The previous check
   * (`200 < status || status > 299`) rejected every 2xx status other than 200
   * and let statuses below 200 through.
   *
   * @param {{url: string, video: boolean}} media - Media descriptor.
   * @param {object} item - Post the media belongs to; passed through untouched.
   * @returns {Promise<{media: object, item: object, content: *}>} Resolves
   *   with the response body. Rejects with an Error named "HttpStatusError"
   *   (non-2xx, with `status` and `response`), "TimeoutError" (no response
   *   within 45 seconds) or "HttpError" (request failed).
   */
  const downloadMedia = (media, item) => {
    const accept = media.video
      ? '*/*'
      : 'image/jpg,image/apng,image/svg+xml,image/*,*/*;q=0.8'

    return new Promise((resolve, reject) => {
      GM_xmlhttpRequest({
        method: 'GET',
        url: media.url,
        responseType: 'blob',
        timeout: 45_000,
        headers: {
          accept
        },
        onload(r) {
          const succeeded = r.status >= 200 && r.status <= 299
          if (!succeeded) {
            const err = new Error(`媒体资源下载失败,服务器返回 HTTP ${r.status}。`)
            err.name = 'HttpStatusError'
            err.status = r.status
            err.response = r
            reject(err)
            return
          }

          resolve({
            media,
            item,
            content: r.response
          })
        },
        ontimeout() {
          const err = new Error('媒体资源下载超时。')
          err.name = 'TimeoutError'
          reject(err)
        },
        onerror(r) {
          const err = new Error('媒体资源下载失败。')
          err.name = 'HttpError'

          if (r) {
            err.response = r
            if (r.error != null) {
              err.error = r.error
            }
          }

          reject(err)
        }
      })
    })
  }

  /**
   * Walks through all feed pages of the current profile and starts a download
   * for every media item found, then waits for all downloads to settle.
   *
   * Downloads are started as soon as a page has been parsed and run while the
   * following pages are still being fetched.
   *
   * @param {object} callbacks
   * @param {Function} [callbacks.onUserResolved] - Called with `posts.user`
   *   while no user id is known yet; awaited.
   * @param {Function} [callbacks.onPostsFetched] - Called with the raw page
   *   and its parsed items after that page's downloads have been started.
   * @param {Function} [callbacks.onMediaDownloaded] - Called with the download
   *   result, extended with a 1-based `seq` number.
   * @param {Function} [callbacks.onMediaDownloadFailed] - Called with the
   *   error when a download, or `onMediaDownloaded`, fails.
   * @returns {Promise<void>}
   */
  const download = async ({
    onUserResolved,
    onPostsFetched,
    onMediaDownloaded,
    onMediaDownloadFailed,
  }) => {
    const ins = new Instagram()
    const pending = []
    let seq = 0

    // Builds the success handler for the download holding sequence number seqNo.
    const succeedWith = seqNo => async r => {
      r.seq = seqNo
      if (isFunction(onMediaDownloaded)) {
        await onMediaDownloaded(r)
      }
      return r
    }

    // Notifies the caller, then rethrows so the promise stays rejected.
    const failWith = async e => {
      if (isFunction(onMediaDownloadFailed)) {
        await onMediaDownloadFailed(e)
      }
      throw e
    }

    let nextMaxId = null
    let hasMore = true
    while (hasMore) {
      const posts = await ins.getPosts(nextMaxId)
      nextMaxId = posts.next_max_id

      if (!ins.userId) {
        // The response carries the user record; keep its id for later pages.
        ins.userId = posts.user?.pk
        if (onUserResolved) {
          await onUserResolved(posts.user)
        }
      }

      const items = getPostItems(posts)
      for (const item of items) {
        for (const m of item.media) {
          seq++
          pending.push(downloadMedia(m, item).then(succeedWith(seq)).catch(failWith))
        }
      }

      if (isFunction(onPostsFetched)) {
        await onPostsFetched(posts, items)
      }

      hasMore = Boolean(posts.more_available && posts.next_max_id)
    }
    console.log('下载完成啦。')

    const results = await Promise.allSettled(pending)
    console.log('Results:', results)
  }

  /**
   * Menu command handler: downloads every post media of the current profile,
   * packs the files into a zip archive and offers the archive for download.
   *
   * Files are stored in a folder named after the user, as
   * "<seq> - <caption or media id><ext>", with the post date as file date and
   * the caption plus post URL as file comment. Any failure that escapes the
   * download is reported with an alert and logged to the console.
   *
   * @returns {Promise<void>}
   */
  const downloadAsZip = async () => {
    try {
      const zip = new JSZip()
      let user
      let userdir
      // Number of media items that failed to download. The original handler
      // incremented an undeclared variable, which throws a ReferenceError in
      // strict mode and hid the real error.
      let failedCount = 0

      await download({
        onUserResolved(u) {
          user = u
          // Create the user's folder inside the zip file.
          userdir = zip.folder(safeFileName(u.username))
        },
        onPostsFetched(posts, items) {
          const mediaCount = items.reduce((total, x) => total + x.media.length, 0)
          console.log(`本次采集到 ${items.length} 个帖子,${mediaCount} 个图片/视频资源。`)
        },
        onMediaDownloaded(e) {
          // The fallbacks may not be strings, so normalize before measuring.
          const caption = String(e.item.caption || e.media.id || e.media.code || '')
          const ext = getExtension(e.media.url)
          const fileName = `${zfill(e.seq, 4)} - ${ellipsize(caption, 36)}${ext}`
          userdir.file(safeFileName(fileName), e.content, {
            date: e.item.takenAt || new Date(),
            comment: `${e.item.caption}  https://www.instagram.com/p/${e.media.code}/`.trim()
          })
        },
        onMediaDownloadFailed(e) {
          failedCount++
          console.error('下载失败啦', e)
        }
      })

      if (failedCount > 0) {
        console.warn(`有 ${failedCount} 个资源下载失败。`)
      }

      console.log('下载完成,正在归档文件。')
      const blob = await archive(zip, {
        comment: `Instagram: https://www.instagram.com/${encodeURIComponent(user.username)}/`
      })
      console.log('归档完成,准备下载。')
      downloadBlob(blob, safeFileName(user.full_name || user.username) + '.zip')
    } catch (e) {
      alert('下载失败。')
      console.error('下载失败。', e)
    }
  }

  // Register downloadAsZip as a userscript menu command; the label is the
  // text passed as the first argument.
  GM_registerMenuCommand('下载 TA 的帖子', downloadAsZip)
})()