Twitter Direct

Remove t.co tracking links from Twitter

当前为 2020-10-03 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Twitter Direct
  3. // @description Remove t.co tracking links from Twitter
  4. // @author chocolateboy
  5. // @copyright chocolateboy
  6. // @version 1.0.1
  7. // @namespace https://github.com/chocolateboy/userscripts
  8. // @license GPL: https://www.gnu.org/copyleft/gpl.html
  9. // @include https://twitter.com/
  10. // @include https://twitter.com/*
  11. // @include https://mobile.twitter.com/
  12. // @include https://mobile.twitter.com/*
  13. // @require https://unpkg.com/@chocolateboy/uncommonjs@2.0.1/index.min.js
  14. // @require https://unpkg.com/get-wild@1.2.0/dist/index.umd.min.js
  15. // @require https://unpkg.com/just-safe-set@2.1.0/index.js
  16. // @require https://cdn.jsdelivr.net/gh/chocolateboy/gm-compat@a26896b85770aa853b2cdaf2ff79029d8807d0c0/index.min.js
  17. // @run-at document-start
  18. // @inject-into auto
  19. // ==/UserScript==
  20.  
  21. /*
  22. * a pattern which matches the content-type header of responses we scan for
  23. * URLs: "application/json" or "application/json; charset=utf-8"
  24. */
  25. const CONTENT_TYPE = /^application\/json\b/
  26.  
  27. /*
  28. * the minimum size (in bytes) of documents we deem to be "not small"
  29. *
  30. * we log misses (i.e. no URLs ever found/replaced) in documents whose size is
  31. * greater than or equal to this value
  32. *
  33. * if we keep failing to find URLs in large documents, we may be able to speed
  34. * things up by blacklisting them, at least in theory
  35. *
  36. * (in practice, URL data is optional in most of the matched document types
  37. * (contained in arrays that can be empty), so an absence of URLs doesn't
  38. * necessarily mean URL data will never be included...)
  39. */
  40. const LOG_THRESHOLD = 1024
  41.  
  42. /*
  43. * an immutable array used in various places as a way to indicate "no values".
  44. * static to avoid unnecessary allocations.
  45. */
  46. const NONE = []
  47.  
  48. /*
  49. * used to keep track of which roots (don't) have matching URIs and which URIs
  50. * (don't) have matching roots
  51. */
  52. const STATS = { root: {}, uri: {} }
  53.  
  54. /*
  55. * the domain intercepted links are routed through
  56. *
  57. * not all links are intercepted. exceptions include links to twitter (e.g.
  58. * https://twitter.com) and card URIs (e.g. card://123456)
  59. */
  60. const TRACKING_DOMAIN = 't.co'
  61.  
  62. /*
  63. * the domain we expect data (JSON) to come from. responses that aren't from
  64. * this domain are ignored.
  65. */
  66. const TWITTER_API = 'api.twitter.com'
  67.  
  68. /*
  69. * default locations to search for URL metadata (arrays of objects) within tweet
  70. * nodes
  71. */
  72. const TWEET_PATHS = [
  73. 'entities.media',
  74. 'entities.urls',
  75. 'extended_entities.media',
  76. 'extended_entities.urls',
  77. ]
  78.  
  79. /*
  80. * default locations to search for URL metadata (arrays of objects) within
  81. * user/profile nodes
  82. */
  83. const USER_PATHS = [
  84. 'entities.description.urls',
  85. 'entities.url.urls',
  86. ]
  87.  
/*
 * a router which matches URIs (pathnames) to queries. each query contains a
 * root path (required) and some additional options which specify the locations
 * under the root path to substitute URLs in.
 *
 * implemented as an array of [pattern, queries] pairs. the pattern is one or
 * more URI patterns (a string is compared for equality, a regexp is tested
 * against the URI) and the value is one or more queries. a query which is not
 * a plain object is treated as a path and converted into an object with the
 * path as its `root` property.
 *
 * NOTE(review): the value is flattened with NONE.concat(value) before it is
 * processed, so an array value is read as a *list of queries*. a root path
 * written as an array of steps therefore has to be wrapped in a query object
 * ({ root: [...] }), as is done for NONE below.
 *
 * options:
 *
 * - root (required): a path (string or array of steps) into the document
 *   under which to begin searching
 *
 * - collect (default: Object.values): a function which takes a root node and
 *   turns it into an array of context nodes to scan for URL data
 *
 * - scan (default: USER_PATHS): an array of paths to probe for arrays of
 *   { url, expanded_url } pairs in a context node
 *
 * - targets (default: NONE): an array of paths to standalone URLs (URLs that
 *   don't have an accompanying expansion), e.g. for URLs in cards embedded in
 *   tweets. these URLs are replaced by expanded URLs gathered during the
 *   scan.
 *
 *   target paths can point directly to a URL node (string) or to an
 *   array of objects. in the latter case, we find the URL object in the array
 *   (obj.key === "card_url") and replace its URL node (obj.value.string_value)
 *
 *   if a target path is an object containing a { url: path, expanded_url: path }
 *   pair, the URL is expanded directly in the same way as scanned paths.
 */
const MATCH = [
    [
        // e.g. '/1.1/users/lookup.json',
        /\/lookup\.json$/, {
            // an empty path: the root is the document itself, which may be
            // an array of users
            root: NONE, // returns self
        }
    ],
    [
        // the first two queries are plain root paths (user nodes, scanned
        // with the default USER_PATHS); the last two are tweet nodes
        /\/Conversation$/, [
            'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.core.user.legacy',
            'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.core.user.legacy',
            {
                root: 'data.conversation_timeline.instructions.*.moduleItems.*.item.itemContent.tweet.legacy',
                scan: TWEET_PATHS,
                targets: ['card.binding_values', 'card.url'],
            },
            {
                root: 'data.conversation_timeline.instructions.*.entries.*.content.items.*.item.itemContent.tweet.legacy',
                scan: TWEET_PATHS,
                targets: ['card.binding_values', 'card.url'],
            },
        ]
    ],
    [
        /\/Following$/,
        'data.user.following_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
    ],
    [
        /\/Followers$/,
        'data.user.followers_timeline.timeline.instructions.*.entries.*.content.itemContent.user.legacy',
    ],
    [
        // used for hovercard data
        /\/UserByScreenName$/, {
            root: 'data.user.legacy',
            // the root is a single user node rather than a collection of
            // them, so wrap it in an array instead of taking its values
            collect: Array.of,
        }
    ],
    [
        // DMs
        // e.g. '/1.1/dm/inbox_initial_state.json' and '/1.1/dm/user_updates.json'
        //
        // NOTE(review): both URIs share the `inbox_initial_state` root below;
        // confirm that user_updates.json responses really use that top-level
        // key, otherwise this query never finds a root for them
        /\/(?:inbox_initial_state|user_updates)\.json$/, {
            root: 'inbox_initial_state.entries.*.message.message_data',
            scan: TWEET_PATHS,
            targets: [
                'attachment.card.binding_values.card_url.string_value',
                'attachment.card.url',
            ],
        }
    ],
    [
        // e.g. '/1.1/friends/following/list.json',
        /\/list\.json$/,
        'users.*'
    ],
]
  177.  
/*
 * a single { pattern => queries } pair for the router which matches all URIs.
 *
 * the pattern (/./) matches any non-empty URI. the router only yields this
 * pair for documents which have a `globalObjects` property, which is where
 * all of the roots below live.
 */
const WILDCARD = [
    /./,
    [
        {
            root: 'globalObjects.tweets',
            scan: TWEET_PATHS,
            targets: [
                // a { url, expanded_url } pair of paths: the expansion is
                // read from the context itself rather than from the cache
                {
                    url: 'card.binding_values.website_shortened_url.string_value',
                    expanded_url: 'card.binding_values.website_url.string_value',
                },
                // standalone URLs: expanded from the mappings collected
                // during the scan
                'card.binding_values.card_url.string_value',
                'card.url',
            ],
        },
        // plain root paths: user nodes, scanned with the default USER_PATHS
        'globalObjects.tweets.*.card.users.*',
        'globalObjects.users',
    ]
]
  199.  
/*
 * a custom version of get-wild's `get` function which uses a simpler/faster
 * path parser since we don't use the extended syntax: paths are split on "."
 *
 * used in this file as get(obj, path) and get(obj, path, defaultValue), where
 * path is a dotted string (which may contain "*" wildcard steps) or an array.
 *
 * NOTE(review): `exports` is presumably populated by the @require'd bundles
 * (via the uncommonjs shim listed in the metadata block) - confirm that
 * `getter` comes from get-wild and `set` (used elsewhere in this file) from
 * just-safe-set.
 */
const get = exports.getter({ split: '.' })
  205.  
  206. /*
  207. * a helper function which returns true if the supplied value is a plain object,
  208. * false otherwise
  209. */
  210. const isPlainObject = (function () {
  211. const toString = {}.toString
  212. return value => toString.call(value) === '[object Object]'
  213. })()
  214.  
  215. /*
  216. * a helper function which iterates over the supplied iterable, filtering out
  217. * missing (undefined) values.
  218. *
  219. * this is done in one pass (rather than map + filter) as there may
  220. * potentially be dozens or even hundreds of values e.g. contexts (tweet/user
  221. * objects) under a root node
  222. */
  223. function eachDefined (iterable, fn) {
  224. for (const value of iterable) {
  225. if (value) fn(value)
  226. }
  227. }
  228.  
  229. /**
  230. * a helper function which returns true if the supplied URL is tracked by
  231. * Twitter, false otherwise
  232. */
  233. function isTracked (url) {
  234. return (new URL(url)).hostname === TRACKING_DOMAIN
  235. }
  236.  
  237. /*
  238. * JSON.stringify helper used to serialize stats data
  239. */
  240. function replacer (_key, value) {
  241. return (value instanceof Set) ? Array.from(value) : value
  242. }
  243.  
  244. /*
  245. * an iterator which returns { pattern => queries } pairs where patterns
  246. * are strings/regexps which match a URI and queries are objects which
  247. * define substitutions to perform in the matched document.
  248. *
  249. * this forms the basis of a simple "router" which tries all URI patterns
  250. * until one matches (or none match) and then additionally performs a
  251. * wildcard match which works on all URIs.
  252. *
  253. * the URI patterns are disjoint, so there's no need to try them all if one
  254. * matches. in addition to these, some substitutions are non URI-specific,
  255. * i.e. they work on documents that aren't matched by URI (e.g.
  256. * profile.json) and documents that are (e.g. list.json). currently the
  257. * latter all transform locations under obj.globalObjects, so we check for
  258. * the existence of that property before yielding these catch-all queries
  259. */
  260. function* router (state, data) {
  261. for (const [key, value] of MATCH) {
  262. yield [key, value]
  263.  
  264. if (state.matched) {
  265. break
  266. }
  267. }
  268.  
  269. if ('globalObjects' in data) {
  270. yield WILDCARD
  271. }
  272. }
  273.  
  274. /*
  275. * a helper class which implements document-specific (MATCH) and generic
  276. * (WILDCARD) URL substitutions in nodes (subtrees) within a JSON-formatted
  277. * document returned by the Twitter API.
  278. *
  279. * a transformer is instantiated for each query and its methods are passed a
  280. * context (node within the document tree) and the value of an option from the
  281. * query, e.g. the `scan` option is handled by the `scan` method and the
  282. * `targets` option is processed by the `assign` method
  283. */
  284. class Transformer {
  285. constructor ({ onReplace, root, uri }) {
  286. this._cache = new Map()
  287. this._onReplace = onReplace
  288. this._root = root
  289. this._uri = uri
  290. }
  291.  
  292. /*
  293. * expand URLs in context nodes in the locations specified by the query's
  294. * `scan` and `targets` options
  295. */
  296. // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
  297. transform (contexts, scan, targets) {
  298. // scan the context nodes for { url, expanded_url } pairs, replace
  299. // each t.co URL with its expansion, and add the mappings to the
  300. // cache
  301. eachDefined(contexts, context => this._scan(context, scan))
  302.  
  303. // do a separate pass for targets because some nested card URLs are
  304. // expanded in other (earlier) tweets under the same root
  305. if (targets.length) {
  306. eachDefined(contexts, context => this._assign(context, targets))
  307. }
  308. }
  309.  
  310. /*
  311. * scan the context node for { url, expanded_url } pairs, replace each t.co
  312. * URL with its expansion, and add the mappings to the cache
  313. */
  314. _scan (context, paths) {
  315. const { _cache: cache, _onReplace: onReplace } = this
  316.  
  317. for (const path of paths) {
  318. const items = get(context, path, NONE)
  319.  
  320. for (const item of items) {
  321. if (item.url && item.expanded_url) {
  322. if (isTracked(item.url)) {
  323. cache.set(item.url, item.expanded_url)
  324. item.url = item.expanded_url
  325. onReplace()
  326. }
  327. } else {
  328. console.warn("can't find url/expanded_url pair for:", {
  329. uri: this._uri,
  330. root: this._root,
  331. path,
  332. item,
  333. })
  334. }
  335. }
  336. }
  337. }
  338.  
  339. /*
  340. * replace URLs in the context which weren't substituted during the scan.
  341. *
  342. * these are either standalone URLs whose expansion we retrieve from the
  343. * cache, or URLs whose expansion exists in the context in a location not
  344. * covered by the scan
  345. */
  346. _assign (context, targets) {
  347. for (const target of targets) {
  348. if (isPlainObject(target)) {
  349. this._assignFromPath(context, target)
  350. } else {
  351. this._assignFromCache(context, target)
  352. }
  353. }
  354. }
  355.  
  356. /*
  357. * replace a short URL in the context with an expanded URL defined in the
  358. * context.
  359. *
  360. * this is similar to the replacements performed during the scan, but rather
  361. * than using a fixed set of locations/property names, the paths to the
  362. * short/expanded URLs are supplied as a parameter
  363. */
  364. _assignFromPath (context, target) {
  365. const { url: urlPath, expanded_url: expandedUrlPath } = target
  366.  
  367. let url, expandedUrl
  368.  
  369. if (
  370. (url = get(context, urlPath))
  371. && isTracked(url)
  372. && (expandedUrl = get(context, expandedUrlPath))
  373. ) {
  374. this._cache.set(url, expandedUrl)
  375. exports.set(context, urlPath, expandedUrl)
  376. this._onReplace()
  377. }
  378. }
  379.  
  380. /*
  381. * pinpoint an isolated URL in the context which doesn't have a
  382. * corresponding expansion, and replace it using the mappings we collected
  383. * during the scan
  384. */
  385. _assignFromCache (context, path) {
  386. let url, $context = context, $path = path
  387.  
  388. const node = get(context, path)
  389.  
  390. // if the target points to an array rather than a string, locate the URL
  391. // object within the array automatically
  392. if (Array.isArray(node)) {
  393. if ($context = node.find(it => it.key === 'card_url')) {
  394. $path = 'value.string_value'
  395. url = get($context, $path)
  396. }
  397. } else {
  398. url = node
  399. }
  400.  
  401. if (typeof url === 'string' && isTracked(url)) {
  402. const expandedUrl = this._cache.get(url)
  403.  
  404. if (expandedUrl) {
  405. exports.set($context, $path, expandedUrl)
  406. this._onReplace()
  407. } else {
  408. console.warn(`can't find expanded URL for ${url} in ${this._uri}`)
  409. }
  410. }
  411. }
  412. }
  413.  
  414. /*
  415. * replace t.co URLs with the original URL in all locations in the document
  416. * which contain URLs
  417. */
  418. function transform (data, uri) {
  419. let count = 0
  420.  
  421. if (!STATS.uri[uri]) {
  422. STATS.uri[uri] = new Set()
  423. }
  424.  
  425. const state = { matched: false }
  426. const it = router(state, data)
  427.  
  428. for (const [key, value] of it) {
  429. const uris = NONE.concat(key)
  430. const queries = NONE.concat(value)
  431. const match = uris.some(want => {
  432. return (typeof want === 'string') ? (uri === want) : want.test(uri)
  433. })
  434.  
  435. if (match) {
  436. // stop matching URIs and switch to the wildcard queries
  437. state.matched = true
  438. } else {
  439. // try the next URI pattern, or switch to the wildcard queries if
  440. // there are no more patterns to match against
  441. continue
  442. }
  443.  
  444. for (const $query of queries) {
  445. const query = isPlainObject($query) ? $query : { root: $query }
  446. const { root: rootPath } = query
  447.  
  448. if (!STATS.root[rootPath]) {
  449. STATS.root[rootPath] = new Set()
  450. }
  451.  
  452. const root = get(data, rootPath)
  453.  
  454. // may be an array (e.g. lookup.json)
  455. if (!root || typeof root !== 'object') {
  456. continue
  457. }
  458.  
  459. const {
  460. collect = Object.values,
  461. scan = USER_PATHS,
  462. targets = NONE,
  463. } = query
  464.  
  465. const updateStats = () => {
  466. ++count
  467. STATS.uri[uri].add(rootPath)
  468. STATS.root[rootPath].add(uri)
  469. }
  470.  
  471. const contexts = collect(root)
  472.  
  473. const transformer = new Transformer({
  474. onReplace: updateStats,
  475. root: rootPath,
  476. uri
  477. })
  478.  
  479. // @ts-ignore https://github.com/microsoft/TypeScript/issues/14279
  480. transformer.transform(contexts, scan, targets)
  481. }
  482. }
  483.  
  484. return count
  485. }
  486.  
  487. /*
  488. * replacement for Twitter's default response handler. we transform the response
  489. * if it's a) JSON and b) contains URL data; otherwise, we leave it unchanged
  490. */
  491. function onResponse (xhr, uri) {
  492. const contentType = xhr.getResponseHeader('Content-Type')
  493.  
  494. if (!CONTENT_TYPE.test(contentType)) {
  495. return
  496. }
  497.  
  498. const url = new URL(uri)
  499.  
  500. // exclude e.g. the config-<date>.json file from pbs.twimg.com, which is the
  501. // second biggest document (~500K) after home_latest.json (~700K)
  502. if (url.hostname !== TWITTER_API) {
  503. return
  504. }
  505.  
  506. const json = xhr.responseText
  507. const size = json.length
  508.  
  509. // fold URIs which differ only in the user ID, e.g.:
  510. // /2/timeline/profile/1234.json -> /2/timeline/profile.json
  511. const path = url.pathname.replace(/\/\d+\.json$/, '.json')
  512.  
  513. let data
  514.  
  515. try {
  516. data = JSON.parse(json)
  517. } catch (e) {
  518. console.error(`Can't parse JSON for ${uri}:`, e)
  519. return
  520. }
  521.  
  522. const oldStats = JSON.stringify(STATS, replacer)
  523. const count = transform(data, path)
  524.  
  525. if (!count) {
  526. if (STATS.uri[path].size === 0 && size >= LOG_THRESHOLD) {
  527. console.debug(`no replacements in ${path} (${size} B)`)
  528. }
  529.  
  530. return
  531. }
  532.  
  533. const descriptor = { value: JSON.stringify(data) }
  534. const clone = GMCompat.export(descriptor)
  535.  
  536. GMCompat.unsafeWindow.Object.defineProperty(xhr, 'responseText', clone)
  537.  
  538. const newStats = JSON.stringify(STATS, replacer)
  539.  
  540. if (newStats !== oldStats) {
  541. const replacements = 'replacement' + (count === 1 ? '' : 's')
  542. console.debug(`${count} ${replacements} in ${path} (${size} B)`)
  543. console.log(JSON.parse(newStats))
  544. }
  545. }
  546.  
  547. /*
  548. * replace the built-in XHR#send method with our custom version which swaps in
  549. * our custom response handler. once done, we delegate to the original handler
  550. * (this.onreadystatechange)
  551. */
  552. function hookXHRSend (oldSend) {
  553. return /** @this {XMLHttpRequest} */ function send () {
  554. const oldOnReadyStateChange = this.onreadystatechange
  555.  
  556. this.onreadystatechange = function () {
  557. if (this.readyState === this.DONE && this.responseURL && this.status === 200) {
  558. onResponse(this, this.responseURL)
  559. }
  560.  
  561. if (oldOnReadyStateChange) {
  562. // @ts-ignore
  563. return oldOnReadyStateChange.apply(this, arguments)
  564. }
  565. }
  566.  
  567. return oldSend.apply(this, arguments)
  568. }
  569. }
  570.  
  571. /*
  572. * replace the default XHR#send with our custom version, which scans responses
  573. * for tweets and expands their URLs
  574. */
  575. GMCompat.unsafeWindow.XMLHttpRequest.prototype.send = GMCompat.export(
  576. hookXHRSend(XMLHttpRequest.prototype.send)
  577. )

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址