Wayback Machine Image Fixer

Attempts to fix broken images by replacing them with working timestamps based on JSON results

当前为 2016-01-02 提交的版本,查看 最新版本

  1. // ==UserScript==
  2. // @name Wayback Machine Image Fixer
  3. // @namespace DoomTay
  4. // @description Attempts to fix broken images by replacing them with working timestamps based on JSON results
  5. // @include http://web.archive.org/web/*
  6. // @include https://web.archive.org/web/*
  7. // @exclude /\*/
  8. // @exclude *.jpg
  9. // @exclude *.jpeg
  10. // @exclude *.png
  11. // @exclude *.gif
  12. // @exclude *.bmp
  13. // @version 1.3.0
  14. // @grant GM_xmlhttpRequest
  15.  
  16. // ==/UserScript==
  17.  
  // Elements that carry the legacy "background" attribute; they are checked the same way as <img> elements.
  var backgrounds = document.querySelectorAll("[background]");
  // Every image element currently in the document.
  var pics = document.images;
  20.  
  /**
   * Collects site-specific replacement URL guesses for an image.
   *
   * The URL examined is pic.src, or the absolute form of pic.background when
   * src is empty. Each special case whose domain appears in that URL (or whose
   * domain is "*") and whose extra condition holds contributes its replacement
   * URLs, in declaration order.
   *
   * The element itself is not modified: the URL is kept in a local variable
   * rather than being stored on the caller's DOM node.
   *
   * @param {Object} pic - Element (or element-like object) with a src or background property.
   * @returns {{replacements: string[], maxDimensions: ({width: number, height: number}|undefined)}}
   *   The combined guesses; maxDimensions is only present when a matching case defines it.
   */
  function specialUses(pic)
  {
    const image = pic.src || relativeToAbsolute(pic.background);
    const specialCases = [
      {
        domain: "northarc.com/images/unsorted/",
        replacements: [image.replace("thumb.", "tn_"), image.replace("thumb.", "")],
        maxDimensions: {width: 100, height: 80}
      },
      {
        // An empty replacement means "no usable guess": spacer images are not worth looking up.
        domain: "*",
        replacements: [""],
        condition: image.indexOf("spacer.gif") > -1 || image.indexOf("blank.gif") > -1
      },
      {
        domain: "tinypic.com/",
        replacements: [image.replace("/i", "/oi")]
      }
    ];

    const filteredCases = {replacements: []};
    specialCases.forEach(function(specialCase) {
      const domainMatches = specialCase.domain === "*" || image.indexOf(specialCase.domain) > -1;
      // A case without an explicit condition always applies once its domain matches.
      const conditionHolds = specialCase.condition !== undefined ? specialCase.condition : true;
      if(!domainMatches || !conditionHolds) return;

      filteredCases.replacements = filteredCases.replacements.concat(specialCase.replacements);
      if(specialCase.maxDimensions) filteredCases.maxDimensions = specialCase.maxDimensions;
    });
    return filteredCases;
  }
  40.  
  // Capture timestamp (up to 14 digits) taken from the "/web/<digits>" part of the current page address.
  var timestamp = window.location.href.match(/web\/(\d{1,14})/)[1];
  42.  
  /**
   * Tries to swap a broken image (or background) for an archived copy reported
   * by the Wayback Machine availability API.
   *
   * Candidate URLs are, in order of preference: altURLs, the site-specific
   * guesses from specialUses(), or the element's own URL. Every candidate
   * containing "http" is looked up through the API; a "data:" candidate is
   * accepted as-is and ends the candidate list. The first usable result is
   * applied to the element. When nothing is found and the image is an
   * invisible link, its source is rewritten so the link becomes clickable.
   *
   * @param {Object} target - Image element, or element with a background property.
   * @param {string[]} [altURLs] - Candidate URLs that take precedence over the defaults.
   * @returns {Promise} Settles once the element has been handled; callers may ignore it.
   */
  function replaceImage(target, altURLs)
  {
    // A 503 is retried only a few times, with a pause, instead of re-querying endlessly.
    const MAX_RETRIES = 3;
    const RETRY_DELAY_MS = 1000;

    /**
     * Strips the Wayback Machine prefix from an archived URL.
     * Falls back to cutting at the last "http" when the prefix is not recognised.
     */
    function extractOriginalURL(archivedURL)
    {
      const wrapped = /^(?:https?:)?\/\/web\.archive\.org\/web\/[^\/]+\/(.+)$/.exec(archivedURL);
      if(wrapped) return wrapped[1];
      const lastScheme = archivedURL.lastIndexOf("http");
      return lastScheme > -1 ? archivedURL.substring(lastScheme) : archivedURL;
    }

    /**
     * Asks the availability API for the snapshot closest to the page timestamp.
     * Resolves with the snapshot URL, or null when there is none or the lookup failed.
     */
    function APITest(replacement, retriesLeft)
    {
      const originalURL = extractOriginalURL(replacement);
      return new Promise(function(resolve, reject) {
        GM_xmlhttpRequest({
          // The URL is encoded so that "&", "?" and "#" in it cannot corrupt the query string.
          url: "http://archive.org/wayback/available?url=" + encodeURIComponent(originalURL) + "&timestamp=" + timestamp,
          method: "GET",
          headers: {"Accept": "application/json"},
          onload: function(response) {
            if(response.status == 503)
            {
              reject(response.statusText + " for " + originalURL);
              return;
            }
            let closest;
            try
            {
              const snapshots = JSON.parse(response.responseText).archived_snapshots;
              closest = snapshots ? snapshots.closest : undefined;
            }
            catch(e)
            {
              // A body that is not valid JSON is treated as "no snapshot available".
              closest = undefined;
            }
            resolve(closest !== undefined && closest !== null ? closest.url : null);
          },
          // Without these handlers a failed request would leave the promise pending forever.
          onerror: function() { resolve(null); },
          ontimeout: function() { resolve(null); }
        });
      }).catch(function(reason) {
        if(retriesLeft <= 0)
        {
          console.warn("Wayback Machine Image Fixer: giving up on lookup (" + reason + ")");
          return null;
        }
        return new Promise(function(resolve) {
          setTimeout(resolve, RETRY_DELAY_MS);
        }).then(function() {
          return APITest(replacement, retriesLeft - 1);
        });
      });
    }

    /** Writes the new URL to whichever of src / background the element uses. */
    function changeImage(url, element)
    {
      if(element.src) element.src = url;
      else if(element.background) element.background = url;
    }

    /**
     * Swaps in the new URL and, once it has loaded, scales the image so that it
     * fits inside width x height while keeping its aspect ratio.
     */
    function switchWithResize(url, element, width, height)
    {
      if(typeof element.addEventListener === "function")
      {
        const fitToBox = function() {
          // One-shot: later loads of the same element must not be resized again.
          element.removeEventListener("load", fitToBox);
          if(!element.naturalWidth || !element.naturalHeight) return;
          // Using the smaller ratio keeps BOTH dimensions within the limits.
          const scale = Math.min(width / element.naturalWidth, height / element.naturalHeight);
          element.width = element.naturalWidth * scale;
          element.height = element.naturalHeight * scale;
        };
        // Registered before the source changes so the load event cannot be missed.
        element.addEventListener("load", fitToBox);
      }
      changeImage(url, element);
    }

    const possibleUses = specialUses(target);
    let URLGuesses;
    if(altURLs && altURLs.length > 0) URLGuesses = altURLs;
    else if(possibleUses.replacements.length > 0) URLGuesses = possibleUses.replacements;
    else URLGuesses = [target.src || relativeToAbsolute(target.background)];

    // Results are pushed rather than assigned by index, so skipped guesses leave no holes
    // that would later be mistaken for a usable URL.
    const testSet = [];
    for(let p = 0; p < URLGuesses.length; p++)
    {
      const guess = URLGuesses[p];
      if(typeof guess !== "string") continue;
      if(guess.indexOf("http") > -1) testSet.push(APITest(guess, MAX_RETRIES));
      else if(guess.indexOf("data:") > -1)
      {
        testSet.push(Promise.resolve(guess));
        break;
      }
    }

    return Promise.all(testSet).then(function(results) {
      for(let v = 0; v < results.length; v++)
      {
        if(results[v] != null)
        {
          if(possibleUses.maxDimensions) switchWithResize(results[v], target, possibleUses.maxDimensions.width, possibleUses.maxDimensions.height);
          else changeImage(results[v], target);
          return;
        }
      }
      //Try and "expose" image links that are unclickable due to the image not loading
      if(target.alt === "" && target.width === 0 && target.parentNode && target.parentNode.nodeName === "A" && target.src)
      {
        //Changing the source is pretty hacky, but it's the only way I can think of to turn "invisible" image links into something clickable
        target.src = extractOriginalURL(target.src);
        target.width = 25;
        target.height = 25;
      }
    }).catch(function(error) {
      console.error("Wayback Machine Image Fixer: could not replace image", error);
    });
  }
  120.  
  /**
   * Resolves a possibly relative URL against the current document's base URL.
   *
   * The URL is resolved with the URL constructor instead of assigning it to a
   * throwaway Image, because setting an image's src starts a network fetch of
   * that resource as a side effect.
   *
   * @param {string} bgURL - Relative or absolute URL (e.g. a "background" attribute value).
   * @returns {string} The absolute URL; "" when bgURL is missing or empty; the input
   *   unchanged when it cannot be parsed as a URL.
   */
  function relativeToAbsolute(bgURL)
  {
    // A missing value must not be stringified into a bogus ".../undefined" address.
    if(bgURL === undefined || bgURL === null || bgURL === "") return "";
    try
    {
      return new URL(bgURL, document.baseURI).href;
    }
    catch(e)
    {
      // Unparsable input is handed back as-is rather than aborting the caller.
      return String(bgURL);
    }
  }
  127.  
  /**
   * Requests an image's URL and, when the server answers with an HTML page
   * instead of image data, hands the element to replaceImage().
   *
   * The URL is pic.src, or the absolute form of pic.background for elements
   * that have no src, matching how the rest of the script reads an element's URL.
   *
   * @param {Object} pic - Image element, or element with a background property.
   */
  function evaluateImage(pic)
  {
    /**
     * True when the response headers declare an HTML body. Header names are
     * case-insensitive, so the match must not depend on their capitalisation.
     */
    function isHTMLResponse(response)
    {
      return /^content-type:\s*text\/html/im.test(response.responseHeaders || "");
    }

    const imageURL = pic.src || relativeToAbsolute(pic.background);
    if(!imageURL) return;

    GM_xmlhttpRequest({
      url: imageURL,
      method: "GET",
      onload: function(response) {
        //Going off of response code is unreliable. Sometimes an image will return a status code of 200 even though it would redirect to an error page should you view the image directly, so we're looking at content type instead
        if(!isHTMLResponse(response)) return;

        //This might be a case where if you were visit the image directly, you would be redirected elsewhere. This attempts to catch that and replace the pic's src with where it would take you.
        const doc = document.implementation.createHTMLDocument("Possible Replacement");
        doc.documentElement.innerHTML = response.responseText;
        // Only an actual link inside the "impatient" element counts as a redirect target.
        const redirectLink = doc.querySelector(".impatient a[href]");
        const redirectURL = redirectLink ? redirectLink.href : "";

        if(redirectURL)
        {
          GM_xmlhttpRequest({
            url: redirectURL,
            method: "HEAD",
            onload: function(redirectResponse) {
              // The redirect target is only used when it is not itself an HTML page.
              if(isHTMLResponse(redirectResponse)) replaceImage(pic);
              else replaceImage(pic, [redirectURL]);
            }
          });
        }
        else if(response.status != 403)
        {
          replaceImage(pic);
        }
      }
    });
  }
  166.  
  // The Wayback Machine toolbar is looked up once; its own images are never touched.
  var toolbar = document.getElementById("wm-ipp");

  for(var i = 0; i < pics.length; i++)
  {
    var pic = pics[i];
    //Skip over stuff related to the Wayback Machine toolbar and data URIs
    if((toolbar && toolbar.contains(pic)) || pic.src.indexOf("data:") > -1) continue;
    // Repair sources whose scheme lost its leading "h". The rebuilt address must be
    // absolute (scheme and "//" included); otherwise it is resolved relative to the current page.
    if(pic.src.indexOf("ttp://") === 0) pic.src = window.location.protocol + "//web.archive.org/web/" + timestamp + "/h" + pic.src;
    evaluateImage(pic);
  }

  // Elements with a "background" attribute go through the same check as images.
  for(var b = 0; b < backgrounds.length; b++)
  {
    evaluateImage(backgrounds[b]);
  }

QingJ © 2025

镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址