您需要先安装一个扩展,例如 篡改猴、Greasemonkey 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 暴力猴,之后才能安装此脚本。
您需要先安装一个扩展,例如 篡改猴 或 Userscripts ,之后才能安装此脚本。
您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey,才能安装此脚本。
您需要先安装用户脚本管理器扩展后才能安装此脚本。
Extract Data that match the criteria from Facebook Search Page
// ==UserScript==
// @name         Facebook Search Data Automation
// @namespace    http://tampermonkey.net/
// @version      2.65
// @description  Extract Data that match the criteria from Facebook Search Page
// @author       Doncha1009
// @match        https://www.facebook.com/search/*
// @icon         data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==
// @grant        none
// ==/UserScript==

/*
 * Walks the result list of a Facebook search page, opens every linked page in
 * a new window, scrapes its HTML for creation date, phone number, address,
 * name, category, website and e-mail, and downloads the matching rows as
 * "foundData.csv".
 *
 * All scraping is done with fixed string markers and character offsets into
 * the page's HTML, so every helper below is tightly coupled to the markup the
 * script was written against. Helpers return the number -1 as a "not found /
 * does not match" sentinel.
 */
(function() {
    'use strict';

    window.addEventListener("load", async function() {
        const delay = ms => new Promise(res => setTimeout(res, ms));

        // Markers whose length is also the offset to the text that follows them.
        const ADDRESS_SPAN_MARKER = 'x1a2a7pz x1heor9g xt0b8zv" role="button" tabindex="0"><span class="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x3x7a5m x6prxxf xvq8zen xo1l8bm xzsf02u" dir="auto">';
        const WEBSITE_SPAN_MARKER = '<span class="x193iq5w xeuugli x13faqbe x1vvkbs x1xmvt09 x1lliihq x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x xudqn12 x3x7a5m x6prxxf xvq8zen xo1l8bm x1qq9wsj x1yc453h" dir="auto">';

        // Gives the user two minutes to scroll the search page so that all
        // wanted results are present in the DOM before they are collected.
        console.log("starting in 120 seconds...");
        await delay(120000);
        console.log("starting...");

        // NOTE(review): a cancelled or unparsable prompt yields an Invalid Date,
        // in which case isBetweenDates() is always false and no page matches.
        const DateFirstBoundary = promptForDate("Enter a first date boundary (e.g. June 8, 2010):");
        const DateSecondBoundary = promptForDate("Enter a second date boundary (e.g. June 8, 2010):");

        let sviRezultati = "Buisness Name,Phone Number,Address,Website,Facebook,Creation Date, Category, Email\n";

        const kontejner = document.getElementsByClassName("x193iq5w x1xwk8fm")[0];
        if (kontejner === undefined) {
            console.log("The search result list was not found on this page, nothing to do.");
            return;
        }
        const lista_ljudi = kontejner.children;
        const lista = [];

        // Walk every result entry (the last child is not read) and pull the
        // href of the page link buried inside it.
        for (let i = 0; i < lista_ljudi.length - 1; i++) {
            lista[i] = lista_ljudi[i].children[0].children[0].children[0].children[0].children[0].children[0].children[0].children[0].children[0].children[0].children[1].children[0].children[0].children[0].children[0].children[0].children[0].children[0].getAttribute("href");
        }

        console.log("The number of buisness pages: " + lista.length);

        let pauzaBr = 1;
        // NOTE(review): the bound `lista.length - 1` means the last collected
        // link is never visited; kept as in the original — confirm whether
        // that is intended.
        for (let i = 0; i < lista.length - 1; i++) {
            // Every 50 pages: pause 10 seconds (to reduce the chance of
            // Facebook blocking the session) and download a checkpoint CSV.
            if (i === pauzaBr * 50) {
                pauzaBr++;
                await delay(10000);
                saveData(sviRezultati, "foundData.csv");
            }

            const url = lista[i];
            let datum;
            console.log("starting " + i + " company");
            await delay(4000);

            // Open the business page in its own window.
            const windowNovi = window.open(url);
            if (windowNovi === null) {
                // window.open returns null when the popup is blocked.
                console.log("could not open a window for " + url + " (popup blocked?), skipping");
                continue;
            }
            await delay(8000);
            this.self.focus();
            const pageSource = windowNovi.document.getElementsByTagName("html")[0].innerHTML;

            if (pageSource.includes('page_creation_date":{"text":')) {
                datum = getPageCreationDate(pageSource);
            } else {
                // The creation date is not embedded in the main page: extract
                // the "About" section link and open its transparency tab.
                const indexAbout = pageSource.indexOf('"section_type":"ABOUT"');
                let stringTemp = pageSource.substring(indexAbout + 22, indexAbout + 150);
                stringTemp = stringTemp.slice(stringTemp.indexOf('"url":"') + 7, stringTemp.length - 1);
                stringTemp = stringTemp.slice(0, stringTemp.indexOf('","'));
                stringTemp += "_profile_transparency"; // jump straight to the tab holding the date
                stringTemp = stringTemp.replaceAll("\\/", "/"); // un-escape JSON "\/" into "/"
                await delay(4000);
                windowNovi.close();

                const windowNoviji = window.open(stringTemp);
                if (windowNoviji === null) {
                    console.log("could not open a window for " + stringTemp + " (popup blocked?), skipping");
                    datum = -1;
                } else {
                    await delay(8000);
                    this.self.focus();
                    const pageSource2 = windowNoviji.document.getElementsByTagName("html")[0].innerHTML;
                    await delay(4000);
                    windowNoviji.close();
                    datum = getPageCreationDate2(pageSource2);
                }
            }

            const broj_telefona = getPhoneNumber(pageSource);
            let adresa = getCountryName(pageSource);

            // Only pages with a valid date, phone number and address are exported.
            if (datum !== -1 && broj_telefona !== -1 && adresa !== -1) {
                if (adresa.indexOf(">") !== -1) {
                    adresa = adresa.slice(adresa.indexOf(">") + 8, adresa.length - 1);
                }
                const imeBiznisa = getBuisnessName(pageSource);
                const kategorija = getBuisnessCategory(pageSource);
                let website = getBuisnessWebsite(pageSource);
                if (website === -1 || !website.includes("http")) {
                    website = "There is no website";
                } else {
                    website = website.replaceAll("\\/", "/");
                }
                const email = getBuisnessEmail(pageSource);
                console.log("company matches the criteria: " + imeBiznisa + " ");
                sviRezultati += `"${imeBiznisa}",${broj_telefona},"${adresa}","${website}","${url}","${datum}","${kategorija}","${email}",\n`;
            }

            await delay(4000);
            windowNovi.close(); // harmless if the window was already closed above
            await delay(4000);
        }

        saveData(sviRezultati, "foundData.csv");

        /**
         * Asks the user for a date via prompt() and parses it.
         * @param {string} promptString - Text shown in the prompt dialog.
         * @returns {Date} The parsed date (an Invalid Date if parsing fails).
         */
        function promptForDate(promptString) {
            const dateString = prompt(promptString);
            return new Date(Date.parse(dateString));
        }

        /**
         * Extracts the page creation date embedded in the main page HTML.
         * @param {string} pageSource - HTML of the business page.
         * @returns {string|number} The date text if it lies between the two
         *     user-supplied boundaries, otherwise -1.
         */
        function getPageCreationDate(pageSource) {
            const leviDeoIndex = pageSource.indexOf('page_creation_date":{"text":');
            let stringDatuma = pageSource.substring(leviDeoIndex + 28, leviDeoIndex + 68);
            // NOTE(review): only dates containing "2022" are accepted here,
            // regardless of the boundaries the user typed — confirm intent.
            if (!stringDatuma.includes("2022")) {
                return -1;
            }
            stringDatuma = stringDatuma.slice(stringDatuma.indexOf(' - ') + 3, stringDatuma.indexOf('"},'));
            const datum = new Date(Date.parse(stringDatuma));
            if (!isBetweenDates(datum)) {
                return -1;
            }
            if (stringDatuma.includes("\\u")) {
                return stringDatuma.split("\\u")[1];
            }
            return stringDatuma;
        }

        /**
         * Extracts the creation date from the page-transparency view.
         * @param {string} pageSource - HTML of the transparency page.
         * @returns {string|number} The date text if it lies between the two
         *     user-supplied boundaries, otherwise -1.
         */
        function getPageCreationDate2(pageSource) {
            const leviDeoIndex = pageSource.indexOf('"},"field_type":"creation_date"');
            let stringDatuma = pageSource.substring(leviDeoIndex - 20, leviDeoIndex + 1);
            stringDatuma = stringDatuma.slice(stringDatuma.indexOf('":"') + 3, stringDatuma.length - 1);
            const datum = new Date(Date.parse(stringDatuma));
            return isBetweenDates(datum) ? stringDatuma : -1;
        }

        /**
         * Checks whether a date lies strictly between the two prompted boundaries.
         * @param {Date} dateObjectPage - Date to test.
         * @returns {boolean} True when first < date < second.
         */
        function isBetweenDates(dateObjectPage) {
            const timeCurrentPage = dateObjectPage.getTime();
            return timeCurrentPage > DateFirstBoundary.getTime() &&
                timeCurrentPage < DateSecondBoundary.getTime();
        }

        /**
         * Extracts the phone number, first from the embedded
         * "formatted_phone_number" field and otherwise from a visible
         * '+61 4…' text node.
         * @param {string} pageSource - HTML of the business page.
         * @returns {string|number} The phone number text, or -1 if none is found.
         */
        function getPhoneNumber(pageSource) {
            const jsonIndex = pageSource.indexOf('"formatted_phone_number":"');
            let stringBrojaTelefona = pageSource.substring(jsonIndex + 26, jsonIndex + 46);
            if (!stringBrojaTelefona.includes("null") && stringBrojaTelefona[4] === '4') {
                return stringBrojaTelefona.slice(1, stringBrojaTelefona.indexOf('","'));
            }
            const htmlIndex = pageSource.indexOf('dir="auto">+61 4');
            stringBrojaTelefona = pageSource.substring(htmlIndex + 11, htmlIndex + 46);
            if (htmlIndex === -1 || stringBrojaTelefona.includes("null")) {
                return -1;
            }
            return stringBrojaTelefona.slice(0, stringBrojaTelefona.indexOf('</'));
        }

        /**
         * Extracts the address, accepting it only when it mentions "Australia".
         * @param {string} pageSource - HTML of the business page.
         * @returns {string|number} The address text, or -1 if no address
         *     containing "Australia" is found.
         */
        function getCountryName(pageSource) {
            const jsonIndex = pageSource.indexOf('"full_address":');
            let stringImenaZemlje = pageSource.substring(jsonIndex + 14, jsonIndex + 164);
            if (stringImenaZemlje.includes("Australia")) {
                return stringImenaZemlje.slice(2, stringImenaZemlje.indexOf("Australia") + 9).replaceAll("\\n", " ");
            }
            // Fall back to the address rendered in the page markup.
            const htmlIndex = pageSource.indexOf(ADDRESS_SPAN_MARKER);
            stringImenaZemlje = pageSource.substring(htmlIndex + ADDRESS_SPAN_MARKER.length, htmlIndex + 300);
            if (htmlIndex === -1 || !stringImenaZemlje.includes("Australia")) {
                return -1;
            }
            return stringImenaZemlje.slice(0, stringImenaZemlje.indexOf('</')).replaceAll("\\n", " ");
        }

        /**
         * Extracts the business name from the page's meta title (the part
         * before the first "|").
         * @param {string} pageSource - HTML of the business page.
         * @returns {string} The business name text.
         */
        function getBuisnessName(pageSource) {
            const leviDeoIndex = pageSource.indexOf('"meta":{"title":"');
            const imeBiznisa = pageSource.substring(leviDeoIndex + 17, leviDeoIndex + 70);
            return imeBiznisa
                .slice(0, imeBiznisa.indexOf('","'))
                .split("|")[0]
                .replaceAll("\\u", "'s ");
        }

        /**
         * Extracts the website, first from the embedded "website" field and
         * otherwise from the rendered link span.
         * @param {string} pageSource - HTML of the business page.
         * @returns {string|number} The raw website text, or -1 if neither
         *     marker is present.
         */
        function getBuisnessWebsite(pageSource) {
            const jsonIndex = pageSource.indexOf('"website":');
            if (jsonIndex !== -1) {
                const buisnessWebsite = pageSource.substring(jsonIndex + 10, jsonIndex + 70);
                return buisnessWebsite.slice(0, buisnessWebsite.indexOf('","'));
            }
            const htmlIndex = pageSource.indexOf(WEBSITE_SPAN_MARKER);
            if (htmlIndex === -1) {
                return -1;
            }
            const buisnessWebsite = pageSource.substring(htmlIndex + WEBSITE_SPAN_MARKER.length, htmlIndex + 220);
            return buisnessWebsite.slice(0, buisnessWebsite.indexOf('<'));
        }

        /**
         * Extracts the page category.
         * @param {string} pageSource - HTML of the business page.
         * @returns {string} The category, or "There is no given category".
         */
        function getBuisnessCategory(pageSource) {
            if (pageSource.includes('"category_name":"')) {
                const leviDeoIndex = pageSource.indexOf('"category_name":"');
                const kategorija = pageSource.substring(leviDeoIndex + 17, leviDeoIndex + 68);
                return kategorija.slice(0, kategorija.indexOf('","')).replaceAll("\\/", "&");
            }
            if (pageSource.includes('"text":"Page ')) {
                const leviDeoIndex = pageSource.indexOf('"text":"Page');
                const kategorija = pageSource.substring(leviDeoIndex + 19, leviDeoIndex + 100);
                return kategorija.slice(0, kategorija.indexOf('"}')).replaceAll("\\/", "&");
            }
            return "There is no given category";
        }

        /**
         * Extracts an e-mail address: a gmail address if one occurs, otherwise
         * the text around the last "@" in the page.
         * @param {string} pageSource - HTML of the business page.
         * @returns {string} The e-mail text, or "Email cannot be extracted".
         */
        function getBuisnessEmail(pageSource) {
            const gmailIndex = pageSource.indexOf('@gmail.com');
            if (gmailIndex !== -1) {
                const buisnessEmail = pageSource.substring(gmailIndex - 100, gmailIndex + 11);
                return buisnessEmail.slice(buisnessEmail.indexOf('>') + 1, buisnessEmail.length - 1);
            }
            // Not a gmail address: take the text between the surrounding tags
            // of the last "@" in the document.
            const atIndex = pageSource.lastIndexOf('@');
            let buisnessEmail = pageSource.substring(atIndex - 30, atIndex + 30);
            buisnessEmail = buisnessEmail.slice(buisnessEmail.indexOf('>') + 1, buisnessEmail.indexOf('<'));
            if (atIndex === -1 || !buisnessEmail.includes(".") || !buisnessEmail.includes("@")) {
                return "Email cannot be extracted";
            }
            return buisnessEmail;
        }

        /**
         * Triggers a browser download of `data` under the given file name.
         * @param {string} data - File contents.
         * @param {string} fileName - Suggested name of the downloaded file.
         */
        function saveData(data, fileName) {
            console.log("Saving data to " + fileName);
            const a = document.createElement("a");
            a.style.display = "none";
            document.body.appendChild(a);
            const blob = new Blob([data], {type: "octet/stream"});
            const url = window.URL.createObjectURL(blob);
            a.href = url;
            a.download = fileName;
            a.click();
            window.URL.revokeObjectURL(url);
            // Remove the temporary anchor so repeated checkpoints do not pile up in the DOM.
            a.remove();
        }
    });
})();
QingJ © 2025
镜像随时可能失效,请加Q群300939539或关注我们的公众号极客氢云获取最新地址