赞
踩
使用fofa搜索资产时,通过api接口提取结果是有数量限制的。达到提取上限后怎么办?一条一条手工复制出来吗?当然不是,而是需要一个直接爬取网页搜索结果、不受该限制的脚本。
# coding: utf-8
"""Scrape FOFA web search result pages and reduce them to unique hosts.

Pipeline (run in this order by ``main``):

1. ``fofa`` fetches result pages and appends the raw targets to ``ip.txt``.
2. ``ip``   strips scheme and port from every target and writes ``ipf.txt``.
3. ``qc``   removes duplicates from ``ipf.txt`` and writes ``result.txt``.
"""
import base64
import sys
import threading
import time
from urllib.parse import quote

threads = []
time_start = time.time()


def fofa(search_data='port="6379"', pages=range(1, 5), token='你的fofa_token',
         out_path='ip.txt'):
    """Fetch FOFA result pages and append the listed targets to ``out_path``.

    Args:
        search_data: FOFA search expression (the search keyword).
        pages: iterable of page numbers to fetch. The default ``range(1, 5)``
            covers pages 1 to 4, because ``range`` excludes its upper bound.
        token: value of the ``fofa_token`` cookie of a logged-in session.
        out_path: file the raw targets are appended to, one per line.

    A page that cannot be fetched or parsed is reported on stderr and
    skipped; the remaining pages are still processed.
    """
    # Imported here rather than at module level so that ip() and qc() stay
    # usable on machines that do not have the scraping dependencies.
    import requests
    from lxml import etree

    query = base64.b64encode(search_data.encode('utf-8')).decode('ascii')
    url = 'https://fofa.info/result?qbase64='
    headers = {
        'cookie': 'refresh_token=1;'
                  'fofa_token=' + token + ';'
    }
    # Base64 output may contain '+', '/' and '=', which are not safe inside a
    # query string, so the value is percent-encoded once up front.
    encoded_query = quote(query, safe='')

    with open(out_path, 'a', encoding='utf-8') as out:
        for yeshu in pages:
            urls = url + encoded_query + "&page=" + str(yeshu)
            print("正在提取第" + str(yeshu) + "页数")
            try:
                # NOTE(review): sent as POST exactly like the original script;
                # confirm whether the endpoint expects GET instead.
                response = requests.post(urls, headers=headers, timeout=15)
                response.raise_for_status()
                dayi = etree.HTML(response.content.decode('utf-8'))
            except (requests.RequestException, etree.LxmlError,
                    UnicodeError) as exc:
                # UnicodeError also covers a cookie that cannot be encoded as
                # an HTTP header, e.g. the untouched placeholder token.
                print("page %s failed: %s" % (yeshu, exc), file=sys.stderr)
                time.sleep(0.5)
                continue
            if dayi is None:
                continue
            ip_data = dayi.xpath(
                '//span[@class="hsxa-copy-btn hsxa-copy-btn-no-link"]'
                '/@data-clipboard-text')
            # Skip empty pages so no blank lines end up in the output file.
            if ip_data:
                out.write('\n'.join(ip_data) + '\n')


def _extract_host(entry):
    """Return the bare host of one scraped target, or '' for a blank entry."""
    entry = entry.strip().strip('/')
    if not entry:
        return ''
    # Drop the scheme ("http://", "https://", ...) and anything after the host.
    netloc = entry.split('://', 1)[-1].split('/', 1)[0]
    if netloc.startswith('['):
        # Bracketed IPv6 literal, e.g. "[2001:db8::1]:6379".
        return netloc[1:].split(']', 1)[0]
    if netloc.count(':') == 1:
        # Plain "host:port".
        return netloc.split(':', 1)[0]
    # No port at all, or an unbracketed IPv6 address: keep it unchanged.
    return netloc


def ip(src='ip.txt', dst='ipf.txt'):
    """Write the host of every target listed in ``src`` to ``dst``.

    Scheme, port and surrounding slashes are removed and blank lines are
    skipped. ``dst`` is rewritten on every call: it is derived entirely from
    ``src``, so appending would re-add every host on each run.
    """
    with open(src, encoding='utf-8') as raw, \
            open(dst, 'w', encoding='utf-8') as out:
        for line in raw:
            host = _extract_host(line)
            if host:
                out.write(host + '\n')


def qc(src='ipf.txt', dst='result.txt'):
    """Copy ``src`` to ``dst``, keeping only the first occurrence of each line.

    Lines are compared without surrounding whitespace and blank lines are
    dropped; the original order of first occurrences is preserved.
    """
    lines_seen = set()
    with open(src, 'r', encoding='utf-8') as f, \
            open(dst, 'w', encoding='utf-8') as out:
        for line in f:
            host = line.strip()
            if host and host not in lines_seen:
                lines_seen.add(host)
                out.write(host + '\n')


def main():
    """Run the scrape, host extraction and de-duplication, then print timing."""
    threads.append(threading.Thread(target=fofa))
    for t in threads:
        t.start()
    # Post-processing must only start once all scraping has finished.
    for t in threads:
        t.join()
    ip()
    qc()
    time_end = time.time()
    time_sum = time_end - time_start
    print(" Program run time %s" % time_sum)


if __name__ == '__main__':
    main()
其中search_data填写搜索关键词
fofa_token可在登录fofa后按F12打开浏览器开发者工具,在cookie当中查找并复制即可
后面的页数范围若是range(1,5),那便是提取第1页到第4页(range不包含结束值);若要提取第1页到第5页,应写成range(1,6).
ip.txt提取的为原始内容结果如下
http://11.74.197.22:9090
http://11.74.197.22:9091
http://11.156.245.220:9090
ipf.txt提取的为原始内容的IP
11.74.197.22
11.74.197.22
11.156.245.220
result.txt是ipf.txt去重后的结果
11.74.197.22
11.156.245.220
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。