# 赞
# 踩
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# Dangdang book information scraper.

# Search endpoint; the query is passed via ``params`` so it is URL-encoded.
SEARCH_URL = 'http://search.dangdang.com/'
# Seconds to wait for the server before giving up (requests has no default).
REQUEST_TIMEOUT = 10


def _extract_image_url(img, base_url):
    """Return the absolute cover URL of an ``<img>`` tag, or '' if it has none.

    ``data-original`` is preferred and ``src`` is the fallback (presumably the
    page lazy-loads covers and keeps the real URL in ``data-original``).
    Relative and protocol-relative values (``//host/path``) are resolved
    against *base_url* instead of being dropped, so every item yields exactly
    one entry.
    """
    raw = img.get('data-original') or img.get('src')
    if not raw:
        return ''
    return urljoin(base_url, raw)


def _extract_price(item):
    """Return the current price text of a result item, or '' if it is absent."""
    price_box = item.find('p', {'class': 'price'})
    if price_box is None:
        return ''
    now_price = price_box.find('span', {'class': 'search_now_price'})
    if now_price is None:
        return ''
    return now_price.get_text().replace('¥', '')


def getdangdang(isbn):
    """Search Dangdang for *isbn* and return the listed books.

    Args:
        isbn: The search key sent to Dangdang (an ISBN in the sample usage).

    Returns:
        A tuple ``(titles, imgsrcs, prices)`` of three parallel lists: index
        ``i`` in each list describes the same search result. ``imgsrcs`` and
        ``prices`` hold '' where the page provides no value. All three lists
        are empty when the page contains no ``ul.bigimg`` result list.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    headers = {'User-Agent': UserAgent().random}
    response = requests.get(
        SEARCH_URL,
        params={'act': 'input', 'key': isbn},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an error page instead of parsing it as "no results".
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    titles = []
    imgsrcs = []
    prices = []

    result_list = soup.find('ul', {'class': 'bigimg'})
    if result_list is None:
        return titles, imgsrcs, prices

    for item in result_list.find_all('li'):
        img = item.find('img')
        if img is None:
            # Without an <img> there is neither a title nor a cover to report.
            continue
        titles.append(img.get('alt'))
        imgsrcs.append(_extract_image_url(img, response.url))
        prices.append(_extract_price(item))

    return titles, imgsrcs, prices


if __name__ == '__main__':
    print('当当图书信息抓取开始。。。')
    titles, imgsrcs, prices = getdangdang('9787533735609')
    print('titles=', titles)
    print('img=', imgsrcs)
    print('prices=', prices)
    print('当当图书信息抓取结束。。。')
#以上代码仅提供参考交流学习
# Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。