赞
踩
使用chrome的driver爬取数据:
# Scrape a "hot" count from Baidu for each keyword in a CSV, using Selenium
# driving Chrome, then write the augmented table to an Excel file.
#
# Input : C:/data.csv with a 'keyword' column.
# Output: C:/百度20230228.xlsx with a 'hot_num' column filled in.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from urllib import request  # unused here; kept from the original article
import requests             # unused here; kept from the original article
from lxml import etree
import pandas as pd

filepath = 'C:/data.csv'

data = pd.read_csv(filepath)
keywords = data['keyword'].values.tolist()

browser = webdriver.Chrome()
try:
    for i, query in enumerate(keywords):
        browser.get('https://www.baidu.com')
        # Selenium 4 removed find_element_by_id(); use find_element(By.ID, ...).
        # Also avoid shadowing the builtin `input`.
        search_box = browser.find_element(By.ID, 'kw')
        search_box.send_keys(query)
        ActionChains(browser).send_keys(Keys.ENTER).perform()
        # Block until the results container exists so page_source has results.
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.ID, 'content_left'))
        )
        html_object = etree.HTML(browser.page_source)
        # NOTE(review): XPath targets Baidu's hot-count span; verify the
        # div id "tsn_inner" still matches the current page layout.
        hits = html_object.xpath('//div[@id="tsn_inner"]/div[2]/span[1]/text()')
        # .loc avoids pandas chained assignment (SettingWithCopyWarning and a
        # possible silent no-op write). Store the text itself, not the list
        # repr the original produced with str(path_data).
        data.loc[i, 'hot_num'] = hits[0] if hits else ''
finally:
    # Always release the browser, even if a wait times out mid-loop.
    browser.quit()

data.to_excel('C:/百度20230228.xlsx', sheet_name='Sheet1', index=False)

备注:文章就是之前爬取方式的一个变种,增加了chromedriver的下载,使用selenium貌似需要成功安装后才能使用这个方式。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。