赞
踩
目录
2.获取京东网站首页查询按钮并完成点击事件(进入页面自动查询)
Selenium是一个用于Web应用程序自动化测试工具。Selenium测试直接运行在浏览器中,就像真正的用户在
操作一样。支持的浏览器包括IE(7, 8, 9, 10, 11),Mozilla Firefox,Safari,Google Chrome,Opera等。
适用于自动化测试,js动态爬虫(破解反爬虫)等领域。
Selenium的核心Selenium Core基于JsUnit,
完全由JavaScript编写,因此可以用于任何支持JavaScript的浏览器上。
Selenium是一个自动化测试工具,可以模拟真实浏览器的操作,并且支持多种浏览器。
谷歌浏览器驱动(ChromeDriver)下载地址:http://chromedriver.storage.googleapis.com/index.html
如果在下载地址中找不到与自己浏览器版本完全匹配的驱动,可以选用版本号相近的驱动。
- <!-- Maven dependency on the selenium-java artifact, version 3.141.59. -->
- <dependency>
- <groupId>org.seleniumhq.selenium</groupId>
- <artifactId>selenium-java</artifactId>
- <version>3.141.59</version>
- </dependency>
- // Register the location of the chromedriver executable as a system property.
- String chromeDriverPath = "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe";
- System.setProperty("webdriver.chrome.driver", chromeDriverPath);
- // Create the Chrome driver and open the page that will be scraped.
- WebDriver driver = new ChromeDriver();
- driver.get("https://www.baidu.com/");
Class:
- // Print the text of every element whose class name is "hotsearch-item".
- List<WebElement> hotItems = driver.findElements(By.className("hotsearch-item"));
- for (int i = 0; i < hotItems.size(); i++) {
-     System.out.println(hotItems.get(i).getText());
- }
ID:
- // Look up the element with id "kw" and print its "name" attribute.
- String kwName = driver.findElement(By.id("kw")).getAttribute("name");
- System.out.println(kwName);
name:
- // Look up the element named "tn" and print its "value" attribute.
- String tnValue = driver.findElement(By.name("tn")).getAttribute("value");
- System.out.println(tnValue);
tag:
- // Print the "value" attribute of every <input> element on the page.
- for (WebElement inputElement : driver.findElements(By.tagName("input"))) {
-     System.out.println(inputElement.getAttribute("value"));
- }
link:
- // Print the text of every link located by the link text "地图".
- List<WebElement> mapLinks = driver.findElements(By.linkText("地图"));
- int index = 0;
- while (index < mapLinks.size()) {
-     System.out.println(mapLinks.get(index).getText());
-     index++;
- }
Partial link选择(a标签文本内容模糊匹配):
- // Print the text of every link located by the partial link text "中国".
- for (WebElement link : driver.findElements(By.partialLinkText("中国"))) {
-     System.out.println(link.getText());
- }
css选择器:
- // Print the text of the even-positioned <li> children of #hotsearch-content-wrapper.
- List<WebElement> elements = driver.findElements(By.cssSelector("#hotsearch-content-wrapper > li:nth-child(even)"));
- for (WebElement element : elements) {
-     System.out.println(element.getText());
- }
xpath选择:
- // Locate the element with id "kw" through an XPath expression and print its "class" attribute.
- WebElement searchBox = driver.findElement(By.xpath("//*[@id=\"kw\"]"));
- String cssClass = searchBox.getAttribute("class");
- System.out.println(cssClass);
- // Type "java" into the element with id "kw", then click the element with id "su".
- driver.findElement(By.id("kw")).sendKeys("java");
- driver.findElement(By.id("su")).click();
- // Print the "value" attribute of the element named "ie".
- String ieValue = driver.findElement(By.name("ie")).getAttribute("value");
- System.out.println(ieValue);
- // Print the text of every link located by the partial link text "大".
- for (WebElement link : driver.findElements(By.partialLinkText("大"))) {
-     System.out.println(link.getText());
- }
- /**
-  * Starts a Chrome session through chromedriver and opens the JD home page.
-  *
-  * @param args command-line arguments (not used)
-  */
- public static void main(String[] args) {
-     // Register the chromedriver location as a JVM system property.
-     String chromeDriverPath = "C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe";
-     System.setProperty("webdriver.chrome.driver", chromeDriverPath);
-     // Create the driver, then navigate to the site to scrape.
-     WebDriver driver = new ChromeDriver();
-     driver.get("https://www.jd.com/");
- }
- // Find the search input on the JD home page and type the keyword.
- WebElement key = driver.findElement(By.id("key"));
- key.sendKeys("人妻");
- // Find the search button on the JD home page and click it.
- WebElement button = driver.findElement(By.cssSelector("button.button"));
- button.click();
- // Pause for a moment before scrolling.
- sleep(3);
- // Move the scroll bar to the very bottom of the page.
- ((JavascriptExecutor) driver).executeScript("window.scrollTo(0,document.body.scrollHeight)");
- sleep(2);
- // XPath of a single product node, kept for reference: //*[@id="J_goodsList"]/ul/li[3]
- // Collect every product on the result page.
- List<WebElement> elements = driver.findElements(By.xpath("//*[@id=\"J_goodsList\"]/ul/li"));
- for (WebElement element : elements) {
-     String price = element.findElement(By.className("p-price")).getText();
-     String name = element.findElement(By.className("p-name")).getText();
-     System.out.println("【"+price+"】-"+name);
- }
5.完整代码
- package com.zhq.selenium;
-
- import org.openqa.selenium.By;
- import org.openqa.selenium.JavascriptExecutor;
- import org.openqa.selenium.WebDriver;
- import org.openqa.selenium.WebElement;
- import org.openqa.selenium.chrome.ChromeDriver;
- import org.openqa.selenium.chrome.ChromeOptions;
-
- import java.util.List;
-
- /**
-  * Searches JD for a keyword with Selenium and prints the price and name of every
-  * product found on the result page.
-  */
- public class Demo2 {
-     /**
-      * Opens the JD home page, submits a search, scrolls to the bottom of the result
-      * page and prints each product as {@code 【price】-name}.
-      *
-      * @param args command-line arguments (not used)
-      */
-     public static void main(String[] args) {
-         // Register the chromedriver location as a JVM system property.
-         System.setProperty("webdriver.chrome.driver","C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe");
-         /************************** Option 1: do not open a browser window **************************/
-         // Define the browser options.
-         //ChromeOptions chromeOptions = new ChromeOptions();
-         // Run without opening a browser window.
-         //chromeOptions.addArguments("--headless");
-         // Create the driver.
-         //WebDriver driver = new ChromeDriver(chromeOptions);
-
-         /************************** Option 2: open a browser window **************************/
-         // Create the driver.
-         WebDriver driver = new ChromeDriver();
-         try {
-             // Navigate to the site to scrape.
-             driver.get("https://www.jd.com/");
-             // Find the search input on the JD home page and type the keyword.
-             WebElement key = driver.findElement(By.id("key"));
-             key.sendKeys("人妻");
-             // Find the search button on the JD home page and click it.
-             WebElement button = driver.findElement(By.cssSelector("button.button"));
-             button.click();
-             // Pause for a moment before scrolling.
-             sleep(3);
-             // Move the scroll bar to the very bottom of the page.
-             ((JavascriptExecutor) driver).executeScript("window.scrollTo(0,document.body.scrollHeight)");
-             sleep(2);
-
-             // XPath of a single product node, kept for reference: //*[@id="J_goodsList"]/ul/li[3]
-             // Collect every product on the result page.
-             List<WebElement> elements = driver.findElements(By.xpath("//*[@id=\"J_goodsList\"]/ul/li"));
-             for (WebElement element : elements) {
-                 String price = element.findElement(By.className("p-price")).getText();
-                 String name = element.findElement(By.className("p-name")).getText();
-                 System.out.println("【"+price+"】-"+name);
-             }
-         } finally {
-             // Always end the session, even when a lookup above throws; otherwise the
-             // browser and the chromedriver process are left running.
-             driver.quit();
-         }
-     }
-
-     /**
-      * Pauses the current thread.
-      *
-      * @param num number of seconds to sleep
-      */
-     public static void sleep(int num) {
-         try {
-             Thread.sleep(num * 1000L);
-         } catch (InterruptedException e) {
-             // Restore the interrupt flag so callers can still observe the interruption.
-             Thread.currentThread().interrupt();
-             e.printStackTrace();
-         }
-     }
- }

某些网站采用了反爬虫技术,因此可能无法访问。
- package com.zhq.selenium;
-
- import org.openqa.selenium.By;
- import org.openqa.selenium.JavascriptExecutor;
- import org.openqa.selenium.WebDriver;
- import org.openqa.selenium.WebElement;
- import org.openqa.selenium.chrome.ChromeDriver;
- import sun.net.www.protocol.http.HttpURLConnection;
-
- import java.io.*;
- import java.net.URL;
- import java.net.URLConnection;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.UUID;
-
- public class Demo3 {
-
- //定义Driver驱动
- public static WebDriver driver=null;
-
- //定义List集合,用于存储爬取数据中的图片路径
- public static List<String> imgs=new ArrayList<>();
-
- static{
- //将驱动加载到Java的JVM虚拟机中
- System.setProperty("webdriver.chrome.driver","C:\\Users\\Administrator\\AppData\\Local\\Google\\Chrome\\Application\\chromedriver.exe");
- //初始化驱动
- driver = new ChromeDriver();
- }
-
- public static void sleep(int num){
- try {
- Thread.sleep(num*1000L);
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- public static void getImgs(){
- //设置爬取网站
- driver.get("http://www.gaoimg.com/");
- sleep(3);
- //设置滚动条移动到最下面
- ((JavascriptExecutor) driver).executeScript("window.scrollTo(0,document.body.scrollHeight)");
- sleep(2);
-
- //定义爬取的节点
- ///html/body/div[8]/ul/li[2]/a/img
- *[@id="inspiration__content-item-3"]/div/a/div[3]/img
- *[@id="inspiration__content-item-0"]/div[2]/a[1]/div[3]/img
- //#inspiration__content-item-0 > div.inspiration__content-item-list > a > div.s-c__ct > img
-
- List<WebElement> elements = driver.findElements(By.cssSelector("body > div.tuijiantupian > div.flex-images > div > a > img"));
- //循环遍历所有img元素节点
- for (WebElement element : elements) {
- String src = element.getAttribute("src");
- if(null!=src)
- imgs.add(src);
- }
- }
-
- public static void saveImg(){
- try{
- String path="D:\\images\\";
- URL url=null;
- for (String img : imgs) {
- url=new URL(img);
- InputStream is=new DataInputStream(url.openStream());
- String fileName=path+ UUID.randomUUID().toString().replace("-","")+".jpg";
- OutputStream out=new FileOutputStream(new File(fileName));
- byte[] bytes=new byte[1024];
- int len=0;
- while((len=is.read(bytes))!=-1){
- out.write(bytes,0,len);
- }
- is.close();
- out.close();
- }
- }catch (Exception e){
- e.printStackTrace();
- }
- }
-
- public static void main(String[] args) {
- try {
- //爬取图片路径
- getImgs();
- //循环打印图片路径
- for (String img : imgs) {
- System.out.println(img);
- }
- //保存图片
- saveImg();
- } catch (Exception e) {
- e.printStackTrace();
- } finally {
- //一定要记得下载完图片之后释放资源
- if(null!=driver)
- driver.quit();
- }
- }
- }

博主水平有限,难免有错。欢迎评论交流
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。