赞
踩
wzry.py
import scrapy from LearnScrapy.items import HeroItem class WzrySpider(scrapy.Spider): name = 'wzry' allowed_domains = ['pvp.qq.com'] start_urls = ['https://pvp.qq.com/web201605/herolist.shtml'] hero_detail_base_url = "https://pvp.qq.com/web201605/" def parse(self, response): # print(response) hero_list = response.xpath("//div[contains(@class, 'herolist-content')]/ul[contains(@class, 'herolist')]/li/a/@href").extract() # print(hero_list) # for hero_detail in hero_list: # yield scrapy.Request(url=self.hero_detail_base_url + hero_detail, callback=self.parse_hero_detail, meta={"msg": "ok"}) # yield scrapy.Request(url=response.urljoin(hero_detail), callback=self.parse_hero_detail, meta={"msg": "ok"}) # yield response.follow(url=hero_detail, callback=self.parse_hero_detail, meta={"msg": "ok"}) requests = response.follow_all(urls=hero_list, callback=self.parse_hero_detail, meta={ "msg": "ok"}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。