- 博客(14)
- 收藏
- 关注
原创 获取苏宁有优惠的图书
# -*- coding: utf-8 -*-import scrapyfrom copy import deepcopyimport re# https://ds.suning.com/ds/generalForTile/000000011541217219__2_0070937195,000000010567515166__2_0070096274,000000012045814624__2_0070875390,000000000648587472_,000000010567523701__2
2020-07-25 17:51:59
115
原创 爬取苏宁图书
# -*- coding: utf-8 -*-import scrapyfrom copy import deepcopyclass SuSpider(scrapy.Spider): name = 'su' allowed_domains = ['suning.com'] start_urls = ['http://book.suning.com/'] def parse(self, response): div_list=response.xpa
2020-07-18 20:00:13
150
原创 爬取京东图书
# -*- coding: utf-8 -*-import scrapyfrom copy import deepcopyimport re# https://list.jd.com/list.html?cat=1713%2C3258%2C3297&page=1&s=1&click=0# https://list.jd.com/list.html?cat=1713%2C3258%2C3297&page=3&s=53&click=0# https:/
2020-07-17 22:57:57
240
原创 爬取当当图书信息
# -*- coding: utf-8 -*-import scrapyfrom copy import deepcopyclass DangSpider(scrapy.Spider): name = 'dang' allowed_domains = ['dangdang.com'] start_urls = ['http://book.dangdang.com/'] def parse(self, response): div_list=resp
2020-07-17 19:52:12
129
原创 斗图爬取
# https://www.doutula.com/photo/list/?page=1import requestsfrom lxml import etreeimport osclass Doutu(): url='https://www.doutula.com/photo/list/?page=%d' headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHT
2020-07-08 16:23:40
118
原创 selenium下拉框练习
from selenium import webdriverfrom selenium.webdriver.support.ui import Selectdriver=webdriver.Chrome()driver.get('https://www.17sucai.com/pins/demo-show?id=5926')driver.switch_to.frame(driver.find_element_by_id('iframe'))Select(driver.find_element_b
2020-07-05 20:12:52
63
原创 selenium登录QQ邮箱
from selenium import webdriverfrom lxml import etreeimport requests# 微信扫码登录driver=webdriver.Chrome()url='https://mail.qq.com'# i=requests.get(url).text# response=etree.HTML(i)# url1=response.xpath("//iframe/@src")[0]# qq账号密码登录driver.get(url)# 切换
2020-07-05 16:54:32
258
原创 PhantomJS小练习
from selenium import webdriverimport timediver=webdriver.PhantomJS('D:\phantomjs.exe')diver.get('http://www.baidu.com')diver.find_element_by_id('kw').send_keys('python')diver.find_element_by_id('su').click()print(diver.page_source)print(diver.curr
2020-07-04 23:30:25
70
原创 selenium模拟登陆豆瓣
from selenium import webdriverfrom lxml import etreeimport requestsdiver=webdriver.Chrome()url='http://www.douban.com'headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/.
2020-07-04 23:27:32
111
原创 爬取微信小程序
# -*- coding: utf-8 -*-import scrapyfrom scrapy.linkextractors import LinkExtractorfrom scrapy.spiders import CrawlSpider, Ruleclass WxSpider(CrawlSpider):name = 'wx'allowed_domains = ['wxapp-un...
2020-04-29 09:52:38
672
原创 scrapy 爬取腾讯招聘
# -*- coding: utf-8 -*-import scrapyimport jsonclass HrSpider(scrapy.Spider): name = 'hr' allowed_domains = ['careers.tencent.com'] one_url='https://careers.tencent.com/tencentcareer/...
2020-04-21 14:00:05
132
原创 scrapy 豆瓣爬虫
db.py爬虫文件# -*- coding: utf-8 -*-import scrapyimport refrom scrapy import Requestfrom copy import deepcopyclass DbSpider(scrapy.Spider): name = 'db' allowed_domains = ['douban.com'] ...
2020-03-07 00:15:24
96
原创 分布式scrapy爬取微信小程序页面标题和链接
items.pyimport scrapyclass WxappItem(scrapy.Item): # define the fields for your item here like: # name = scrapy.Field() title=scrapy.Field() url=scrapy.Field()weiixn.pyimport sc...
2020-02-20 18:16:17
227
原创 爬虫实战:爬取csdn学院所有课程名、价格
import requestsfrom lxml import etreeimport csvimport pandasclass CSDNspider:#爬取csdn学院所有课程名、价格def __init__(self):self.url='https://edu.csdn.net/courses/o280_s355'def fenqu(self):response=requests...
2020-02-18 23:43:35
303
空空如也
空空如也
TA创建的收藏夹 TA关注的收藏夹
TA关注的人