爬虫
幸运的felix
这个作者很懒,什么都没留下…
展开
-
form表单登录人人网
import jsonfrom urllib import request,parsefrom http import cookiejar# 创建cookie对象cookie = cookiejar.CookieJar()# cookie操作handler = request.HTTPCookieProcessor(cookie)# 实例化cookieopener = reques...原创 2018-08-14 20:50:22 · 756 阅读 · 0 评论 -
urllib/request爬取百度贴吧图片
import refrom urllib import request,parseimport os# 突破下载函数def download(img_html): # 正则匹配图片url img_url = re.findall('<img class="BDE_Image" src="(.*?)"',img_html,re.S) # print(img_u...原创 2019-01-10 16:36:59 · 332 阅读 · 0 评论 -
scrapy框架爬取微博之spider文件
# -*- coding: utf-8 -*-import scrapyfrom scrapy.settings import default_settingsimport jsonfrom ..items import WeiboItemimport refrom w3lib.html import remove_tagsclass WeiboSpider(scrapy.Spid...原创 2018-09-19 17:09:10 · 188 阅读 · 0 评论 -
python爬取社会招聘保存mysql
import requestsfrom lxml import etreeimport pymysqlheaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36...原创 2018-08-20 00:30:12 · 259 阅读 · 0 评论 -
python爬取阳光电影保存mysql
import requestsfrom lxml import etreeimport reimport pymysql# 定义urlheaders = { 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396...原创 2018-08-20 00:29:09 · 291 阅读 · 0 评论 -
python爬取安居客保存mysql
import requestsfrom lxml import etreeimport pymysqlurl = 'https://bj.zu.anjuke.com/fangyuan/huilongguan/p{}/'headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.3...原创 2018-08-20 00:27:43 · 661 阅读 · 0 评论 -
Python爬取toutiao图片保存到本地
import reimport requestsimport jsonimport osfrom urllib import requesturl = 'https://www.toutiao.com/search_content/?offset={}&format=json&keyword=%E7%BE%8E%E5%9B%BE&autoload=true&a...原创 2018-08-16 22:22:08 · 440 阅读 · 0 评论 -
Python的post和get请求封装
from urllib import request,parsefrom urllib.error import HTTPError, URLError# 定义函数,form传参为post,不传参为getdef urlrequests(url,form=None,headers=None): # 定义默认headers,如果不传参就用默认的,传参就替换默认的。 if hea...原创 2018-08-13 22:29:52 · 1844 阅读 · 1 评论 -
python获取xueqiu数据并保存mysql
from urllib import requestimport jsonimport pymysqlheaders = { 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',...原创 2018-08-16 00:33:43 · 172 阅读 · 0 评论 -
约会网 scrapy-spider
# -*- coding: utf-8 -*-import scrapyimport jsonpathimport jsonfrom ..items import YuehuiItemclass YuehuiSpider(scrapy.Spider): name = 'yuehui' allowed_domains = ['163.com'] start_u...原创 2019-05-14 21:06:43 · 140 阅读 · 0 评论