查询参数-params
1.参数类型
字典,字典中键值对作为查询参数
2.使用方法
1、res = requests.get(url,params=params,headers=headers)
2、特点:
   * url为基准的url地址,不包含查询参数
   * 该方法会自动对params字典编码,然后和url拼接
3.示例
# Example: send a GET request whose query string is built from a dict
# passed through the ``params`` argument of requests.get().
import requests

# Base URL only; the query parameters are supplied separately via ``params``.
baseurl = 'http://tieba.baidu.com/f?'
params = {
    'kw': '赵丽颖吧',
    'pn': '50',
}
headers = {
    'User-Agent': (
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; '
        'Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; '
        '.NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; InfoPath.3)'
    ),
}

# requests URL-encodes ``params`` and joins it onto ``baseurl`` before
# sending the request.
# The timeout prevents the script from hanging forever if the server
# never responds (requests applies no timeout by default).
res = requests.get(baseurl, params=params, headers=headers, timeout=10)
# Force UTF-8 decoding of the response body before reading ``res.text``.
res.encoding = 'utf-8'
print(res.text)
web客户端验证 参数-auth
1.作用类型
1、针对需要web客户端用户名密码认证的网站
2、auth = ('username','password')
2.通过用户名和密码获取笔记名称案例
import requests from lxml import etree import os class NoteSpider(object): def __init__(self): self.url = 'http://code.com.cn/Code/aid1904/redis/' self.headers = { 'User-Agent':'Mozilla/5.0'} self.auth = ('code','code_2013') # 获取 def get_html(self): html = requests.get(url=self.url,auth=self.auth,headers=self.headers).text return html # 解析提取数据 + 把笔记压缩包下载完成 def parse_page(self): html = self.get_html() xpath_bds = '//a/@href' parse_html = etree.HTML(html) # r_list : ['../','day01','day02','redis_day01.zip'] r_list = parse_html.xpath(xpath_bds) for r in r_list: if r.endswith('zip') or r.endswith('rar