python爬虫模块_Python爬虫(request模块)

Python

Python开发

Python语言

Python爬虫(request模块)

CgpOIF5EAWWAQ0y_AAcB-fDz9P0878.png

发送简单的请求:

1850558-20191112151824122-701934318.png

发送带header的请求:

1850558-20191112153213517-329702603.png

发送带参数的请求:

1850558-20191112154936958-1139844836.png

例子如下:

import requests

headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "

"(KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}

p = {'hhs':100}

url = 'http://www.baidu.com'

response = requests.get(url,headers=headers,params=p)

print(response.status_code)

print(response.request.url)

实战(贴吧爬虫):

import requests

class TiebaSpider:

#构造函数,传入想要查询的贴吧名字和查询前num页,定义后面方法需要用的属性

def __init__(self,tieba_name,tieba_num):

self.tieba_num = tieba_num

self.tieba_name = tieba_name

#下面为url访问的格式,kw为贴吧名,pn为代表页数,贴吧可查首页pn=0,第一页50,第二页100,以此类推

self.url_temp = "https://tieba.baidu.com/f?kw=" + tieba_name + "&ie=utf-8&pn={}"

#模仿网页访问贴吧

self.headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "

"(KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36"}

#获取url列表

def get_urlList(self):

url_list = []

for i in range(self.tieba_num):

url_list.append(self.url_temp.format(i * 50))

return url_list

#访问网页,解析网页,返回

def parse_url(self,url):

response = requests.get(url,headers=self.headers)

return response.text #或者response.content.decode('utf-8')

#保存网页

def save_html(self,html_str,page_num):

file_path = "{}-第{}页.html".format(self.tieba_name,page_num)

with open(file_path,'w',encoding='utf-8') as f:

f.write(html_str)

def run(self):

url_list = self.get_urlList()

for url in url_list:

html_str = self.parse_url(url) #保存的内容

page_num = url_list.index(url) + 1 #文件的页码

self.save_html(html_str,page_num)

def main():

tieba_name = input('请输入想查询的贴吧名:')

tieba_num = int(input('请输入想要查询前几页:'))

tieba = TiebaSpider(tieba_name, tieba_num)

tieba.run()

if __name__ == '__main__':

main()

内容来源于网络,如有侵权请联系客服删除

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值