The urllib library
import urllib.request
import re
from 爬虫不同的库发出请求.usergent import usergent
header = {"User-Agent":usergent()}
url = r"http://www.youdao.com/"
urr = urllib.request.Request(url,headers=header)
# Build a custom opener from an HTTPHandler
httphandle = urllib.request.HTTPHandler()
opener = urllib.request.build_opener(httphandle)
# Open the Request object (not the bare url) so the User-Agent header is actually sent
response = opener.open(urr).read().decode()
'''
# Alternatively, install the opener globally so that every later call
# to urlopen() uses this custom opener by default
urllib.request.install_opener(opener)
response = urllib.request.urlopen(urr).read().decode()
'''
# Extract the page title with a non-greedy regular expression
pattern = r"<title>(.*?)</title>"
titles = re.findall(pattern, response)
print(titles)
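A minimal sketch (not from the original notes) of catching urllib's own exceptions around the same request; urllib.error.HTTPError and urllib.error.URLError are the standard classes for this:
'''
import urllib.error
try:
    response = opener.open(urr).read().decode()
except urllib.error.HTTPError as e:
    # The server answered, but with an error status code (404, 500, ...)
    print("HTTP error:", e.code)
except urllib.error.URLError as e:
    # The request never reached the server (DNS failure, refused connection, ...)
    print("URL error:", e.reason)
'''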
The requests library
import requests
from 爬虫不同的库发出请求.usergent import usergent
'''
url = "https://zhengzhou.anjuke.com/sale/?" #安居客网
header = {"User-Agent":usergent()}
# Define the query-parameter dict wd
wd = {"kw":"和润林湖美景"}
#https://zhengzhou.anjuke.com/sale?
try:
    # params are URL-encoded and appended to the URL as the query string
    response = requests.get(url, params=wd, headers=header)
    print(response.text)
except requests.exceptions.ConnectionError:
    print("Connection failed, stopping output")
'''
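A minimal sketch showing how requests encodes the params dict into the final URL, using httpbin.org as an assumed echo endpoint (not part of the original notes):
'''
import requests
test_url = "https://httpbin.org/get"
wd = {"kw": "和润林湖美景"}
res = requests.get(test_url, params=wd)
# requests URL-encodes the dict and appends it as the query string
print(res.url)
# the echo service returns the decoded parameters it received
print(res.json()["args"])
'''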
'''
url = "https://www.iqianyue.com/mypost"
# Form data to send to the server in the POST body
formdata = {"name":"lau","pass":"1234567"}
res = requests.post(url,data=formdata)
print(res.text)
'''
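The same form POST can be checked against an echo endpoint; httpbin.org and this usage are assumptions for illustration, not part of the iqianyue example:
'''
import requests
formdata = {"name": "lau", "pass": "1234567"}
res = requests.post("https://httpbin.org/post", data=formdata)
print(res.status_code)
# the echo service returns the form fields it received under "form"
print(res.json()["form"])
'''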
'''
url = "https://www.douban.com/" #豆瓣网
params = {}
header = {"User-Agent":'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
"Cookie":'bid=9NeBIpRMcgA; douban-fav-remind=1; __yadk_uid=3iD14p0fcxvP0n6iCkuDlDT9hepomKUQ; __gads=ID=0e83425bd55684c3:T=1\
599739192:S=ALNI_MYtlY7GlAQOEDr2e4gdIpCfbiy-rA; __utmz=30149280.1601262569.2.2.utmcsr=baidu|utmccn=(organic)|utmcmd=organic;\
ll="118163"; push_noty_num=0; push_doumail_num=0; __utmv=30149280.22511; __utma=30149280.1710552684.1599739191.1602768951.16\
02826779.4; __utmc=30149280; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1603017305%2C%22https%3A%2F%2Fwww.baidu.com%\
2Flink%3Furl%3DogJfE080UPFVsLV17_aBgVbxeps-23zwz0h0njmmCXTsaH-hhy3TRISY3yGBeIvrXwD2gLlw3fmXVoR5b_Exsq%26wd%3D%26eqi\
d%3Da18b53850000683e000000055f71537b%22%5D; _pk_ses.100001.8cb4=*; ap_v=0,6.0; dbcl2="225112613:1oJIKQB3pRQ"; ck=NKF4; _pk_id.\
100001.8cb4=45ef19b90fa8e5d7.1599739191.5.1603017428.1602826794.'}
res = requests.get(url,headers=header)
print(res.text)
'''
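Instead of pasting the whole Cookie header by hand, requests can also take the cookies as a dict through the cookies= parameter; a minimal sketch reusing only two illustrative values from the header above:
'''
import requests
header = {"User-Agent": usergent()}
cookies = {"bid": "9NeBIpRMcgA", "ll": "118163"}  # illustrative values only
res = requests.get("https://www.douban.com/", headers=header, cookies=cookies)
print(res.status_code)
'''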
'''
header = {"User-Agent":'Mozilla/5.0 (Windows NT 6.1; Win64; x64)\
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'
}
url = "https://www.douban.com/" #豆瓣网
# 1. Send the request and receive the server response
res = requests.get(url,headers=header)
# 2. The response object's .cookies attribute returns a CookieJar object
cookiesjar = res.cookies
# 3. Convert the CookieJar object into a dict
cookiesdic = requests.utils.dict_from_cookiejar(cookiesjar)
# The result is a dict with each cookie's name as the key and its value as the value
print(cookiesdic)
'''
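The conversion also works the other way: requests.utils.cookiejar_from_dict turns the dict back into a CookieJar, and the dict itself can be passed directly to a later request; a minimal sketch continuing the block above:
'''
# turn the dict back into a CookieJar if an API expects one
cookiejar_again = requests.utils.cookiejar_from_dict(cookiesdic)
# or reuse the dict directly on the next request
res2 = requests.get(url, headers=header, cookies=cookiesdic)
print(res2.status_code)
'''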
'''
# Douban login walkthrough: https://fishc.com.cn/thread-169934-1-1.html
# First try logging in with a wrong account/password, then find the POST URL and the required form parameters in the browser's Network panel
url = "https://accounts.douban.com/j/mobile/login/basic"
header = {"User-Agent": 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}
# Create a session object
session = requests.session()
# Parameters required for the login form
data = {"ck":"",
"remember":"true",
"name":"15895885470",
"password":"pn13986737527"}
# Submit the data to obtain the login cookie
# Send one GET request first, then POST the login parameters
res1 = session.get(url=url,headers=header)
res2 = session.post(url=url,data=data,headers=header)
print(res2.text)
# Make another request with the same session (its cookies are sent automatically)
res3 = session.get(url)
print(res3.text)
# Repeated login attempts trigger a captcha; wait a while before submitting again
'''
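What makes requests.session() useful is that cookies set by one response are sent back automatically on the next request; a minimal sketch with a neutral endpoint (httpbin.org is an assumption, not part of the Douban example):
'''
import requests
session = requests.session()
# the first request asks the server to set a cookie ...
session.get("https://httpbin.org/cookies/set?demo=1")
# ... and the session sends it back automatically on the next request
res = session.get("https://httpbin.org/cookies")
print(res.json())   # expected: {'cookies': {'demo': '1'}}
'''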