import re
import urllib.request
#urllib.request.urlcleanup()
# Timeout settings: request the page five times, each with a 1-second timeout.
for _attempt in range(0, 5):
    try:
        # The request itself must be inside the try block: a timeout is raised
        # by urlopen()/read(), not by the print call.
        with urllib.request.urlopen("https://read.douban.com/provider/all", timeout=1) as file:
            # The page body is too large to print, so only its length is shown.
            print(len(file.read().decode("utf-8")))
    except Exception as err:
        # Best-effort demo: report the failure and continue with the next attempt.
        print("出现异常!", err)
# Simulating HTTP requests automatically: sending GET / POST requests to a page.
# GET request: run a Baidu search automatically.
keywd = "Python"
keywd = urllib.request.quote(keywd)  # percent-encode the keyword for the URL
# NOTE (from the original author): use plain http here, do not use https.
url = "http://www.baidu.com/s?wd=" + keywd
with urllib.request.urlopen(url) as response:  # a plain GET fetch
    data = response.read().decode("utf-8")
# Form 1: both title variants in one alternation. Because the pattern has two
# groups, findall() returns a list of 2-tuples with one element empty per match.
pat = r'''title:'(.*?)',|title":"(.*?)",'''
result = re.compile(pat).findall(data)
print(result)
# The original author noted that this ran successfully.
keywd = "韦玮"
keywd = urllib.request.quote(keywd)  # percent-encode the keyword for the URL
# The "pn" query parameter is computed as (page number - 1) * 10.
# Form 2: one pattern per title variant, compiled once outside the loop.
pat1 = r"title:'(.*?)',"
pat2 = r'title":"(.*?)",'
title_re1 = re.compile(pat1)
title_re2 = re.compile(pat2)
for i in range(1, 11):
    # NOTE (from the original author): use plain http here, do not use https.
    url = "http://www.baidu.com/s?wd=" + keywd + "&pn=" + str((i - 1) * 10)
    with urllib.request.urlopen(url) as response:  # a plain GET fetch
        data = response.read().decode("utf-8")
    result1 = title_re1.findall(data)
    result2 = title_re2.findall(data)
    for title in result1:
        print(title)
    for title in result2:
        print(title)
    # NOTE(review): the original indentation was lost; these two summary prints
    # are assumed to belong to the loop body - confirm.
    print(result1)
    print(result2)
# Sample output recorded by the original author:
#   韦玮_百度百科
#   韦玮_百度图片
#   韦玮_歌曲在线试听_网易云音乐
#   韦玮(女演员)_百度百科
#   …
# POST request.
# Target form: https://www.iqianyue.com/mypost/
import urllib.parse

posturl = "https://www.iqianyue.com/mypost/"
# The form fields are built as a dict, URL-encoded, then converted to bytes
# because the request body passed to Request() is bytes.
postdata = urllib.parse.urlencode(
    {
        "name": "111",
        "pass": "222",
    }
).encode("utf-8")
# To POST, build a urllib.request.Request(real POST address, POST data).
req = urllib.request.Request(posturl, postdata)
with urllib.request.urlopen(req) as response:
    result = response.read().decode("utf-8")
print(result)
# Output recorded by the original author:
#   passwd:
#   you input name is:111
#   you input passwd is:222
posturl = "https://www.iqianyue.com/mypost/"
# Build the form data. Note: the data is constructed as a dict, URL-encoded,
# then converted to bytes for the request body.
postdata = urllib.parse.urlencode(
    {
        "name": "111",
        "pass": "222",
    }
).encode("utf-8")
# To POST, build a urllib.request.Request(real POST address, POST data).
req = urllib.request.Request(posturl, postdata)  # prepare the request
with urllib.request.urlopen(req) as response:  # send it and fetch the reply
    result = response.read().decode("utf-8")
# Save the reply locally. The raw string keeps the Windows backslashes literal,
# and the explicit encoding makes the write independent of the locale codec.
with open(r"D:\百度网盘\post.html", "w", encoding="utf-8") as fh:
    fh.write(result)
# The original author noted that this ran successfully.