'''
# urllib basics demo (commented out).
import urllib.request as r

# urlretrieve(url, local_path) downloads a page straight to a local file.
# r.urlretrieve("https://blog.csdn.net/weixin_43870649/article/details/101419805","D:\\Desktop\\python爬虫\\download1.html")

# urlcleanup() clears the temporary cache left behind by urlretrieve.
r.urlcleanup()

file = r.urlopen("https://read.douban.com/provider/all/")
# info() returns the response headers.
# print(file.info())
# getcode() returns the HTTP status code of the fetched page.
print(file.getcode())
# geturl() returns the final (possibly redirected) URL.
print(file.geturl())
'''
'''
# Timeout demo (commented out): repeatedly fetch a page with a short timeout.
import urllib.request as r

for i in range(0, 100):
    # NOTE: urlopen itself raises the timeout error, so it must sit inside
    # the try block — in the original it was outside, so the very exception
    # this demo exists to show would have crashed the loop. Indentation of
    # the loop/try bodies was also missing and has been restored.
    try:
        file = r.urlopen("http://www.baodu.com", timeout=0.2)
        print(len(file.read()))
    except Exception as err:
        print(str(err))
'''
'''
# Simulated HTTP GET requests (commented out): scrape Baidu result titles.
# Indentation of the loop bodies was missing in the original paste and has
# been restored so the snippet would actually run if uncommented.
import urllib.request as r
import re

keyword = "李博浩"
# Percent-encode the non-ASCII query term so it is URL-safe.
keyword = r.quote(keyword)
for i in range(1, 1000):
    # pn is the result offset: page i starts at (i - 1) * 10.
    url = "http://www.baidu.com/s?wd=" + keyword + "&pn=" + str((i - 1) * 10)
    file = r.urlopen(url).read().decode('utf-8')
    # Two title formats appear in Baidu result markup; try both patterns.
    pat1 = "title:'(.*?)',"
    pat2 = '"title":"(.*?)"'
    rst1 = re.compile(pat1).findall(file)
    rst2 = re.compile(pat2).findall(file)
    for title in rst1:
        print(title)
    for title in rst2:
        print(title)
'''
'''
# Simulated HTTP POST requests (commented out): log in to a forum repeatedly.
# Indentation of the loop/try bodies was missing in the original paste and
# has been restored.
import urllib.request as r
import urllib.parse as p
import re

posturl = "http://39.105.30.139:8081/f7/loginadmin.action"
# Form fields must be urlencoded and byte-encoded before POSTing.
postdata = p.urlencode({
    "a_id": "17",
    "a_pwd": "11111111",
}).encode("utf-8")
for i in range(0, 50):
    try:
        # Passing data to Request makes urlopen issue a POST.
        req = r.Request(posturl, postdata)
        rst = r.urlopen(req).read().decode("utf-8")
        # BUG FIX: re.search() returns a Match object or None — never the
        # string "NONE" — so the original `!= "NONE"` comparison was always
        # true and reported success unconditionally.
        if re.search("论坛登录成功", rst) is not None:
            print("论坛登录成功")
    except Exception as err:
        print(str(err))
'''