# python loop structures
# 1. The for loop
# A for loop visits each element of a sequence in order and processes it.
# Syntax:
#   for variable in sequence:
#       body
# Loop a fixed number of times with: for i in range(start, stop, step)
for i in range(1, 100, 1):  # prints 1..99 (the stop value is exclusive)
    print(i)

# Example: print "hello world" 1000 times.
for i in range(1000):  # range(1000) yields the sequence 0..999
    print("第", i + 1, "hello world")

list1 = [1, 2, 3, 4, 5]                 # list
tupe1 = (5, 6, 7, 8, 9)                 # tuple
dict1 = {"name": "张三", "age": 18}     # dict
col = {"a", "b", "c"}                   # set

for i in list1:
    print(i)
for i in tupe1:
    print(i)
for i in dict1:  # iterating a dict yields only its keys
    print(i)
for i in col:
    print(i)

# Summary:
# 1. Iterating a dict yields only the keys; index with the key to reach the value.
for i in dict1:
    print(dict1[i])
# 2. for loops suit a known iteration count (e.g. printing exactly 1000 copies).
# Day 1 -- web-scraping notes
# Anti-crawling measure 1: the server checks whether the client looks like a
# browser.  Countermeasure: send a real browser User-Agent header.
from urllib import request
import re
import random

# Pool of genuine browser User-Agent strings; one is picked at random per run.
agent1 = "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, likeGecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36"
agent2 = "Mozilla/5.0 (Linux; Android 8.1; EML-AL00 Build/HUAWEIEML-AL00; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.143Crosswalk/24.53.595.0 XWEB/358 MMWEBSDK/23 Mobile Safari/537.36 MicroMessenger/6.7.2.1340(0x2607023A) NetType/4G Language/zh_CN"
agent3 = "Mozilla/5.0 (Linux; U; Android 8.0.0; zh-CN; MHA-AL00 Build/HUAWEIMHA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/12.1.4.994 Mobile Safari/537.36"
agent4 = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE"
agent5 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"
list1 = [agent1, agent2, agent3, agent4, agent5]
agent = random.choice(list1)
print(agent)

url = r"http://www.baidu.com/"
# Request headers that impersonate a browser.
header = {
    "user-agent": agent,
}
# Pattern for pulling data out of the response body.
# NOTE(review): the original line was truncated (`pat=r"`); this
# title-extracting regex is a placeholder guess -- confirm the intended pattern.
pat = r"<title>(.*?)</title>"

if __name__ == "__main__":
    # Build a custom Request object so the spoofed header is sent.
    # Anti-crawling measure 1: the site checks for a browser; the fake
    # User-Agent above gets us past that check.
    req = request.Request(url, headers=header)
    # Send the request and decode the response bytes (decode() inverts encode()).
    response = request.urlopen(req).read().decode()
    data = re.findall(pat, response)
    print(data[0])
# DAY 2
# Anti-crawling measure 2: the server checks the request's source IP address.
# Countermeasure: route the request through a (randomly chosen) proxy IP.
# NOTE(review): the original kept this section disabled inside a triple-quoted
# string; the network call is now guarded under __main__ instead, so importing
# this module stays offline.
proxylist = [
    {"http": "101.248.64.82:80"},
    {"http": "115.238.59.86:53400"},
    {"http": "113.195.153.134:9999"},
    {"http": "36.248.133.20:9999"},
    {"http": "117.88.4.10:3000"},
    {"http": "117.88.177.189:3000"},
    {"http": "175.42.68.61:9999"},
]
proxy = random.choice(proxylist)
# Handler that routes traffic through the chosen proxy.
proxyHandler = request.ProxyHandler(proxy)
# Custom opener built around the proxy handler.
opener = request.build_opener(proxyHandler)

if __name__ == "__main__":
    req = request.Request("http://www.baidu.com")
    res = opener.open(req)
    print(res)
# day 3
# Handling a GET request whose query string contains non-ASCII text.
from urllib import request
import urllib.parse
# A search URL looks like:
#   https://www.baidu.com/s?ie=utf-8&...&wd=java
#   https://www.baidu.com/s?ie=utf-8&...&wd=%E5%8C%97%E4%BA%AC   (URL-encoded)
wd = {"wd": "北京"}
# BUG FIX: the host was written "www,baidu.com" (comma); corrected to a dot.
url = "http://www.baidu.com/s?"
# Percent-encode the query parameters (UTF-8 bytes become %XX escapes).
wdd = urllib.parse.urlencode(wd)
url = url + wdd

if __name__ == "__main__":
    req = request.Request(url)
    response = request.urlopen(req).read().decode()
    print(response)