文章目录
加速pip3的下载速度
国外的源访问很慢,可以用国内的源进行下载,这里以清华的源为例,命令如下:
pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple 要安装的库的名称
python3爬虫学习
url中含有中文时字符串报错
# Percent-encode the URL so characters outside printable ASCII (e.g. Chinese)
# become %XX escapes; every character in string.printable is declared safe
# and is therefore left untouched.
result2 = urllib.parse.quote(
    result,
    safe=string.printable,
)
字典传参
# Query parameters to send, kept as a plain mapping
params = dict(wd="张三", key="zhang")

# urlencode turns the mapping into "key=value&key=value" text and
# percent-encodes the values along the way.
str_params = urllib.parse.urlencode(params)

# Final address: the base url followed directly by the encoded parameters
result = url + str_params
将数据写入文件
# Save the fetched text to 01Header.txt.
# The encoding is passed explicitly: without it open() falls back to the
# locale's default encoding, and writing non-ASCII text (e.g. Chinese page
# content) can then raise UnicodeEncodeError on some platforms.
# NOTE(review): text mode is used, so `data` is assumed to be a str.
with open("01Header.txt", "w", encoding="utf-8") as f:
    f.write(data)
json文件写入csv
import json
import csv

# Read the JSON input. content.txt is expected to hold a JSON array of
# objects (the code below indexes element 0 and reads keys/values of each
# element). The context manager closes the file even if parsing fails.
with open("content.txt", "r", encoding="utf-8") as json_f:
    json_list = json.load(json_f)

# CSV header: the keys of the first record (empty when there are no records,
# instead of failing with IndexError on json_list[0]).
head = list(json_list[0].keys()) if json_list else []

# CSV body: one row per record, made of that record's values
rows = [list(da.values()) for da in json_list]

# Write the CSV. newline="" is what the csv module requires for files handed
# to csv.writer; without it extra blank lines appear between rows on
# platforms that translate "\n" to "\r\n".
with open("c.csv", "w", newline="") as csv_f:
    writer = csv.writer(csv_f)
    if json_list:
        writer.writerow(head)
    writer.writerows(rows)
随机获取列表中的元素
# Pool of browser User-Agent strings to pick from. Each entry is written as
# adjacent string literals, which Python joins into one string.
user_agent_list = [
    (
        "Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) "
        "AppleWebKit/535.19 (KHTML, like Gecko) "
        "Chrome/18.0.1025.166 Safari/535.19"
    ),
    (
        "Mozilla/5.0 (Linux; U; Android 4.0.4; en-gb; GT-I9300 Build/IMM76D) "
        "AppleWebKit/534.30 (KHTML, like Gecko) "
        "Version/4.0 Mobile Safari/534.30"
    ),
    (
        "Mozilla/5.0 (Linux; U; Android 2.2; en-gb; GT-P1000 Build/FROYO) "
        "AppleWebKit/533.1 (KHTML, like Gecko) "
        "Version/4.0 Mobile Safari/533.1"
    ),
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/27.0.1453.93 Safari/537.36"
    ),
]

# Pick one entry at random so that requests do not all present the same browser
random_user_agent = random.choice(user_agent_list)
使用代理访问url
# Proxy mapping: URL scheme -> proxy address. Only an "http" entry is
# configured here.
proxy = {
    "http": "http://113.214.13.1:1080",
}
# Handler that sends requests through the proxy mapping above
proxy_header = urllib.request.ProxyHandler(proxy)
# Build a custom opener that uses the proxy handler
opener = urllib.request.build_opener(proxy_header)
# Fetch the url through the opener. The response is used as a context manager
# so the connection is closed once the body has been read, and a timeout keeps
# an unreachable proxy from blocking the script indefinitely.
with opener.open(url, timeout=10) as response:
    data = response.read()
print(data)
带着cookie访问url
# Log in to obtain the cookie
url = "https://www.yaozh.com/login/"
# Form fields submitted with the login request.
# NOTE(review): the account name and password are hard-coded in source;
# consider loading them from configuration or the environment instead.
login_form = {
    "type": 0,
    "username": "ajay_ljj",
    "pwd": "11133811ljj",
    "country": "86_zh-CN",
    "formhash": "E6BE776E83",
    # Keep this value un-encoded: urlencode() below percent-encodes every
    # value, so a pre-encoded "https%3A%2F%2F..." would be encoded a second
    # time ("%" -> "%25") and reach the server as a double-encoded URL.
    "backurl": "https://www.yaozh.com/member/"
}
# Encode the form as application/x-www-form-urlencoded text, then to bytes,
# which is the form urllib expects for a POST body.
longin_form_data_byte = urllib.parse.urlencode(login_form).encode("utf-8")
header_data = {
"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_2) AppleWebKit/537.36 (KHTML, like