爬虫2021-10-22

import requests

#将内容写到文件中
def write_to_file(filename, html):
    """Write ``html`` to ``filename`` as UTF-8 text, replacing any existing file.

    Args:
        filename: Path of the file to create or overwrite.
        html: Text to write.
    """
    # The context manager closes the file even if the write raises.
    with open(filename, 'w', encoding='utf8') as f:
        f.write(html)

#加载网页到本地目录
def load_page_with_urllib(filename, res):
    """Decode a response body as UTF-8, print it, and save it to ``filename``.

    Args:
        filename: Destination path handed to ``write_to_file``.
        res: Object whose ``read()`` returns the raw body bytes
            (presumably a urllib response; verify against callers).
    """
    html = res.read().decode('utf-8')
    print(html)
    write_to_file(filename, html)

#加载网页用requests
def load_page_with_requests(filename, res):
    """Decode a ``requests`` response body as UTF-8, print it, and save it.

    Args:
        filename: Destination path handed to ``write_to_file``.
        res: Response object whose ``content`` attribute holds the raw bytes.
    """
    # Decode the raw bytes explicitly instead of relying on res.text.
    html = res.content.decode('utf-8')
    print(html)
    write_to_file(filename, html)

#基本请求
def send_url():
    """Send a basic GET request to httpbin and print the response details.

    Prints the status code, encoding, headers and body text, saves the body
    to ``send_url.html``, then prints the ``Host`` value from the JSON body.
    """
    url = "http://httpbin.org/get"
    res = requests.get(url)
    print(res.status_code)
    print(res.encoding)
    print(res.headers)
    print(res.text)
    load_page_with_requests('send_url.html', res)
    print("Host:" + res.json()['headers']['Host'])

#获取图片
def send_url_with_pic():
    """Download an image over HTTP and save its raw bytes to ``logo.png``."""
    url = "https://gimg2.baidu.com/image_search/src=http%3A%2F%2F5b0988e595225.cdn.sohucs.com%2Fq_70%2Cc_zoom%2Cw_640%2Fimages%2F20180208%2F2f11e11351254d61be2e41da68cff160.jpeg&refer=http%3A%2F%2F5b0988e595225.cdn.sohucs.com&app=2002&size=f9999,10000&q=a80&n=0&g=0n&fmt=jpeg?sec=1637465556&t=56b242f7b1e2ab212d4258d25114f179"
    res = requests.get(url)
    print(res.status_code)
    # NOTE(review): the URL asks for fmt=jpeg while the file is named .png;
    # the file name is kept unchanged here.
    # Binary mode: res.content is bytes. The with-block closes the file,
    # so no explicit close() is needed.
    with open('logo.png', 'wb') as f:
        f.write(res.content)

def send_with_text_encoding():
    """Show how ``res.encoding`` affects ``res.text`` for a fetched page.

    Prints the Content-Type header, ``res.encoding``,
    ``res.apparent_encoding`` and the text, then sets ``res.encoding`` to
    ``res.apparent_encoding`` and prints the text again.
    """
    url = "http://www.google.cn/"
    res = requests.get(url)
    print(res.headers.get("Content-Type"))
    print(res.encoding)
    print(res.apparent_encoding)
    print(res.text)

    # Switch to the apparent encoding and print the text a second time.
    res.encoding = res.apparent_encoding
    print(res.text)

#text 编码问题
def send_with_text_encoding_baudu():
    """Fetch the Baidu home page with a browser User-Agent and print encodings.

    Prints the Content-Type header, ``res.encoding``,
    ``res.apparent_encoding`` and the response text.
    """
    url = "http://www.baidu.com/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }

    res = requests.get(url, headers=headers)
    print(res.headers.get("Content-Type"))
    print(res.encoding)
    print(res.apparent_encoding)
    print(res.text)

#带参数的get请求
def send_get_with_param():
    """Send a GET request with query parameters and save the result page.

    The search term is passed through ``params=`` rather than being written
    into the URL by hand. The response body is saved to
    ``send_get_with_search.html``.
    """
    url = "http://cn.bing.com/search"
    word = {"q": "Python网络爬虫"}
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }
    res = requests.get(url, params=word, headers=headers)
    load_page_with_requests('send_get_with_search.html', res)

#带参数的post请求
def send_post_with_from():
    """POST form data to the Youdao translate endpoint and print the result.

    Prints the status code and raw body, then the source and translated
    text taken from the first entry of ``translateResult`` in the JSON body.
    """
    url = "http://fanyi.youdao.com/translate?smartresult=dict&client=fanyideskweb"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }
    formdata = {
        "i": "我很喜欢你",
        # Stray space removed from the language code ("zh- CHS").
        "from": "zh-CHS",
        "to": "en",
        "smartresult": "dict",
        "client": "fanyideskweb",
        # NOTE(review): salt/sign/lts/bv look like values captured from one
        # browser session; confirm whether the service still accepts them.
        "salt": "16323927269914",
        "sign": "007b3f6883f0f3c416a901c9ad793e9e",
        "lts": "1632392726991",
        "bv": "af63988f520164170458f1e28c9a002f",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        # NOTE(review): "REALTlME" contains a lowercase "l"; kept as-is,
        # confirm against the service before changing it.
        "action": "FY_BY_REALTlME"
    }
    res = requests.post(url, data=formdata, headers=headers)
    print(res.status_code)
    print(res.text)
    # Parse the JSON body once and reuse the first translation entry.
    first = res.json()['translateResult'][0][0]
    print("翻译前:" + first['src'])
    print("翻译后:" + first['tgt'])

#ssl
def send_with_ssl():
    """Request an HTTPS page with a file path supplied as ``verify``.

    Two alternative calls (default verification and ``verify=False``) are
    kept as comments for comparison.
    """
    url = "https://kyfw.12306.cn/otn/leftTicket/init"
    # Variant 1: default certificate verification.
    # res001 = requests.get(url)
    # print(res001.status_code)
    # print(res001.text)

    # Variant 2: certificate verification disabled.
    # res002 = requests.get(url, verify=False)
    # print(res002.status_code)
    # print(res002.text)

    # Variant 3: verify is given a file path; per the requests docs a string
    # here is used as the CA bundle location. The path is machine-specific.
    res003 = requests.get(url, verify="D:/temp.txt")
    print(res003.status_code)
    print(res003.text)

#cookie
def send_with_cookie():
    """Send cookies to httpbin in two ways and print each response.

    The first request carries a raw ``Cookie`` header; the second passes a
    dict through the ``cookies=`` argument.
    """
    url = "http://httpbin.org/cookies"

    # Request 1: cookies supplied as a raw Cookie header string.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
        "Cookie": "BAIDUID=BA8FA74EFFBA879E955AF4D663DC8704:FG=1; BIDUPSID=BA8FA74EFFBA879E955AF4D663DC8704; PSTM=1633746545; __yjs_duid=1_74504ccf626209dc8ef2de77b3f62efc1633748666329; BAIDUID_BFESS=BA8FA74EFFBA879E955AF4D663DC8704:FG=1; COOKIE_SESSION=1039_0_4_5_4_2_0_0_4_2_0_0_1037_0_5_0_1633761595_0_1633761590%7C5%230_0_1633761590%7C1; BD_HOME=1; H_PS_PSSID=34443_34067_34864_34712_34584_34518_34829_26350_34826; BD_UPN=12314753; BA_HECTOR=0ka0a084a12g2480bp1gn472q0r"
    }

    res001 = requests.get(url, headers=headers)
    print(res001.status_code)
    print(res001.text)

    # Request 2: no Cookie header; cookies are supplied via cookies= instead.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }

    cookies = {
        "JSESSIONID": "aaaVUedIImSIE84hV5LYx",
        "OUTFOX_SEARCH_USER_ID": "1126546161@10.108.160.105",
        "OUTFOX_SEARCH_USER_ID_NCOO": "301032269.40844065"
    }

    res002 = requests.get(url, headers=headers, cookies=cookies)
    print(res002.status_code)
    print(res002.text)

# Run every demo in order when the file is executed as a script.
# The double underscores of __name__ / '__main__' were lost in the paste;
# without them the guard raises NameError.
if __name__ == '__main__':
    send_url()
    send_url_with_pic()
    send_with_text_encoding()
    send_with_text_encoding_baudu()
    send_get_with_param()
    send_post_with_from()
    # send_with_ssl() is disabled: it needs a local CA bundle file.
    # send_with_ssl()
    send_with_cookie()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值