# 爬虫基础
import requests
#引用requests库
# Download chapter one of "Romance of the Three Kingdoms"; the call returns a
# Response object, which is named res here.
# The timeout stops the script from hanging forever if the server never answers.
res = requests.get(
    'https://localprod.pandateacher.com/python-manuscript/crawler-html/sanguo.md',
    timeout=10,
)
# Set the encoding used to decode the Response body to UTF-8.
res.encoding = 'utf-8'
# Print the response status code to check whether the request succeeded
# (200 means success).
print(res.status_code)
# res.text returns the content of the Response object as a string.
novel = res.text
# Print the first 800 characters of the novel.
print(novel[:800])
'''
response.status_code 检查请求是否成功
response.content 把response对象转换为二进制数据
response.text 把response对象转换为字符串数据
response.encoding 定义response对象的编码
'''
#爬音乐,二进制文件 重点wb
#open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
'''
'r' open for reading (default)
'w' open for writing, truncating the file first
'x' create a new file and open it for writing
'a' open for writing, appending to the end of the file if it exists
'b' binary mode
't' text mode (default)
'+' open a disk file for updating (reading and writing)
'U' universal newline mode (deprecated; removed in Python 3.11)
'''
import requests
# Download an MP3 file. Audio is binary data, so the body is read through
# res.content (bytes) and written to disk in 'wb' (write-binary) mode.
url = 'https://static.pandateacher.com/Over%20The%20Rainbow.mp3'
# The timeout stops the script from hanging forever if the server never answers.
res = requests.get(url, timeout=10)
# Print the status code to check whether the request succeeded (200 means success).
music_marker = res.status_code
print(music_marker)
music = res.content
# The with-block closes the file automatically, even if the write fails.
with open('xx2.mp3', 'wb') as tt:
    tt.write(music)
# 顺便引入一些字符编码知识
# str -> bytes: encode the text with UTF-8 and show the resulting bytes object.
# Other codec names such as 'gbk' can be passed instead; 'ascii' cannot
# represent these characters and would raise UnicodeEncodeError.
encoded = '臂章'.encode('utf-8')
print(encoded)
# bytes -> str: decode UTF-8 bytes back into the original text.
decoded = b'\xe8\x87\x82\xe7\xab\xa0'.decode('utf-8')
print(decoded)
# str.split(sep)   分割字符串:str 为目标字符串,sep 为分隔符,返回列表
# sep.join(list)   合并列表:list 为目标列表(元素须为字符串),sep 为合并时插入的连接字符
# 顺便引入一些文件读写的知识
# Read a text file. The with-block closes the file even if reading raises,
# which the manual open()/close() pair did not guarantee.
# Note: readlines() returns a list of lines (each keeping its line ending),
# whereas read() returns the whole content as a single string.
with open('path', 'r', encoding='utf-8') as file:
    file_lines = file.readlines()

# Write a text file ('w' truncates any existing content first).
# Note: writelines() takes an iterable of strings and adds no line separators,
# whereas write() takes a single string.
# final_scores is not defined in this snippet; it must be provided elsewhere.
with open('path', 'w', encoding='utf-8') as winner:
    winner.writelines(final_scores)