python-study-day3

最新推荐文章于 2024-07-26 17:36:26 发布

苦逼的猿宝

最新推荐文章于 2024-07-26 17:36:26 发布

阅读量207

点赞数 3

分类专栏： python 文章标签： python java 开发语言 mysql javascript

本文链接：https://blog.csdn.net/GAGGAAAAA/article/details/139060824

版权

python 专栏收录该内容

5 篇文章 0 订阅

订阅专栏

这期比较乱,简单记录一下

百度爬取图片


# Download every post image from one Baidu Tieba thread and save the images
# as 0.jpg, 1.jpg, ... in a fixed local directory.
# NOTE(review): this snippet uses `requests` and `etree` without importing
# them here; presumably `import requests` and `from lxml import etree` are
# expected to run first -- confirm.
url = 'https://tieba.baidu.com/p/5475267611'  # address of the page to request
response = requests.get(url).text  # body of the response, decoded as text

selector = etree.HTML(response)  # parse the response text as HTML
# XPath: the src attribute of every <img> whose class is "BDE_Image".
images_urls = selector.xpath('//img[@class="BDE_Image"]/@src')

offset = 0  # running index used as the file name so files never overwrite each other

for image_url in images_urls:  # walk the list of image addresses returned by xpath
    # Image data is binary, so take .content (bytes) rather than .text.
    image_content = requests.get(image_url).content
    # Raw string: the backslashes in the Windows path are literal characters.
    # The previous non-raw literal relied on the invalid escapes "\P" and "\{",
    # which trigger a SyntaxWarning on recent Python versions.
    # open() does not create missing directories, so the target folder must exist.
    with open(r'D:\Python\PythonProject2\{}.jpg'.format(offset), 'wb') as f:
        f.write(image_content)  # write the downloaded bytes to disk
    offset += 1  # next image gets the next number

模拟信息的编写（headers）

# Request headers carrying a desktop-browser User-Agent string
# (Chrome 124 on 64-bit Windows NT 10.0), for passing to requests calls.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    ),
}

文件的读写

# 文件的打开和关闭

# open函数 可以打开一个已经存在的文件，或者创建一个新的文件
# open('文件的路径','访问模式')
# 模式 w 可写
#     r 可读

# open('1.text','w')


# 向文件中写入文字
# fp = open('1.text','w')
# fp.write('Hello World')


# 关闭文件
# fp = open('2.text','w')
# fp.write('Hello')
# fp.close()


# 字符串乘法
# fp = open('333.txt','w')
# fp.write('Hello\n' * 5)
# fp.close()


# 读数据
# fp = open('333.txt','r')
# content = fp.read()
# content = fp.readline()
# content = fp.readlines()
# print(content)

序列化和反序列化

# 序列化
# # 引入json库
import json
# # 创建一个文件
# fp = open('test.txt','w')
# # 定义一个数组
# arr = ['zhangsan','lkisi']
# # 将数组转换成字符串
# names = json.dumps(arr)
# # 写入文件
# fp.write(names)
# # 关闭文件
# # fp.close()



# Deserialization: read the JSON text stored in test.txt and convert it back
# into a Python object.
# The with-block closes the file as soon as the read finishes (or fails);
# the previous bare open() never closed the handle.
with open('test.txt', 'r') as fp:
    content = fp.read()
names = json.loads(content)  # parse the JSON string into a Python value
print(names)
print(type(names))  # show which Python type the JSON value was decoded into

主要涉及的库

requests 、json

酷狗音乐单曲爬取（未完成）

# 发送请求
import requests

# Browser-like User-Agent sent with the search request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    ),
}

# Kugou song-search URL, split one query parameter per line for readability.
# Adjacent string literals are joined by Python, so the value is one URL.
# NOTE(review): clienttime and signature are hard-coded captured values;
# presumably the server validates them, so this URL may stop working -- confirm.
music_list_url = (
    "https://complexsearch.kugou.com/v2/search/song"
    "?callback=callback123"
    "&srcappid=2919"
    "&clientver=1000"
    "&clienttime=1715694313463"
    "&mid=1f9fb71c7d9dfc610678eb2420d6e269"
    "&uuid=1f9fb71c7d9dfc610678eb2420d6e269"
    "&dfid=0R7dfV4WfNdS2l6O9v1VvMmL"
    "&keyword=%E6%88%91%E4%BC%9A%E7%AD%89"
    "&page=1"
    "&pagesize=30"
    "&bitrate=0"
    "&isfuzzy=0"
    "&inputtype=0"
    "&platform=WebFilter"
    "&userid=0"
    "&iscorrection=1"
    "&privilege_filter=0"
    "&filter=10"
    "&token="
    "&appid=1014"
    "&signature=ee0bd52446518b89f030f308dbdd7341"
)

res_pon = requests.get(
    music_list_url,
    headers=headers,
)
# Drop the first 12 and last 2 characters of the body before printing.
# Presumably this strips the "callback123(" wrapper requested through the
# callback= parameter and its closing characters -- confirm against a real response.
print(res_pon.text[12:-2])



# # 音乐的地址
# music_url = 'https://webfs.hw.kugou.com/202405142108/6767d7147ed153becf7a6e0a1e6a59b9/part/0/960115/KGTX/CLTX001/clip_e2c910c96ce2bf6aeeb97eb7e77c8932.mp3'
# m_response = requests.get(music_url, headers=headers)
# with open('zzz6.mp3', 'wb') as f:
#     f.write(m_response.content)
# # 发送请求到服务器,获取音乐资源