Python七个获取各种信息的实例 有兴趣可以看一下

前言:

本篇记录了我自己敲过的几个获取不同类型信息(文字、图片、音乐等)的实例。因为这类文章不适合讲太多话,所以我就直接放源代码与最终效果展示图片,有需要的自取。

一、最基础的进行伪装并保存

import requests
# Ask for the search term first; it is used both as the query and the file name.
word = input('请输入您想要查询的内容')
# Target URL (left blank in this listing).
url = ''
# Custom request headers: User-Agent disguise.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.47.102 Safari/537.36'
}
# Send the request with the search term as the `wd` query parameter.
response = requests.get(url=url, params={'wd': word}, headers=headers)
# Decode the body with the encoding detected from its content so Chinese
# text is not garbled.
response.encoding = response.apparent_encoding
# Persist the decoded page as "<search term>.html".
with open(word + '.html', 'w', encoding='utf-8') as f:
    f.write(response.text)
print('okk')

二、获取文字信息并进行排名

import re
import csv
import requests
# 1. User-Agent disguise.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.48.102 Safari/537.36'
}
# 2. Target URL (left blank in this listing).
url = ''
# 3. Send the request.
response = requests.get(url=url, headers=headers)
# 4. Keep the decoded page text for parsing.
data = response.text
# Parse the page: one match per <li> item, with named groups for the title,
# year, score and number of ratings. re.S lets `.` match across newlines.
obj = re.compile(r'<li>.*?<div class="item">.*?<span class="title">(?P<name>.*?)</span>'
                 r'.*?<p class="">.*?<br>(?P<year>.*?)&nbsp'
                 r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                 r'.*?<span>(?P<renshu>.*?)人评价</span>', re.S)
# `with` guarantees the file is flushed and closed; newline='' is what the
# csv module requires so it can control line endings itself (otherwise blank
# rows appear on Windows).
with open('./dy.csv', 'w', encoding='utf-8', newline='') as f:
    csvwriter = csv.writer(f)
    for i in obj.finditer(data):
        print(i.group('name'))
        print(i.group('year'))
        print(i.group('score'))
        print(i.group('renshu'))
        # Row order follows the group order in the pattern.
        csvwriter.writerow(i.groupdict().values())
        print('成功')

效果图片:

 

三、获取图片信息

import os

import requests
from lxml import etree

# 1. User-Agent disguise.
headers = {
    'user-agent': 'Mozilla/5.0 (Winds NT 10.0; Win64; x64) AppleWebKit/5376 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
}
# 2. Target URL (left blank in this listing).
url = ''
# Send the request.
response = requests.get(url=url, headers=headers)
# Decode the page as GBK to avoid garbled text.
response.encoding = 'gbk'
data = response.text
# Parse the page and select one <li> per picture.
tree = etree.HTML(data)
li_list = tree.xpath('/html/body/div[2]/div[1]/div[3]/ul/li')
# open() does not create missing directories, so make sure the output
# directory exists before the first image is written.
os.makedirs('./tupian', exist_ok=True)
for li in li_list:
    name = li.xpath('./a/b/text()')[0] + '.jpg'
    href = 'https://pic.netbian.com/' + li.xpath('./a/img/@src')[0]
    # Images are binary, so use .content (bytes) rather than .text.
    img_data = requests.get(url=href, headers=headers).content
    # Persist the image.
    img_path = './tupian/' + name
    with open(img_path, 'wb') as f:
        f.write(img_data)
    print(name + '下载成功')

最终效果(图片违规放不出来,给大家看一下目录吧): 

四、获取多章节文字信息

import os

import requests
from lxml import etree

# NOTE(review): this listing is the same image-download script as in
# section 三; the multi-chapter text scraper announced by the heading is not
# shown here.

# 1. User-Agent disguise.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/5.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safi/537.36'
}
# 2. Target URL (left blank in this listing).
url = ''
# Send the request.
response = requests.get(url=url, headers=headers)
# Decode the page as GBK to avoid garbled text.
response.encoding = 'gbk'
data = response.text
# Parse the page and select one <li> per picture.
tree = etree.HTML(data)
li_list = tree.xpath('/html/body/div[2]/div[1]/div[3]/ul/li')
# open() does not create missing directories, so make sure the output
# directory exists before the first image is written.
os.makedirs('./tupian', exist_ok=True)
for li in li_list:
    name = li.xpath('./a/b/text()')[0] + '.jpg'
    href = 'https://pic.netbian.com/' + li.xpath('./a/img/@src')[0]
    # Images are binary, so use .content (bytes) rather than .text.
    img_data = requests.get(url=href, headers=headers).content
    # Persist the image.
    img_path = './tupian/' + name
    with open(img_path, 'wb') as f:
        f.write(img_data)
    print(name + '下载成功')

最终效果:

五、自动联动识别

import requests
from lxml import etree
from hashlib import md5

class Chaojiying_Client(object):
    """Small HTTP client for the Chaojiying image-recognition upload API."""

    def __init__(self, username, password, soft_id):
        """Store the account data sent with every request.

        The password is never kept in clear text: it is UTF-8 encoded and
        stored/sent as its MD5 hex digest (the `pass2` form field).
        """
        self.username = username
        self.password = md5(password.encode('utf8')).hexdigest()
        self.soft_id = soft_id
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
        }

    def PostPic(self, im, codetype):
        """Upload an image for recognition and return the decoded JSON reply.

        im: image bytes
        codetype: question type, see http://www.chaojiying.com/price.html
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('ccc.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
        return r.json()

    def ReportError(self, im_id):
        """Report a wrongly recognised image and return the decoded JSON reply.

        im_id: picture ID of the question being reported
        """
        params = {
            'id': im_id,
        }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()


def get_code(un, pw, id, image_path='a.jpg', codetype=1902):
    """Send a local image to Chaojiying and return the service's JSON reply.

    un: account user name
    pw: account password (clear text; hashed by the client before sending)
    id: software ID generated in the user centre (name kept for
        backward compatibility even though it shadows the builtin)
    image_path: local image file to upload; defaults to 'a.jpg'
    codetype: question type passed to PostPic; defaults to 1902

    Raises FileNotFoundError (or another OSError) if the image cannot be read.
    """
    # Read the image first so a missing file fails before any network call,
    # and use `with` so the handle is always closed.
    with open(image_path, 'rb') as f:
        im = f.read()
    # The client is built unconditionally: the former `__main__` guard left
    # these names unbound whenever this module was imported.
    chaojiying = Chaojiying_Client(un, pw, id)
    return chaojiying.PostPic(im, codetype)

# Page that contains the captcha image (left blank in this listing).
url = ''
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWeb6 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
}
response = requests.get(url=url, headers=headers)
tree = etree.HTML(response.text)
# The src of the element with id "imgCode" is site-relative, so prefix the host.
img = 'https://so.gushiwen.cn' + tree.xpath('//*[@id="imgCode"]/@src')[0]
# headers must be passed by keyword: the second positional parameter of
# requests.get is `params`, so a positional dict would be sent as a query
# string and the User-Agent would not be applied.
r = requests.get(img, headers=headers)
# Save the image where get_code() reads it from by default.
with open('./a.jpg', 'wb') as f:
    f.write(r.content)
# NOTE(review): account credentials are hard-coded here; prefer loading them
# from the environment or a config file that is not published.
result = get_code('textzhanghu', 'qin729700', '930758')
print(result['pic_str'])

 效果图片:

六、获取音乐信息

import os
from lxml import etree
import requests
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKi6 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
}
# Playlist URL (left blank in this listing); remove the '#' from it.
url = ''
response = requests.get(url=url, headers=headers)
tree = etree.HTML(response.text)
# This XPath gives a clean list of songs. The alternative
# '//a[contains(@href,"/song?")]' also works but picks up unrelated entries.
tr_list = tree.xpath('//ul[@class="f-hide"]/li')
# open() does not create missing directories, so make sure the output
# directory exists before the first track is written.
os.makedirs('./music', exist_ok=True)
for tr in tr_list:
    name = tr.xpath('./a/text()')[0]
    href = tr.xpath('./a/@href')[0]
    # Take the song id from the part after '=' (fixed-index slicing,
    # href[9:], was the alternative).
    song_id = href.split('=')[1]
    download = 'https://music.163.com/song/media/outer/url?id=' + song_id
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, not `headers`.
    r = requests.get(download, headers=headers)
    with open('./music/%s.mp3' % name, 'wb') as f:
        f.write(r.content)
    print('%s 下载成功' % name)

最终效果:

七、获取音乐附带信息

import requests
import json
import re
# Lyrics endpoint (left blank in this listing).
url = ''
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWe.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
}
# Form fields posted to the endpoint.
data = {
    'params': 'NiHz5BCMsR5TXmlrbME6QMJhzWT3diBLZj3iAUWAJ+hccTjMLGjMp95hqS1wVKP9eqG/+JpTNpGaTMI3OMhBPB5nKxTeYTIWjyYFF3uy19UUScojX3GIZb2r9+Z1Chvc',
    'encSecKey': 'df5b6563fd7b802e9189f9fb204da42eabc5f182bb91072013be7f489014e1e7cb116f546ee8ae325f6d419ba984be3a5cb7fd6ec7afc9582584e77f8345e65c6f36f7bc6b0e66adc26b4bf0dee4529a4f72596f19546895d50f1ace61bcb430da30cc2188f8b278eac02828f4776de5b844d0df3378c37b929fb8d32179ec4c',
}
# Pass data and headers by keyword: the third positional parameter of
# requests.post is `json`, so a positional headers dict would never be sent
# as request headers.
r = requests.post(url, data=data, headers=headers)
d = r.text
# r.text is a plain string and cannot be indexed by key; parse it into a
# dict first.
obj = json.loads(d)
lrc = obj['lrc']['lyric']
# Remove the bracketed tags (e.g. timestamps) from every line.
pattern = re.compile(r'\[.*\]')
lrc1 = pattern.sub('', lrc)
# Write with an explicit encoding so Chinese lyrics do not depend on the
# platform's default codec.
with open('./歌词.txt', 'w', encoding='utf-8') as f:
    f.write(lrc1)
print('下载成功')

最终效果:

  • 5
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 8
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 8
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

秦同学学学

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值