1、PDF转Word工具
from pdf2docx import Converter

# Convert a PDF file to a Word document with pdf2docx.
cv = Converter(r"E:/AAA.PDF")
try:
    # start/end select the page range; end=None means "through the last page".
    cv.convert("E:/AAA.docx", start=0, end=None)
finally:
    # Always release the underlying PDF handle, even if conversion fails.
    cv.close()
2、百度图片批量下载【可爱猫咪】
import requests
import json
import os
import io
from PIL import Image
import random
import string
# Configuration for the Baidu image-search JSON endpoint.
api_url = 'https://image.baidu.com/search/acjson'
query_word = '可爱猫咪' # search keyword ("cute cat")
page_num = 1 # which results page to fetch (1-based)
num_per_page = 30 # number of results per page
def get_random_string(length):
    """Return a random string of `length` lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    chars = []
    for _ in range(length):
        # One independent draw per output character.
        chars.append(random.choice(alphabet))
    return ''.join(chars)
# Ensure the download directory (named after the search keyword) exists.
if not os.path.exists(query_word):
    os.mkdir(query_word)
    print(f'已创建目录:{query_word}')
# Image save path, derived from the keyword so the two can never drift apart
# (the original hard-coded '可爱猫咪' here a second time).
output_dir = './' + query_word + '/'
# Query-string parameters for the Baidu image-search JSON endpoint.
# The empty/magic fields appear to be required verbatim by the API —
# TODO confirm which are actually mandatory.
params = {
'tn': 'resultjson_com',
'ipn': 'rj',
'ct': 201326592,
'is': '',
'fp': 'result',
'queryWord': query_word,  # search keyword
'cl': 2,
'lm': -1,
'ie': 'utf-8',  # request encoding
'oe': 'utf-8',  # response encoding
'adpicid': '',
'st': -1,
'z': '',
'ic': 0,
'word': query_word,  # search keyword again (the API expects both fields)
's': '',
'se': '',
'tab': '',
'width': '',
'height': '',
'face': 0,
'istype': 2,
'qc': '',
'nc': 1,
'fr': '',
'pn': str((page_num - 1) * num_per_page),  # result offset (0-based)
'rn': str(num_per_page),  # number of results to return
'gsm': '1e',
}
headers = {
    'Referer': 'https://image.baidu.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}
# Query the search API; the timeout keeps a stalled connection from hanging
# the script forever.
response = requests.get(api_url, params=params, headers=headers, timeout=10)
if response.status_code != 200:
    print('请求失败')
else:
    json_data = response.json()  # parse the JSON payload directly
    for data in json_data['data']:
        # The trailing entry of the result list carries no thumbnail; skip it.
        img_url = data.get('thumbURL')
        if img_url is None:
            continue
        try:
            # Use a distinct name so the API response above is not clobbered.
            img_response = requests.get(img_url, timeout=10)
            img = Image.open(io.BytesIO(img_response.content))
            # Random file name avoids collisions between downloads.
            img_name = get_random_string(10)
            print(img_name)
            img.save(os.path.join(output_dir, img_name + '.jpg'))
            print(f'{img_name} 保存成功')
        except Exception as e:
            # Best-effort download: report the failure and continue.
            print(f'保存失败:{e}')
3、获取热点内容
import requests
from bs4 import BeautifulSoup
# Scrape the trending-topics table from tophub.today and print each row.
url = 'https://tophub.today/n/Jb0vmloB1G'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Fetch the page; the timeout prevents a dead connection from hanging.
response = requests.get(url, headers=headers, timeout=10)
html = response.text
# Parse the page: the first <tbody> holds the ranking rows.
soup = BeautifulSoup(html, 'html.parser')
hot_list = soup.find('tbody')
sub_hot_list = hot_list.find_all('tr')
for hot in sub_hot_list:
    cells = hot.find_all('td')
    # Rank number (first cell).
    serial = cells[0].text.strip()
    # Headline title.
    title = hot.find('td', class_='al').text.strip()
    # Absolute link to the topic page.
    href = 'https://tophub.today' + hot.find('a')['href']
    # View/popularity count (third cell) — the original comment mislabeled
    # this as "link".
    view = cells[2].get_text()
    print(serial, title, href, view)
4、获取豆瓣电影
import requests
from bs4 import BeautifulSoup
# Scrape the Douban movie chart and print title, rating, and link per movie.
url = 'https://movie.douban.com/chart'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
response = requests.get(url, headers=headers, timeout=10)
soup = BeautifulSoup(response.text, 'html.parser')
items = soup.find_all('div', class_='pl2')
for item in items:
    title = item.find('a').text.strip()  # movie title
    link = item.find('a')['href']  # detail-page URL
    # Some entries carry no rating span; guard against a missing tag instead
    # of crashing with AttributeError on .text.
    rating_tag = item.find('span', class_='rating_nums')
    rating = rating_tag.text.strip() if rating_tag is not None else ''
    print(title + ' ' + rating + ' ' + link)
5、获取豆瓣电影保存至Excel
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
# Scrape the Douban movie chart and save name/rating/link rows to an Excel file.
url = 'https://movie.douban.com/chart'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
response = requests.get(url, headers=headers, timeout=10)
html = response.text
# Parse the chart page; each movie sits in a <div class="pl2">.
soup = BeautifulSoup(html, 'html.parser')
movie_list = soup.find_all('div', class_='pl2')
data = []
for movie in movie_list:
    # Movie title.
    name = movie.find('a').text.strip()
    # Some entries carry no rating span; guard against a missing tag instead
    # of crashing with AttributeError on .text.
    rating_tag = movie.find('span', class_='rating_nums')
    rating = rating_tag.text.strip() if rating_tag is not None else ''
    # Detail-page URL.
    href = movie.find('a')['href']
    data.append([name, rating, href])
# Write the collected rows to an .xlsx workbook.
wb = Workbook()
ws = wb.active
ws.title = '豆瓣电影榜单'
# Header row.
ws.append(['电影名称', '评分', '链接'])
for row in data:
    ws.append(row)
wb.save('豆瓣电影榜单.xlsx')
print('豆瓣电影榜单已保存到豆瓣电影榜单.xlsx')
6、下载视频
import requests
# Download a (watermarked) short video to video.mp4.
url = 'https://aweme.snssdk.com/aweme/v1/playwm/?video_id=视频ID&line=0'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299'
}
# Stream the body in chunks instead of buffering the whole video in memory.
response = requests.get(url, headers=headers, stream=True, timeout=30)
# Fail loudly on HTTP errors instead of silently saving an error page.
response.raise_for_status()
with open('video.mp4', 'wb') as f:
    for chunk in response.iter_content(chunk_size=8192):
        f.write(chunk)
7、获取微博热点新闻
import requests
from bs4 import BeautifulSoup
# Scrape a Weibo mobile-search result list (keyword and date filters are
# pre-encoded in the URL; the page number is appended per request).
url = "https://weibo.cn/search/mblog?hideSearchFrame=&keyword=%E7%94%98%E8%82%83%E9%99%87%E5%8D%97&advancedfilter=1&hasori=1&starttime=20230530&endtime=20230530&sort=time&page="
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36"
}
for i in range(1, 20):  # pages 1-19 (the original comment wrongly claimed 20 pages)
    resp = requests.get(url + str(i), headers=headers, timeout=10)
    soup = BeautifulSoup(resp.content.decode('utf-8'), 'html.parser')
    contents = soup.find_all(class_="c")
    for content in contents:
        # Only blocks that embed a status id are actual posts; skip the rest.
        if 'id=' not in str(content):
            continue
        text = content.get_text().strip()
        # Guard: a block without an anchor would crash on ['href'].
        link_tag = content.find('a')
        if link_tag is None:
            continue
        href = link_tag['href']
        print(text, href)