import multiprocessing
import os
import random
import re

# MySQL client
import pymysql
import requests
# Connect to the database.
# Each connection setting can be overridden through an environment variable so
# credentials do not have to live in source control.  The fallbacks are the
# values this script has always used, so existing runs behave the same.
# TODO: drop the hard-coded password fallback once HP_DB_PASSWORD is set
# wherever this script is deployed.
db = pymysql.connect(
    host=os.environ.get("HP_DB_HOST", "sql.s1270.vhostgo.com"),
    user=os.environ.get("HP_DB_USER", "csp72699"),
    password=os.environ.get(
        "HP_DB_PASSWORD",
        "213333333333333333333333333333333333333333333333333333",
    ),
    database=os.environ.get("HP_DB_NAME", "csp72699"),
)
# Build the crawl queue: 9999 distinct detail-page URLs whose numeric IDs are
# drawn from 1555..69999 (both ends inclusive).
# random.sample() draws without replacement, so every ID -- and therefore
# every URL -- is unique.  (Checking a freshly drawn integer ID against the
# list of URL strings can never match, so that approach lets duplicates in.)
# The sample is returned in random selection order, so no extra shuffle is
# required.
a = [
    "https://ht32yy.xyz:9527/vod/details/" + str(video_id)
    for video_id in random.sample(range(1555, 70000), 9999)
]
def _first_match(pattern, text):
    """Return the first captured group of *pattern* in *text*, or None."""
    found = re.search(pattern, text)
    return found.group(1) if found else None


def get_url(url):
    """Fetch one detail page and store its video record in the `hp` table.

    The page is requested with a 5-second timeout.  From a 200 response the
    title, popularity label and video link are extracted; a row is inserted
    only when all three were found and no row with the same title (`spbt`)
    exists yet.  The id of a newly inserted row is printed.

    Request and database errors are reported on stdout and the URL is
    skipped, so one bad page does not stop the crawl.

    Args:
        url: Address of the detail page to scrape.

    Returns:
        None.
    """
    try:
        r = requests.get(url, timeout=5)
    except requests.RequestException as exc:
        print("request failed:", url, exc)
        return
    if r.status_code != 200:
        return

    # Remove every space first: the patterns below are written against the
    # space-free markup (e.g. `<divclass=...>`).
    html = r.text.replace(" ", "")
    title = _first_match(r'<h1>(.*?)</h1>', html)
    # Matches <div class="vod-header-label">人氣: 3.1w+</div>
    popularity = _first_match(r'<divclass="vod-header-label">人氣:(.*?)</div>', html)
    # Matches <input type="text" value="https://.../xxx.mp4" class="form-control" readonly></div>
    video_link = _first_match(r'<inputtype="text"value="(.*?)"class="form-control"readonly></div>', html)
    if title is None or popularity is None or video_link is None:
        # Page layout did not match; there is nothing usable to store.
        return

    try:
        # The context manager closes the cursor even if a statement fails.
        with db.cursor() as cursor:
            # Exact comparison: with LIKE, any % or _ inside a title would be
            # interpreted as a wildcard.
            cursor.execute("SELECT 1 FROM `hp` WHERE `spbt` = %s LIMIT 1", (title,))
            if cursor.fetchone():
                return
            cursor.execute(
                "INSERT INTO `hp` (`id`, `spbt`, `spdz`, `spbfl`) VALUES (NULL, %s, %s, %s)",
                (title, video_link, popularity),
            )
            db.commit()
            # Print the id of the newly inserted row.
            print(cursor.lastrowid)
    except pymysql.MySQLError as exc:
        print("database error:", url, exc)
# Worker entry point: the function each process is meant to execute.
def your_function(param):
    """Run ``get_url`` on every entry of the module-level URL list ``a``.

    Args:
        param: Value handed in by the process launcher; it is not used.

    Returns:
        None.
    """
    # Walk the whole URL list, one page at a time.
    for page_url in a:
        get_url(page_url)
if __name__ == "__main__":
    # Distribute the URLs over a worker pool so that each page is fetched by
    # exactly one worker.  Giving the complete list to every worker -- and
    # then to a second batch of hand-started processes -- would crawl every
    # page once per worker.
    # NOTE(review): with a fork start method the workers inherit the
    # module-level `db` connection; presumably each worker should open its
    # own connection -- confirm.
    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        # map() blocks until all URLs have been processed.
        pool.map(get_url, a)

    # Remove rows whose title (`spbt`) occurs more than once, keeping the row
    # with the smallest id.  Workers check-then-insert without locking, so two
    # of them can still store the same title concurrently.
    cursor = db.cursor()
    try:
        sql = "DELETE h1 FROM hp h1 JOIN (SELECT spbt, MIN(id) as min_id FROM hp GROUP BY spbt HAVING COUNT(spbt) > 1) h2 ON h1.spbt = h2.spbt AND h1.id > h2.min_id"
        cursor.execute(sql)
        db.commit()
    finally:
        # Disconnect: release the cursor and the connection itself.
        cursor.close()
        db.close()
# Python web crawler
# First published 2024-02-14 23:04:22