import redis
import pymysql
import datetime
import json
# Redis client for the local server, database 0.
rediscli = redis.StrictRedis(
    host="localhost",
    port=6379,
    db=0,
)
# MySQL connection to the "xiao" schema on the local server.
# NOTE(review): credentials are hard-coded here; consider loading them from
# configuration or the environment before deploying.
mysqlconn = pymysql.connect(
    host="localhost",
    port=3306,
    user="root",
    password="123456",
    db="xiao",
    charset="utf8",
)
def _store_book(cursor, item):
    """Insert the novel described by *item* unless it is already stored.

    A novel counts as already stored when a row with the same ``book_name``
    and ``author`` exists in ``novel_copy1``.
    """
    # Make sure the novel is not in the table yet.
    sql = "select id from novel_copy1 where book_name=%s and author=%s"
    cursor.execute(sql, (item["book_name"], item["author"]))
    if cursor.fetchone():
        return
    # Not found: write the novel row.
    sql = "insert into novel_copy1(category,book_name,author,status,book_nums,description,c_time,book_url,catalog_url)" \
          "values (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    cursor.execute(sql, (
        item["category"],
        item["book_name"],
        item["author"],
        item["status"],
        item["book_nums"],
        item["description"],
        item["c_time"],
        item["book_url"],
        item["catalog_url"],
    ))


def _store_chapter_list(cursor, item):
    """Bulk-insert every chapter listed in ``item["chapter_list"]``.

    Each entry is unpacked as ``(title, chapter_url, catalog_url)``; the
    1-based position in the list is stored as ``ordernum``.
    """
    sql = "insert into chapter_copy1(title,ordernum,c_time,chapter_url,catalog_url) values(%s,%s,%s,%s,%s)"
    rows = [
        (title, ordernum, datetime.datetime.now(), chapter_url, catalog_url)
        for ordernum, (title, chapter_url, catalog_url)
        in enumerate(item["chapter_list"], start=1)
    ]
    cursor.executemany(sql, rows)


def _store_chapter_content(cursor, item):
    """Attach ``item["content"]`` to the chapter row matching ``item["chapter_url"]``."""
    sql = "update chapter_copy1 set content=%s where chapter_url=%s"
    content = item["content"]
    chapter_url = item["chapter_url"]
    print("项目1中的章节url", chapter_url)
    cursor.execute(sql, (content, chapter_url))


# Checked in this order; the first key present in the item selects the handler.
_HANDLERS = (
    ("book_name", _store_book),
    ("chapter_list", _store_chapter_list),
    ("content", _store_chapter_content),
)

# Consume items forever: block on the Redis list, decode the JSON payload and
# hand it to the handler that matches its shape.
while True:
    source, data = rediscli.blpop(["zh:items"])
    print(source, data)
    item = json.loads(data)
    print(item)
    print('*' * 30)
    # Route on the parsed item's keys rather than on a substring of the raw
    # payload, so text that merely mentions "book_name" or "chapter_list"
    # (e.g. inside chapter content) cannot select the wrong handler.
    handler = next((fn for key, fn in _HANDLERS if key in item), None)
    if handler is None:
        # Unrecognised item: nothing to store, and no cursor is opened.
        continue
    # The context manager closes the cursor even if the handler raises.
    with mysqlconn.cursor() as cursor:
        try:
            handler(cursor, item)
            # Commit even when nothing was written so the transaction opened
            # by the duplicate-check SELECT does not stay open.
            mysqlconn.commit()
        except Exception:
            # Discard the partial write before propagating the error.
            mysqlconn.rollback()
            raise
分布式爬取的数据存储到redis后整合到数据库(纵横小说)
最新推荐文章于 2023-01-02 14:17:40 发布
该博客介绍了一个数据入库脚本:它把 Redis 列表 `zh:items` 当作消息队列,用 `blpop` 阻塞式地逐条取出分布式爬虫写入的 JSON 数据,并将其存入 MySQL。根据取出的数据中包含 `book_name`、`chapter_list` 还是 `content`,脚本分别执行小说信息插入(先按书名和作者查重)、章节列表批量插入和章节内容更新操作。每次写入后立即提交事务,使数据及时落库。
摘要由CSDN通过智能技术生成