# py小说网站数据 — Python script: scrape novel data from a website into MySQL

import os

import urllib.request as request
from urllib.request import Request,urlopen,urlretrieve

'''
解压缩或压缩
'''
import gzip
'''
ssl协议验签
'''
import ssl
'''
解析数据
'''
from lxml import etree
'''
连接数据库
'''
import pymysql

import time


def getURLData(path,charset):
    '''
    Download the resource at ``path`` and return its body as text.

    :param path: URL to fetch
    :param charset: encoding used to decode the response bytes
    :return: the decoded body, or "" when the response code is not 200
    :raises urllib.error.URLError: propagated from ``urlopen`` when the
        request fails
    :raises UnicodeDecodeError: when the body is not valid ``charset``
    '''
    # NOTE(review): this turns off HTTPS certificate verification for every
    # urllib request made by the process, not just this call. Kept so the
    # function's existing side effect does not change; prefer a verified
    # context if the target site has a valid certificate.
    ssl._create_default_https_context = ssl._create_unverified_context
    headers = {
        # Advertise only gzip: it is the only content encoding decoded
        # below, so the server must not be invited to answer with
        # deflate or brotli.
        "Accept-Encoding": "gzip",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    }

    req = Request(url=path, headers=headers)
    # The context manager closes the connection even if reading fails.
    with urlopen(req) as conn:
        if conn.code != 200:
            return ""
        data = conn.read()
        # Header may be absent (None); compare case-insensitively.
        content_encoding = (conn.headers.get("Content-Encoding") or "").strip().lower()

    if content_encoding == "gzip":
        data = gzip.decompress(data)
    return data.decode(encoding=charset)


# Open the MySQL connection used by the rest of this script.
db = pymysql.connect(
    host="127.0.0.1",
    user="root",
    password="root",
    db="test",
)
# Cursor bound to that connection; SQL statements are executed through it.
cursor = db.cursor()



_SITE_ROOT = "https://www.6yzw.com"
_BOOK_PATH = "https://www.6yzw.com/63_63834/"


def _insert_book(html):
    """Insert the book's name/author/type row into ``novallist``.

    :param html: parsed lxml tree of the book's index page
    :return: the id of the inserted row, or None when the page lacks the
        expected nodes or the insert fails (the transaction is rolled back)
    """
    titles = html.xpath("//div[@id='info']/h1/text()")
    info = html.xpath("//div[@id='info']/p/text()")
    # Index 2 of the <p> text nodes is read below, so at least 3 are needed.
    if not titles or len(info) < 3:
        print("book info not found on page:", _BOOK_PATH)
        return None

    bookname = titles[0]
    print("bookname", bookname)
    # The slices skip a fixed-width prefix of each text node (presumably a
    # label in front of the value -- confirm against the page markup).
    author = info[2][7:]
    print("bookAuthor", author)
    book_type = info[0][3:]
    print("bookType", book_type)

    try:
        # Parameterized query: scraped text may contain quotes, which would
        # break (or inject into) a string-concatenated statement.
        cursor.execute(
            "INSERT INTO novallist(bookname,bookauthor,booktype) VALUES(%s,%s,%s)",
            # str() turns lxml's string subclass into a plain str.
            (str(bookname), str(author), str(book_type)),
        )
        bookid = db.insert_id()
        print("输出最新插入行的id", bookid)
        # Every statement that modifies the database needs a commit.
        db.commit()
        print("add a new user successful")
        return bookid
    except Exception as e:
        print("add a new user failed:", e)
        db.rollback()
        return None


def _insert_chapter(bookid, cateurl):
    """Download one chapter page and store it in ``novalcate``.

    A chapter that cannot be fetched, decoded or parsed is reported and
    skipped so one bad page does not abort the whole run.

    :param bookid: id of the owning row in ``novallist``
    :param cateurl: absolute URL of the chapter page
    """
    try:
        page = getURLData(path=cateurl, charset="gbk")
    except (OSError, UnicodeDecodeError) as e:
        # OSError covers urllib's URLError/HTTPError and gzip errors.
        print("skip chapter, download failed:", cateurl, e)
        return
    if not page:
        print("skip chapter, empty response:", cateurl)
        return

    tree = etree.HTML(page)
    fragments = tree.xpath("//div[@id='content']/text()") if tree is not None else []
    if not fragments:
        print("skip chapter, no content found:", cateurl)
        return

    temp = "".join(fragments)
    print("temp", temp)
    print("在这里把每一章存到数据库", fragments[0])
    print("#####################################################################################")
    try:
        # The first text fragment of the content is stored as the chapter name.
        cursor.execute(
            "INSERT INTO novalcate(bookid,catehref,content,cataname) VALUES(%s,%s,%s,%s)",
            (bookid, str(cateurl), temp, str(fragments[0])),
        )
        db.commit()
        print("add a new user successful")
    except Exception as e:
        print("add a new user failed:", e)
        db.rollback()


def main():
    """Scrape one book: store its info row, then every chapter it links to.

    Stops before the chapter loop when the book row could not be stored,
    so no chapter is saved without a valid book id. The module-level
    cursor and connection are closed when the run ends.
    """
    try:
        bookdata = getURLData(path=_BOOK_PATH, charset="gbk")
        if not bookdata:
            print("book page could not be downloaded:", _BOOK_PATH)
            return
        html = etree.HTML(bookdata)
        if html is None:
            print("book page could not be parsed:", _BOOK_PATH)
            return

        bookid = _insert_book(html)
        if bookid is None:
            return

        booksHref = []
        for bookhref in html.xpath("//dl//dd/a/@href"):
            cateurl = _SITE_ROOT + bookhref
            booksHref.append(cateurl)
            _insert_chapter(bookid, cateurl)
        print("bookListHref", booksHref)
    finally:
        cursor.close()
        db.close()


if __name__ == '__main__':
    main()




























  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

就躺了吧

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值