Scraping novel listings from Qidian (起点中文网)

The script runs, produces no output, and raises no errors. What's wrong with it?

import xlwt
import requests
from lxml import etree
import time

all_info_list = []

def get_info(url):
    # Request the listing page; without a browser-like User-Agent the site
    # may serve a block page, in which case the XPath below matches nothing.
    headers = {'User-Agent': 'Mozilla/5.0'}
    html = requests.get(url, headers=headers)
    selector = etree.HTML(html.text)
    infos = selector.xpath('//ul[@class="all_img_list cf"]/li')
    for info in infos:
        title = info.xpath('div[2]/h4/a/text()')[0]
        author = info.xpath('div[2]/p[1]/a[1]/text()')[0]
        style_1 = info.xpath('div[2]/p[1]/a[2]/text()')[0]
        style_2 = info.xpath('div[2]/p[1]/a[3]/text()')[0]
        style = style_1 + '.' + style_2
        complete = info.xpath('div[2]/p[1]/span[1]/text()')[0]
        # p[2] is an element node; take its text() before calling strip()
        introduce = info.xpath('div[2]/p[2]/text()')[0].strip()
        word = info.xpath('div[2]/p[3]/span/span[1]/text()')[0].strip('万字')
        # Append the scraped values, not the literal column names
        info_list = [title, author, style, complete, introduce, word]
        all_info_list.append(info_list)
    time.sleep(3)

if __name__ == "__main__":
    urls = ['https://www.qidian.com/all?page={}'.format(str(i))
            for i in range(1, 29655)]
    for url in urls:
        get_info(url)
    header = ['title', 'author', 'style', 'complete', 'introduce', 'word']
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    for h in range(len(header)):
        sheet.write(0, h, header[h])
    i = 1
    for row in all_info_list:
        j = 0
        for data in row:
            sheet.write(i, j, data)
            j += 1
        i += 1
    # Save inside the main block, after all rows have been written
    book.save('H:/python/xiaoshuo.xls')
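
Two things are worth checking before letting the full loop run. First, range(1, 29655) with a 3-second sleep per page is roughly a full day of work before book.save() is ever reached, so the script can look unresponsive for hours even when it is working. Second, Qidian often rejects requests that lack a browser User-Agent, in which case the XPath matches zero <li> nodes and nothing is ever appended. Below is a minimal sanity-check sketch for a single page; the URL and class name come from the script above, while the User-Agent string is just an example value.

# Sanity check: fetch one listing page and count the matched <li> nodes
# before looping over thousands of pages.
import requests
from lxml import etree

test_url = 'https://www.qidian.com/all?page=1'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}  # example UA string

resp = requests.get(test_url, headers=headers, timeout=10)
print('HTTP status:', resp.status_code)

selector = etree.HTML(resp.text)
items = selector.xpath('//ul[@class="all_img_list cf"]/li')
print('matched <li> nodes:', len(items))  # 0 means the request was blocked or the selector is stale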
            