#coding=utf-8
import requests
from lxml import etree
from bs4 import BeautifulSoup
# Start-up banner: a framed description of what this scraper does.
separator = '#' * 30
print(separator)
info = ''' *** 趣事百科 ***
爬取文本笑话
使用模块requests,lxml的etree
爬出下一页地址返回循环判断'''
print(info)
print(separator)
class Qsbk(object):
def __init__(self):
self.homeurl='https://www.qiushibaike.com'
self.xpathname='./div[1]/a[1]/img/@alt'
self.xpathtext='./a[1]//span/text()'
self.xpathname_text = '//div[@class="col1"]/div'
self.xpathpage = '//*[@id="content-left"]/ul/li[last()]/a/@href'
def def_name(self,url):
response=requests.get(url)
html=response.text
tree=etree.HTML(html)
name_texts=tree.xpath(self.xpathname_text)
#每页用户ID和内容
for name_text in name_texts:
#print(name_text)
python爬取糗事百科文本笑话;另外,对中间两个for循环能同时输出的原因不解,请看到的朋友帮忙解答
最新推荐文章于 2022-01-18 13:40:03 发布