这个项目我爬取的是19楼中的“我要爆料”模块,具体代码如下:
import requests
import re
import time
import random
import threading
import pymysql
from lxml import etree
from bs4 import BeautifulSoup
def lou_spider(key=None):
url_str = 'https://www.19lou.com/forum-269-{}.html'
def child_spider(i):
url_ = url_str.format(i)
# time.sleep(random.uniform(2, 3))
result = requests.get(url_)
html = etree.HTML(result.text)
divs = html.xpath("//div[@class='list-data ']//div[@class='list-data-item second-data']")
for child in divs:
try:
title = child.xpath(".//div[@class='title']//a//span/text()")[0]
url1 = child.xpath(".//div[@class='title']//a/@href")[0]
author = child.xpath(".//div[@class='author']/a//text()")[0]