import random
import time
import csv
import requests
from fake_useragent import UserAgent
from lxml import etree
class House:
def __init__(self):
    """Set up the listing URL template and the CSV output writer.

    Opens '链家100.csv' for writing (relative to the current working
    directory, truncating any existing file) and wraps it in a
    ``csv.writer``. The file handle is kept on ``self.f``; nothing in
    this method closes it, so whoever owns the instance is responsible
    for calling ``self.f.close()`` when finished.
    """
    # URL template with one '{}' slot — presumably filled with the page
    # number by a caller that is not visible here.
    self.url = 'https://bj.lianjia.com/ershoufang/pg{}/'
    # newline='' is what the csv module requires of files handed to
    # csv.writer: the writer emits its own line terminator, and leaving
    # newline translation on yields a blank line after every row on
    # platforms that turn '\n' into '\r\n'.
    # The explicit encoding keeps the output independent of the locale
    # default, which may be unable to encode Chinese text.
    self.f = open('链家100.csv', 'w', newline='', encoding='utf-8')
    self.write = csv.writer(self.f)
def get_html(self, url):
    """Download the page at ``url`` and hand its text to ``parser_html``.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, ...).
    """
    # The HTTP header name is 'User-Agent' (with a hyphen). The previous
    # key 'UserAgent' was sent as an unrelated custom header, so the
    # randomised agent string never replaced the default one.
    headers = {'User-Agent': UserAgent().random}
    # requests waits indefinitely unless a timeout is given; bound the
    # wait so one stalled connection cannot hang the whole run.
    response = requests.get(url=url, headers=headers, timeout=10)
    self.parser_html(response.text)
def parser_html(self, html):
eobj = etree.HTML(html)
li_list = eobj.xpath("//li[@class='clear LOGVIEWDATA LOGCLICKDATA']")
for li in
链家二手房100页XPath爬取保存CSV
最新推荐文章于 2024-05-07 20:50:30 发布