"""Scrape second-hand housing (ershoufang) listings for every Urumqi district
from lianjia.com and append one row per listing to a CSV file."""
import csv

import parsel
import requests

# from fake_useragent import UserAgent

# Listings shown per result page on lianjia.com.
PAGE_SIZE = 30

# District slugs as they appear in lianjia.com URLs.
distriction = ['wulumuqixian', 'tianshanqu', 'jingjijishukaifaqutoutunhequ',
               'xinshiqu', 'shuimogouqu', 'shayibakequ', 'midongqu',
               'dabanchengqu']
# Human-readable district names, index-parallel to `distriction`.
distriction2 = ["乌鲁木齐县", '天山区', '经济开发区(头屯河区', '新市区',
                '水磨沟区', '沙依巴克区', '米东区', '达坂城区']


def _fetch_selector(url):
    """GET *url* and return a parsel.Selector over the response body.

    Raises requests.HTTPError on a non-2xx status so that a blocked or
    failed request is not silently parsed as an empty page.
    """
    # header = {'User-Agent': UserAgent().random}
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return parsel.Selector(response.text)


def _parse_listing(li):
    """Extract one CSV row from a single listing card.

    Returns [title, address, house, attention, total_price, unit_price],
    or None when the card lacks expected fields (e.g. ad/promo tiles),
    instead of crashing on an IndexError as the original did.
    """
    title = li.css(".title a::text").get()                           # listing title
    address_parts = li.css(".positionInfo a::text").getall()         # [community, area]
    house = li.css(".houseInfo::text").get()                         # layout / size info
    attention = li.css(".followInfo::text").get()                    # follower-count text
    price_unit = li.css(".totalPrice.totalPrice2 i::text").getall()  # unit suffix, e.g. '万'
    price = li.css(".priceInfo span::text").getall()                 # [total, per-sqm]
    if (title is None or len(address_parts) < 2
            or len(price) < 2 or len(price_unit) < 2):
        return None
    address = address_parts[0] + address_parts[1]
    total_price = price[0] + price_unit[1]  # append the '万' unit to the total
    return [title, address, house, attention, total_price, price[1]]


def main():
    """Crawl every district page by page and write all rows to the CSV."""
    all_rows = []  # renamed from `list`, which shadowed the builtin
    with open('./链家网数据集1.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        for slug, name in zip(distriction, distriction2):
            # First request only to read the total listing count for paging.
            first_page = _fetch_selector(
                f'https://wlmq.lianjia.com/ershoufang/{slug}/')
            total = int(first_page.css('.resultDes.clear h2 span::text').get())
            # Ceiling division: the original floor division dropped the last
            # partial page (e.g. 31 listings -> 1 page, losing one listing).
            pages = -(-total // PAGE_SIZE)
            for page in range(1, pages + 1):
                # Printed once (the original repeated this banner three times).
                print(f"-----------------------{name}区域---------------------第{page}页---------------------------------")
                selector = _fetch_selector(
                    f'https://wlmq.lianjia.com/ershoufang/{slug}/pg{page}')
                for li in selector.css('.clear.LOGCLICKDATA'):
                    row = _parse_listing(li)
                    if row is None:
                        continue
                    print(row)
                    all_rows.append(row)
                    writer.writerow(row)
    print(all_rows)


if __name__ == '__main__':
    main()
Python使用css方式爬取链家网数据写入csv文档
最新推荐文章于 2024-04-04 20:13:44 发布