数据爬取
爬取北京二手房数据信息的 Python 代码:
# coding: utf-8
from requests import get
from bs4 import BeautifulSoup as bs
from tqdm import tqdm
from multiprocessing import Pool
from time import time
from os import listdir
from csv import writer
import re
def url_spider(url):
"""
爬取网页信息
"""
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER'}
try:
response = get(url, headers=headers, verify=True, timeout=10)
if response.status_code == 200:
soup = bs(response.text, features="html.parser")
return soup
else:
print("\n\n*** (%s)请求状态异常 ***\n\n" % url)
return None # 状态码非200则返回空值
except Exception as e:
print('\n\n*** Requests.get(%s) gets wrong! ***\nThe program wil