from bs4 import BeautifulSoup
import requests
import time
import random
import csv
import pandas as pd
import numpy as np
from pandas import Series,DataFrame
def get_html(url):
    """Fetch *url* using a random browser User-Agent and a random HTTP proxy.

    Parameters
    ----------
    url : str
        The page to request.

    Returns
    -------
    requests.Response
        The response object with its encoding forced to UTF-8.

    Raises
    ------
    requests.RequestException
        On connection errors, proxy failures, or timeout.
    """
    # Browser header pool: rotating the User-Agent per request reduces the
    # chance of the target site blocking us as a bot.
    user_agents = [
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
    ]
    # Pool of free HTTP proxies to rotate through.
    # NOTE(review): these are hard-coded public proxies and are almost
    # certainly stale — verify/refresh this list before running.
    proxy_pool = [
        "http://175.44.108.161:9999",
        "http://191.241.34.210:8080",
        "http://122.4.50.96:9999",
        "http://175.42.123.222:9999",
        "http://119.108.165.8:9000",
        "http://183.166.111.202:9999",
        "http://113.120.32.246:9999",
        "http://113.120.36.25:9999",
        "http://110.243.2.233:9999",
        "http://123.55.106.215:9999",
        "http://223.242.224.4:9999",
        "http://182.32.231.5:9999",
        "http://125.108.83.188:9000",
        "http://123.101.64.67:9999",
    ]
    res = requests.get(
        url,
        headers={"User-Agent": random.choice(user_agents)},
        proxies={"http": random.choice(proxy_pool)},
        # Without a timeout, requests waits indefinitely — fatal when the
        # randomly chosen free proxy is dead.
        timeout=10,
    )
    res.encoding = 'utf-8'  # force UTF-8 so Chinese page content decodes correctly
    return res
# ### Collect rental-listing links
def main(start, end):
for i
# (scrape artifact — page title) 爬取贝壳找房数据: "Scraping Beike Zhaofang housing data"
# (scrape artifact — page metadata) "Latest recommended article published 2024-01-17 14:06:10"