第一步:发送网页请求,获得响应内容
五种请求方法
第一种方法:直接请求
import requests
# Method 1: plain GET requests with no custom headers or cookies.

# Anjuke page; a bare expression such as ``web.text`` has no effect in a
# script and is only echoed when run in a notebook/REPL.
anjuke_url = "https://gz.fang.anjuke.com/?from=HomePage_TopBar"
web = requests.get(anjuke_url)
web.text

# Lianjia listing page (ershoufang/tianhe).
lianjia_url = 'https://gz.lianjia.com/ershoufang/tianhe/'
resp = requests.get(lianjia_url)
print(resp.text)
resp.status_code  # HTTP status code of the response
resp.text  # response body decoded to str
resp.content  # response body as raw bytes
第二种方法:带请求头(headers)请求网页
# Method 2: send a browser User-Agent header along with the request.
url = 'https://gz.lianjia.com/ershoufang/tianhe/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
}
resp = requests.get(url=url, headers=headers)
resp.status_code  # HTTP status code of the response
resp.text  # response body decoded to str
第三种方法:添加cookies,写入headers
# Method 3: copy the browser's full request headers, including the Cookie
# header, and send them as-is.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # 'br' is intentionally not advertised: Requests can only decode Brotli
    # when the optional brotli/brotlicffi package is installed, otherwise
    # resp.text would contain undecoded compressed data.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    # Cookie header value copied from a browser session.
    'Cookie': 'lianjia_uuid=d4fe3bda-8ceb-4f16-8ce0-9d88dc728e2f; select_city=440100; lianjia_ssid=033f33a0-083a-4067-8bd9-091fdd133d81; _smt_uid=653f145f.42d52bd9; _jzqa=1.4465419545725586000.1698632800.1698632800.1698632800.1; _jzqc=1; _jzqy=1.1698632800.1698632800.1.jzqsr=baidu.-; _jzqckmp=1; _qzjc=1; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2218b7e6798a8b47-004211a819a233-26021051-2073600-18b7e6798a982a%22%2C%22%24device_id%22%3A%2218b7e6798a8b47-004211a819a233-26021051-2073600-18b7e6798a982a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%2C%22%24latest_referrer_host%22%3A%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%7D; _ga=GA1.2.1382280153.1698632806; _gid=GA1.2.1206859274.1698632806; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1698632814; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1698632827; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiNGRkMjM3MzkxMzk0ZWYxZjM4MDNlMGM1MzU2OGI2ZDAyM2ZiYWJhNDM4ZjY2YmM1MjVhYWU2NTMwNzdjMDZjYTQ5NTI4ZWFjZDYxYTlkMGNkYzc4MWEwOTA0NjIyMTc2Y2IzMGMyNzFjYmZlODczYWYxNzYwYWUzZWFiMDFmZjk2NDgyY2FjNjk0Y2M5NTJlYjBjYWM5ZmZkODg1ZjMzMWYyMzYzYzRmMDFmNjQzMzNhZjY5NTAwMDNiOWMzMjY3N2ViNzk5OGNiMDhmODJkMmI2NjdiNjAzZGYwY2I4NmUxMzdhYWEwMmJlMWVkYmRkYzVkNmNiNGVmZjM2ZGNhOFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCJlNzc4NzYyOFwifSIsInIiOiJodHRwczovL2d6LmxpYW5qaWEuY29tL2Vyc2hvdWZhbmcvdGlhbmhlLyIsIm9zIjoid2ViIiwidiI6IjAuMSJ9; _qzja=1.2068627131.1698632799798.1698632799798.1698632799798.1698632814256.1698632827770.0.0.0.3.1; _qzjb=1.1698632799798.3.0.0.0; _qzjto=3.1.0; _jzqb=1.3.10.1698632800.1; _ga_654P0WDKYN=GS1.2.1698632807.1.1.1698632829.0.0.0',
    'Host': 'gz.lianjia.com',
    'Referer': 'https://gz.lianjia.com/ershoufang/',
    'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
}
url = 'https://gz.lianjia.com/ershoufang/tianhe/'
resp = requests.get(url, headers=headers)
resp.status_code  # HTTP status code of the response
resp.text  # response body decoded to str
第四种方法:单独添加cookies
# Method 4: keep the headers minimal and pass the cookies separately through
# the ``cookies=`` argument of ``requests.get``.
# Defined here so this cell does not depend on an earlier cell having run.
url = 'https://gz.lianjia.com/ershoufang/tianhe/'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36'}
# Raw Cookie header value copied from a browser session ("name=value; name=value; ...").
cookies = 'lianjia_uuid=d4fe3bda-8ceb-4f16-8ce0-9d88dc728e2f; select_city=440100; lianjia_ssid=033f33a0-083a-4067-8bd9-091fdd133d81; _smt_uid=653f145f.42d52bd9; _jzqa=1.4465419545725586000.1698632800.1698632800.1698632800.1; _jzqc=1; _jzqy=1.1698632800.1698632800.1.jzqsr=baidu.-; _jzqckmp=1; _qzjc=1; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2218b7e6798a8b47-004211a819a233-26021051-2073600-18b7e6798a982a%22%2C%22%24device_id%22%3A%2218b7e6798a8b47-004211a819a233-26021051-2073600-18b7e6798a982a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%2C%22%24latest_referrer_host%22%3A%22www.baidu.com%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%7D%7D; _ga=GA1.2.1382280153.1698632806; _gid=GA1.2.1206859274.1698632806; Hm_lvt_9152f8221cb6243a53c83b956842be8a=1698632814; Hm_lpvt_9152f8221cb6243a53c83b956842be8a=1698632827; srcid=eyJ0Ijoie1wiZGF0YVwiOlwiNGRkMjM3MzkxMzk0ZWYxZjM4MDNlMGM1MzU2OGI2ZDAyM2ZiYWJhNDM4ZjY2YmM1MjVhYWU2NTMwNzdjMDZjYTQ5NTI4ZWFjZDYxYTlkMGNkYzc4MWEwOTA0NjIyMTc2Y2IzMGMyNzFjYmZlODczYWYxNzYwYWUzZWFiMDFmZjk2NDgyY2FjNjk0Y2M5NTJlYjBjYWM5ZmZkODg1ZjMzMWYyMzYzYzRmMDFmNjQzMzNhZjY5NTAwMDNiOWMzMjY3N2ViNzk5OGNiMDhmODJkMmI2NjdiNjAzZGYwY2I4NmUxMzdhYWEwMmJlMWVkYmRkYzVkNmNiNGVmZjM2ZGNhOFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCJlNzc4NzYyOFwifSIsInIiOiJodHRwczovL2d6LmxpYW5qaWEuY29tL2Vyc2hvdWZhbmcvdGlhbmhlLyIsIm9zIjoid2ViIiwidiI6IjAuMSJ9; _qzja=1.2068627131.1698632799798.1698632799798.1698632799798.1698632814256.1698632827770.0.0.0.3.1; _qzjb=1.1698632799798.3.0.0.0; _qzjto=3.1.0; _jzqb=1.3.10.1698632800.1; _ga_654P0WDKYN=GS1.2.1698632807.1.1.1698632829.0.0.0'

# Convert the raw cookie string into a cookie jar, one entry per cookie.
cookies_jar = requests.cookies.RequestsCookieJar()
for cookie in cookies.split(';'):
    # strip() drops the space that follows each ';' so cookie names do not
    # start with whitespace; partition() splits on the first '=' only, so
    # values that themselves contain '=' stay intact.
    key, sep, value = cookie.strip().partition('=')
    if not sep:
        continue  # skip empty or malformed fragments without a '='
    cookies_jar.set(key, value)

# The jar must be passed explicitly, otherwise no cookies are sent.
resp = requests.get(url, headers=headers, cookies=cookies_jar)
resp.status_code  # HTTP status code of the response
resp.text  # response body decoded to str
第五种方法:使用网站 https://curlconverter.com/python/ 自动生成请求代码。先在浏览器开发者工具的 Network 面板中找到目标网址对应的请求,右键选择 Copy as cURL (bash),再粘贴到该网站转换成 Python 代码
import requests
# Method 5: request code generated from a browser "Copy as cURL (bash)"
# command, with cookies, headers and query parameters kept as separate dicts.
cookies = {
    'aQQ_ajkguid': 'B2D35BF2-3DD3-4CDC-89B9-16F3FBF13294',
    'sessid': 'DA507749-458E-4D63-80C1-89533831C9FA',
    'ajk-appVersion': '',
    'seo_source_type': '0',
    'ctid': '12',
    'fzq_h': '833adce7d952e6b67c082cc69aa23a81_1698627484565_62a5e8954b6a411396554965a705f0d4_3688775804',
    'id58': 'CrIclWU+/58sfzWlWp2KAg==',
    'twe': '2',
    # 'isp' appeared twice in the copied cookie list with the same value;
    # a dict keeps only one entry per key, so it is listed once here.
    'isp': 'true',
    'ved_loupans': '239486',
    'wmda_uuid': '8a507efd21c6c833112f1e79cc4a92df',
    'wmda_new_uuid': '1',
    'wmda_visited_projects': '%3B8788302075828',
    '58tj_uuid': 'a7dc21c9-9d14-4a2d-89cb-40053f2567de',
    'als': '0',
    'os': 'other',
    'obtain_by': '2',
    'wmda_session_id_8788302075828': '1698634915772-753e81d0-2ddf-f1c7',
    'new_session': '1',
    'init_refer': 'https%253A%252F%252Fguangzhou.anjuke.com%252F',
    'new_uv': '2',
    'xxzl_cid': '3758a81755ea4cc186dea9df2d9cc312',
    'xxzl_deviceid': '8xChmTQlmCK7B48GgUdBaQGV0YUO74Fym3SCEVmDyAGZ8+OFsrVMpEhTPncjWlit',
}

# NOTE: the browser's 'if-modified-since' header is deliberately not sent.
# It turns the request into a conditional one, so the server may reply
# "304 Not Modified" with an empty body instead of the page content.
headers = {
    'authority': 'gz.fang.anjuke.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'max-age=0',
    'referer': 'https://guangzhou.anjuke.com/',
    'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-site',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
}

# Query-string parameters; Requests appends them to the URL.
params = {
    'from': 'HomePage_TopBar',
}

response = requests.get('https://gz.fang.anjuke.com/', params=params, cookies=cookies, headers=headers)
response.status_code  # HTTP status code of the response
response.text  # response body decoded to str
例子: 试试请求链家网页
import requests
# Example: request the Lianjia listing page with separate header and cookie
# dicts.
lianjia_url = 'https://gz.lianjia.com/ershoufang/tianhe/'

# Request headers copied from a browser session.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Referer': 'https://gz.lianjia.com/ershoufang/tianhe/pg2/',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Cookies copied from the same browser session, one dict entry per cookie.
cookies = {
    'lianjia_uuid': 'd4fe3bda-8ceb-4f16-8ce0-9d88dc728e2f',
    'select_city': '440100',
    'lianjia_ssid': '033f33a0-083a-4067-8bd9-091fdd133d81',
    '_smt_uid': '653f145f.42d52bd9',
    '_jzqc': '1',
    '_jzqy': '1.1698632800.1698632800.1.jzqsr=baidu.-',
    '_jzqckmp': '1',
    '_qzjc': '1',
    'sajssdk_2015_cross_new_user': '1',
    '_ga': 'GA1.2.1382280153.1698632806',
    '_gid': 'GA1.2.1206859274.1698632806',
    'Hm_lvt_9152f8221cb6243a53c83b956842be8a': '1698632814',
    'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%2218b7e6798a8b47-004211a819a233-26021051-2073600-18b7e6798a982a%22%2C%22%24device_id%22%3A%2218b7e6798a8b47-004211a819a233-26021051-2073600-18b7e6798a982a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D',
    '_jzqa': '1.4465419545725586000.1698632800.1698632800.1698635528.2',
    '_gat': '1',
    '_gat_global': '1',
    '_gat_new_global': '1',
    '_gat_dianpu_agent': '1',
    '_ga_654P0WDKYN': 'GS1.2.1698635530.2.1.1698637717.0.0.0',
    'Hm_lpvt_9152f8221cb6243a53c83b956842be8a': '1698637719',
    '_jzqb': '1.6.10.1698635528.1',
    '_qzja': '1.2068627131.1698632799798.1698632799798.1698635528072.1698637713937.1698637719384.0.0.0.11.2',
    '_qzjb': '1.1698635528072.6.0.0.0',
    '_qzjto': '11.2.0',
    'srcid': 'eyJ0Ijoie1wiZGF0YVwiOlwiNGRkMjM3MzkxMzk0ZWYxZjM4MDNlMGM1MzU2OGI2ZDAyM2ZiYWJhNDM4ZjY2YmM1MjVhYWU2NTMwNzdjMDZjYTQ5NTI4ZWFjZDYxYTlkMGNkYzc4MWEwOTA0NjIyMTc2Y2IzMGMyNzFjYmZlODczYWYxNzYwYWUzZWFiMDFmZjk2NDgyY2FjNjk0Y2M5NTJlYjBjYWM5ZmZkODg1ZjMzMTI4MjhhZjcxNDViNzc2N2FkOTVlYjEzMmFjOTQ3OGNiMzM2MWNiZjNmNTBkNjc2MDE4ZGQwMzE1MDc4ZjViOWVhY2EyY2E4NGI2YzI0NGUyYzA0MGY1NmJmNWE1MDUzNFwiLFwia2V5X2lkXCI6XCIxXCIsXCJzaWduXCI6XCI1YjhlOWQ0M1wifSIsInIiOiJodHRwczovL2d6LmxpYW5qaWEuY29tL2Vyc2hvdWZhbmcvdGlhbmhlL3BnMi8iLCJvcyI6IndlYiIsInYiOiIwLjEifQ==',
}

response = requests.get(lianjia_url, headers=headers, cookies=cookies)
response.status_code  # HTTP status code of the response
response.text  # response body decoded to str
===============================================================
第二步:解析网页,获取需要的内容
请听下回分析 ===============================================================