# requests包 import requests # xpath包 from lxml import etree # 本地mysql包(mysql_def包下mysql_conn函数) from mysql_def import mysql_conn # 定义url,含分页 %d base_url='https://bj.5i5j.com/zufang/changpingqu/n%d/' # 定义headers头 headers ={ "Cookie": "_Jo0OQK=27E3AE8F401F48377EC641A97E866EA9401E4BF430D59F325019A8A1C06A982D5A716B0F691F396132740C3E5383A69B7EF67E84EC402AE9D0D7E4FE54D996F94C4DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1SA==; yfx_c_g_u_id_10000001=_ck18081814101818670131507573338; yfx_mr_f_n_10000001=baidu%3A%3Amarket_type_ppzq%3A%3A%3A%3A%3A%3A%3A%3A%3A%3A%25E6%25A0%2587%25E9%25A2%2598%3A%3Abj.5i5j.com%3A%3A%3A%3A%3A%3A%25E5%25B7%25A6%25E4%25BE%25A7%25E6%25A0%2587%25E9%25A2%2598%3A%3A%25E6%25A0%2587%25E9%25A2%2598%3A%3A160%3A%3Apmf_from_adv%3A%3Abj.5i5j.com%2F; _ga=GA1.2.510886705.1534572619; _gid=GA1.2.416216966.1534572619; domain=bj; yfx_mr_n_10000001=baidu%3A%3Amarket_type_ppzq
使用 XPath 匹配并爬取房源信息(我爱我家网站)
最新推荐文章于 2024-01-02 16:34:38 发布
该博客介绍如何利用Python的requests和lxml库爬取我爱我家网站的房源信息。首先导入所需库,定义URL和headers,然后通过for循环遍历不同页码的URL,使用XPath匹配提取房源的标题、面积、地址和价格等信息,并存储到MySQL数据库中。
摘要由CSDN通过智能技术生成