查看页面源码
案例
"""
1. 提取页面源代码
2. 解析页面源代码,提取数据
"""
import requests
from pyquery import PyQuery
# Shared output handle: parse_page_source() writes one line to it for every
# ".mt-10" element it finds. Mode "w" truncates any file from an earlier run.
f = open("qingchezhijia.csv", "w", encoding="utf-8")
def get_page_source(url, timeout=10):
    """Download *url* and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            straight through to ``requests.get``. Without it a stalled
            connection would block the script indefinitely.

    Returns:
        The response body as ``str``, decoded with the ``gbk`` codec.

    Raises:
        requests.RequestException: On connection failure, timeout, or
            (via ``raise_for_status``) a 4xx/5xx response.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # caller as if it were the requested document.
    resp.raise_for_status()
    # Force GBK decoding regardless of what the response headers declare.
    resp.encoding = "gbk"
    return resp.text
def parse_page_source(html):
    """Write one line to ``f`` for every ``.mt-10`` element found in *html*.

    Args:
        html: Markup of the page to parse.

    Each line consists of ``label:value`` pairs joined by commas and is
    written to the module-level file object ``f``. Nothing is returned.
    """
    # Stand-in for the dealer entry ("购车经销商"). It is inserted when a card
    # has no such entry in third position, presumably so the nth-child
    # positions queried below refer to the same fields on every card.
    dealer_stub = (
        '<dl class="choose-dl">\n'
        '<dt>购车经销商</dt>\n'
        '<dd>\n'
        '<a href="###" class="js-dearname" data-val="81115,51982" data-evalid="4033271" target="_blank">\n'
        '</a>\n'
        '</dd>\n'
        '</dl>'
    )
    # Selectors whose first match supplies, in order: model, place, purchase
    # time, price, fuel consumption and distance driven.
    selectors = (
        "div>dl:nth-child(1)>dd",
        "div>dl:nth-child(2)>dd",
        "div>dl:nth-child(4)>dd",
        "div>dl:nth-child(5)>dd",
        "div>dl:nth-child(6)>dd >p:nth-child(1)",
        "div>dl:nth-child(6)>dd >p:nth-child(2)",
    )

    for card in PyQuery(html)(".mt-10").items():
        dealer_dt = card("div >dl:nth-child(3)>dt:contains(购车经销商)")
        if not dealer_dt:
            # Build a fresh node for each card: after() inserts the node it
            # is given, so a shared one would not stay in earlier cards.
            card("div >dl:nth-child(2)").after(PyQuery(dealer_stub))

        raw = [card(selector).eq(0).text() for selector in selectors]
        model, location, bought_on, price, fuel, mileage = raw

        # Strip whitespace and unit suffixes so only the bare values remain.
        model = model.replace("\n", "").replace(" ", "")
        price = price.replace("万元", "")
        fuel = fuel.replace("升/百公里", "")
        mileage = mileage.replace("公里", "")
        extras = card("div>div>dl>dd").text().split()

        fields = (
            f"购买车型:{model}",
            f"购买地点:{location}",
            f"购买时间:{bought_on}",
            f"购车购买价:{price}",
            f"油耗:{fuel}",
            f"目前行驶:{mileage}",
            f"其它:{extras}",
        )
        f.write(",".join(fields) + "\n")
def main():
    """Fetch the hard-coded autohome page and pass its markup to the parser."""
    parse_page_source(get_page_source("https://k.autohome.com.cn/146/"))
if __name__ == '__main__':
    try:
        main()
    finally:
        # The module-level output handle ``f`` is not closed anywhere else;
        # close it here so buffered lines reach disk even if scraping raises.
        f.close()
运行结果: