import requests
import time
import csv
from lxml import etree
# headers={"User-Agent":}
# Listing page that is scraped.
SHOW_LIST_URL = "https://dianying.taobao.com/showList.htm?spm=a1z21.3046609.w2.3.4d60112aCdaBZl&n_s=new"
# Browser-like User-Agent sent with the request.
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
# Seconds to wait for the server; without a timeout requests may block forever.
REQUEST_TIMEOUT = 10
# Absolute XPath matching one <div> per movie card on the listing page.
MOVIE_CARDS_XPATH = '/html/body/div[4]/div[1]/div[2]/div[1]/div'
# XPaths below are relative to a single movie card.
TITLE_XPATH = "./a[1]/div[3]/span[1]/text()"
RATING_XPATH = './a[1]/div[3]/span[2]/text()'
DETAIL_SPANS_XPATH = './a[1]/div[4]/div[2]/span'
# File the CSV rows are written to.
OUTPUT_PATH = 'maoyan.text'


def first_text(node, path: str, default=''):
    """Return the first result of evaluating *path* on *node*, or *default*.

    The XPath is evaluated once; *default* is returned when it matches nothing.
    """
    matches = node.xpath(path)
    return matches[0] if matches else default


def build_row(name: str, details: list, grade: str) -> list:
    """Return one CSV row: the title, each detail field, then the rating."""
    return [name, *details, grade]


def parse_movies(page_text: str) -> list:
    """Extract one row per movie card from the listing HTML.

    Each movie is also echoed to stdout as it is parsed. Cards without a
    title span are skipped instead of aborting the whole run.

    Returns:
        A list of rows as produced by ``build_row``.
    """
    tree = etree.HTML(page_text)
    rows = []
    for card in tree.xpath(MOVIE_CARDS_XPATH):
        name = first_text(card, TITLE_XPATH, default=None)
        if name is None:
            # Not a movie card (no title span); nothing to record.
            continue
        # Element.text of every detail <span>, in document order.
        details = [span.text for span in card.xpath(DETAIL_SPANS_XPATH)]
        # The rating span is optional; fall back to an empty string.
        grade = first_text(card, RATING_XPATH)

        print(f"电影名:{name}\n")
        print("电影基本信息:\n")
        for detail in details:
            print(f" {detail}\n")
        print(f"电影评分:{grade}\n")
        print('\n')

        rows.append(build_row(name, details, grade))
    return rows


def main() -> None:
    """Fetch the listing page and write one CSV row per movie.

    The HTTP status code is always printed. Nothing is written unless the
    status is 200.
    """
    response = requests.get(
        SHOW_LIST_URL, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
    )
    status = response.status_code
    print(status)
    if status != 200:
        return

    rows = parse_movies(response.text)
    # newline='' lets the csv module control line endings itself.
    with open(OUTPUT_PATH, 'w', errors='ignore', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(rows)


if __name__ == "__main__":
    main()
python爬虫(可用)
最新推荐文章于 2024-07-08 00:01:13 发布