Python爬虫(可用)

import requests
import time
import csv
from lxml import etree
# headers={"User-Agent":}

# Scrape the movie list page of Taobao Movies (dianying.taobao.com) and dump
# the title, the basic-info lines and the rating of every movie to a CSV file.
#
# Output: 'maoyan.text' in the current directory, one CSV row per movie:
#     title, info line 1, ..., info line N, rating
# The same data is echoed to stdout while scraping.


def _first_text(node, path):
    """Return the first result of XPath *path* evaluated on *node*, or ''.

    Guards against cards that lack the requested element, so a missing
    span yields an empty string instead of an IndexError.
    """
    found = node.xpath(path)
    return found[0] if found else ''


# Rows are collected across ALL loop iterations; creating this list inside
# the loop would discard earlier pages as soon as more than one is fetched.
final = []
url = "https://dianying.taobao.com/showList.htm?spm=a1z21.3046609.w2.3.4d60112aCdaBZl&n_s=new"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

for i in range(1):
    try:
        # Without a timeout requests may block forever on a stalled server.
        html = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Treat a transport failure like a non-200 answer: report and skip.
        print(exc)
        continue

    code = html.status_code
    print(code)
    if code != 200:
        continue

    selecter = etree.HTML(html.text)
    # Each child <div> of this container is expected to be one movie card.
    files = selecter.xpath('/html/body/div[4]/div[1]/div[2]/div[1]/div')

    for file in files:
        book_name = _first_text(file, "./a[1]/div[3]/span[1]/text()")
        if not book_name:
            # No title span: presumably not a movie card, so skip it rather
            # than crash or emit an empty row.
            continue

        # Every <span> in the info box is one line of basic information;
        # .text is None for spans without direct text (written as '' by csv).
        book_introduce_temp = [
            span.text for span in file.xpath('./a[1]/div[4]/div[2]/span')
        ]

        # The rating span is absent for some cards; fall back to ''.
        book_grades = _first_text(file, './a[1]/div[3]/span[2]/text()')

        print("电影名:{}\n".format(book_name))
        print("电影基本信息:\n")
        for s in book_introduce_temp:
            print("                  {}\n".format(s))
        print("电影评分:{}\n".format(book_grades))
        print('\n')

        # One proper CSV row per movie instead of a single flat list with
        # literal '\n' cells used as pseudo row separators.
        final.append([book_name] + book_introduce_temp + [book_grades])

with open('maoyan.text', 'w+', errors='ignore', newline='', encoding='utf-8') as f:
    f_txt = csv.writer(f)
    f_txt.writerows(final)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值