# 豆瓣短评.py -- Douban short-review scraper
from multiprocessing import Process
import threading
from lxml import etree
import requests
import time
import random
import ciyun as cy
# Reference timestamp captured when the module is first executed.
# time.clock() was removed in Python 3.8, so fall back to
# time.perf_counter() when it is unavailable. The legacy clock is still
# preferred where it exists so this reading stays comparable with any
# other time.clock() reading taken on such interpreters.
start = getattr(time, "clock", time.perf_counter)()
def paqu(id,startpage,endpage,save):
j = 1
with open("/Users/zhangpengxu/Desktop/爬取影评/paquresult/"+str(save)+".txt", 'w', encoding='utf-8') as f:
for a in range(startpage,endpage):
for next in range(1,21):
url="https://movie.douban.com/subject/"+str(id)+"/comments?start="+str(20*a)
result=requests.get(url).content
req = etree.HTML(result)
links1=req.xpath('//*[@id="comments"]/div['+str(next)+']/div[2]/h3/span[2]/a/text()')
links2=req.xpath('//*[@id="comments"]/div['+str(next)+']/div[2]/p/span/text()')