# Category filter, e.g. "variety shows" (综艺类)
# Select every element whose class list contains the token "yk-filter-item",
# take the <a> elements nested under it, and extract their href attributes.
# NOTE(review): `response` is presumably a Scrapy response (e.g. from
# `scrapy shell`); the result is not stored, so this is only useful
# interactively -- confirm.
response.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "yk-filter-item", " " ))]//a').xpath("./@href").extract()
# Page URLs
# Gather every href attribute on the page and keep only the parts that look
# like Youku video pages (http://v.youku.com/v_show/id_<id>.html).
# The pattern is a raw string so that `\.` is not treated as an (invalid)
# string escape, and the dots in the host name are escaped so they match a
# literal "." rather than any character.
links = response.xpath('//@href').re(r"http://v\.youku\.com/v_show/id_.+?\.html")
# Next page
# Extract the href of every <a> nested under an <li> whose class attribute is
# exactly "next" (an <li> carrying additional classes would not match).
# NOTE(review): presumably this is the pagination "next page" link -- confirm
# against the page markup.
response.xpath('//li[@class="next"]//a').xpath("./@href").extract()
# Title
# Extract the text content of the page's <title> element(s).
response.xpath('//title/text()').extract()
# Run a command-line (cmd) command
# Run `you-get --json <url>` as a child process and print whatever it wrote to
# stdout and stderr (both are captured as bytes).
import subprocess

video_url = 'http://v.youku.com/v_show/id_XMjc3MTkxODQyOA==.html?spm=a2htv.20009910.m_86821.5~5!3~5!2~5!2~A&from=y1.3-tv-grid-1007-9910.86827-86824-86821.8-1'

# Pass the command as an argument list. With shell=False a single command
# string is treated as the program name on POSIX, so the whole
# "you-get --json http://..." text would be looked up as one executable and
# fail with FileNotFoundError. A list also keeps the "&" in the URL away from
# any shell interpretation.
cmd = ['you-get', '--json', video_url]
f = subprocess.Popen(cmd, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# communicate() reads both pipes to EOF and waits for the process to exit,
# which avoids a deadlock when the child fills one of the pipe buffers.
(stdoutput, erroutput) = f.communicate()
print(stdoutput)
print(erroutput)