import requests
import time
from lxml import etree
from fake_useragent import UserAgent
def getHTML(url):
    """Download *url* with HTTP GET and return the response body as text.

    The response encoding is replaced by ``apparent_encoding`` (the encoding
    guessed from the body) before the text is decoded.

    Args:
        url: Address to request.

    Returns:
        The decoded response text. On any failure (connection error,
        timeout, non-2xx status, ...) no exception is raised; instead a
        string starting with "爬取错误" ("crawl error") followed by the
        exception's ``args`` is returned.
    """
    try:
        # Bounded wait so a stalled server cannot hang the script forever.
        r = requests.get(url, timeout=10)
        # Must be *called*: the bare attribute reference is a no-op and
        # would let 4xx/5xx error pages through as if they were valid.
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except Exception as e:
        # Deliberate best-effort: report the failure as text, never raise.
        return "爬取错误{}".format(e.args)
def appHTML(html):
    """Parse *html* and pull out three parallel lists of text nodes.

    The document is parsed with ``etree.HTML``, the function then pauses for
    two seconds, and finally three XPath queries are evaluated in order.

    Args:
        html: HTML markup as a string.

    Returns:
        A 3-tuple ``(names, rooms, counts)`` holding the text of
        ``<a class="title">``, ``<i class="nick">`` and ``<i class="js-num">``
        elements respectively.
    """
    tree = etree.HTML(html)
    time.sleep(2)
    queries = (
        '//a[@class="title"]/text()',
        '//i[@class="nick"]/text()',
        '//i[@class="js-num"]/text()',
    )
    # Evaluated in the listed order: titles, then nicks, then numbers.
    names, rooms, counts = [tree.xpath(query) for query in queries]
    return names, rooms, counts
def main(limit=120):
    """Fetch the hard-coded Huya listing page and print one line per entry.

    Downloads the page with ``getHTML``, extracts the three parallel lists
    with ``appHTML`` and prints them row by row.

    Args:
        limit: Maximum number of rows to print. Defaults to 120, the count
            that was previously hard-coded.
    """
    url = "https://www.huya.com/g/lol"
    html = getHTML(url)
    names, rooms, counts = appHTML(html)
    # zip stops at the shortest list, so a page with fewer than `limit`
    # entries (or a failed download yielding no matches) no longer raises
    # IndexError the way fixed-range indexing did.
    for name, room, count in zip(names[:limit], rooms[:limit], counts[:limit]):
        print(name, room, count)


# Guard lives at module level so importing this file does not start a scrape.
if __name__ == '__main__':
    main()