接着上一篇文章，这次爬取小米应用商店（app.mi.com）的 app 数据。
主要爬取“应用”和“游戏”这两大类 app 的数据。
import requests
from lxml import etree
import re
import datetime
# Root URL of the app.mi.com site that this script scrapes.
# NOTE(review): no use of this constant is visible in this part of the file —
# confirm it is referenced further down before relying on it.
url_2 = "http://app.mi.com/"
def fun(url, page1, pageId1):
data = requests.get(url).text
# 去除“”
a = re.sub('"', '', data)
# 去除 :
b = re.sub(':', '', a)
# 去除 ,
c = re.sub(',', '', b)
d = re.sub('{', '', c)
e = re.sub('}', '', d)
strId = re.findall(r"packageName(.+?)appId", e)
leng = len(strId)
if leng == 0:
print(leng)
return
for a in strId:
nstr = "http://app.mi.com/details?id=" + a
data2 = requests.get(nstr).text
s = etree.HTML(data2)
try:
name =