原文中通过Google提供的API来抓取价格,但由于现在无法访问Google,因此改为直接从原文提供的html文件中读取价格信息,然后再进一步进行分析预测。
代码实现:
# -*- coding: utf-8 -*-
"""
Created on Mon May 7 09:55:34 2018
@author: lizihua
"""
#from time import sleep
#import json
#import urllib
#from BeautifulSoup import BeautifulSoup
from bs4 import BeautifulSoup
from numpy import random,zeros,mat,var, mean,array,nonzero,multiply,linalg,eye,shape,exp,ones
import numpy as np
import matplotlib.pyplot as plt
"""
#由于Google购物API关闭,采用下一段读取网页文件代码代替此爬虫过程
def searchForSet(retX, retY, setNum, yr, numPce, origPrc):
    '''Query the Google Shopping search API for one set and collect its prices.

    NOTE: this function lives inside a disabled region of the file that is
    delimited by triple double quotes, so this docstring deliberately uses
    triple single quotes to avoid terminating that region early.

    Args:
        retX: list extended in place with one
            [yr, numPce, newFlag, origPrc] row per accepted offer.
        retY: list extended in place with the selling price of each
            accepted offer (same order as retX).
        setNum: integer substituted into the "lego+%d" search query.
        yr: stored as the first feature of every row
            (presumably the release year -- confirm with the caller).
        numPce: stored as the second feature of every row
            (presumably the piece count -- confirm with the caller).
        origPrc: reference price; only offers priced above half of it
            are kept.

    Returns:
        None. Results are appended to retX and retY.
    '''
    # Imported locally: the matching file-level imports are commented out,
    # so the function would otherwise fail with NameError once re-enabled.
    import json
    import urllib.request
    from time import sleep

    sleep(10)  # wait before each request
    # Substituted into the "key=" query parameter below; looks like a
    # placeholder that must be replaced with a real API key.
    myAPIstr = 'get from code.google.com'
    searchURL = 'https://www.googleapis.com/shopping/search/v1/public/products?key=%s&country=US&q=lego+%d&alt=json' % (myAPIstr, setNum)
    # Context manager guarantees the HTTP response is closed.
    with urllib.request.urlopen(searchURL) as pg:
        retDict = json.loads(pg.read())
    for i, currItem in enumerate(retDict['items']):
        try:
            newFlag = 1 if currItem['product']['condition'] == 'new' else 0
            for item in currItem['product']['inventories']:
                sellingPrice = item['price']
                # Keep only offers above half of the reference price.
                if sellingPrice > origPrc * 0.5:
                    print("%d\t%d\t%d\t%f\t%f" % (yr, numPce, newFlag, origPrc, sellingPrice))
                    retX.append([yr, numPce, newFlag, origPrc])
                    retY.append(sellingPrice)
        except Exception:
            # Best effort: report the malformed item and continue, without
            # swallowing KeyboardInterrupt / SystemExit as a bare except would.
            print('problem with item %d' % i)
def setDataCollect(retX, retY):
searchForSet(retX, retY, 8288, 2006, 80