# -*- coding: utf-8 -*-
import importlib
import sys
import urllib
import urllib.request

import pandas as pd
import simplejson as json
from bs4 import BeautifulSoup
# Scrape five JSON price feeds (coloredPrice005001..005005) from custeel.com,
# collect the records into a DataFrame, and save them as CSV and Excel.
#
# NOTE(review): the original called importlib.reload(sys) — a Python-2
# sys.setdefaultencoding relic that is a no-op in Python 3 — so it is removed.

# Spoof a desktop-browser User-Agent so the server does not reject the request.
header = {}
header['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0'

# Build the five target URLs: coloredPrice005001.json .. coloredPrice005005.json.
urls = []
for i in range(1, 6):
    url = 'http://www.custeel.com/reform/json/luliao/coloredPrice00500' + str(i) + '.json'
    urls.append(url)

# Column schema of the price records, taken from the feed's JSON keys.
columns = ['JYSC', 'JYSNAME', 'PRICE', 'PRICE_CCL', 'PRICE_CJL', 'PRICE_H',
           'PRICE_K', 'PRICE_L', 'PRICE_S', 'PRICE_UP', 'TIME', 'VARIETY', 'VNAME']

rows = []
for url in urls:
    # Create the request with the spoofed headers and fetch the page;
    # the with-block guarantees the HTTP response is closed.
    req = urllib.request.Request(url, headers=header)
    with urllib.request.urlopen(req) as resp:
        plain_text = resp.read().decode('utf-8')
    # Explicit parser avoids bs4's GuessedAtParserWarning and makes the
    # result independent of which parsers happen to be installed.
    soup = BeautifulSoup(plain_text, 'html.parser')
    # The JSON payload is embedded as the text of the first <p> element.
    records = json.loads(soup.find('p').text)
    for rec in records:
        rows.append(pd.Series(rec, index=columns))

# Build the frame once from the collected rows: DataFrame.append was removed
# in pandas 2.0, and appending per-row was O(n^2) anyway.
js_price = pd.DataFrame(rows, columns=columns)

js_price.to_csv('js_price.csv', sep=',')
# Write .xlsx instead of legacy .xls: the xlwt engine needed for .xls was
# removed from pandas.  The context manager replaces the deprecated .save().
with pd.ExcelWriter('js_price.xlsx') as writer1:
    js_price.to_excel(writer1, sheet_name='Sheet1')