# coding=utf-8

# Read the CSV files whose names start with '[wait]'
# copyright @ WangXinsheng
# http://www.cnblogs.com/wangxinsheng/
import datetime
import gzip
import http.cookiejar
import os
import re
import time
import urllib.parse
import urllib.request
import zlib

def getOpener(head):
    """Build a urllib opener that keeps cookies and sends the given headers.

    Args:
        head: Mapping of HTTP header name to header value.

    Returns:
        A ``urllib.request.OpenerDirector`` with a cookie processor installed
        and ``addheaders`` set to the ``(name, value)`` pairs of ``head``.
    """
    # A fresh in-memory cookie jar is attached to every opener.
    cookie_jar = http.cookiejar.CookieJar()
    cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
    opener = urllib.request.build_opener(cookie_handler)
    # addheaders expects a list of (name, value) tuples.
    opener.addheaders = list(head.items())
    return opener

def ungzip(data):
    """Return ``data`` gunzipped, or unchanged when it is not gzip data.

    Args:
        data: Raw response body as bytes.

    Returns:
        The decompressed bytes when ``data`` is a valid gzip stream,
        otherwise ``data`` itself.
    """
    try:
        # Attempt to decompress.
        print('正在解压.....')
        data = gzip.decompress(data)
        print('解压完毕!')
    except (OSError, EOFError, zlib.error):
        # Only the errors that signal "not (valid) gzip data" are treated as
        # "nothing to decompress"; anything else is a real bug and propagates.
        print('未经压缩, 无需解压')
    return data

# Constants

# HTTP headers handed to getOpener() for every price request.
# NOTE(review): 'deflate' is advertised in Accept-Encoding, but ungzip() only
# attempts gzip decompression - confirm the server never answers with deflate.
header = {
    # 'Connection': 'Keep-Alive',
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'yahoo.com',
    'Referer': 'http://www.yahoo.com'
}

# URL template for the daily price CSV. The placeholders are filled in by
# plain string replacement:
#   {%1}  stock symbol, e.g. 000001.sz
#   END:   {%2} month - 1   {%3} day   {%4} year
#   START: {%5} month - 1   {%6} day   {%7} year
priceUrl = (
    'http://table.finance.yahoo.com/table.csv?'
    's={%1}&d={%2}&e={%3}'
    '&f={%4}&g=d&a={%5}&b={%6}&c={%7}&ignore=.csv'
)
52
53
def to_yahoo_symbol(raw_code):
    """Turn a stock code from the CSV into the symbol used in the price URL.

    Quotes are removed; codes starting with '6' get the '.ss' suffix and every
    other code gets '.sz'.
    """
    code = raw_code.replace('"', '').replace("'", "").strip()
    return code + ('.ss' if code.startswith('6') else '.sz')


def query_window(date_text):
    """Return the ``(from_day, end_day)`` datetimes queried for a row date.

    Args:
        date_text: Date in ``YYYY-MM-DD`` or ``YYYY/MM/DD`` form.

    Returns:
        ``from_day`` (row date shifted as described below, plus one day) and
        ``end_day`` (15 days after ``from_day``), both naive datetimes.

    Raises:
        ValueError: If ``date_text`` matches neither date format.
    """
    try:
        parsed = time.strptime(date_text, "%Y-%m-%d")
    except ValueError:
        parsed = time.strptime(date_text, "%Y/%m/%d")
    stamp = int(time.mktime(parsed))
    # NOTE(review): mktime() reads the date as *local* midnight while the
    # conversion below is UTC, so the calendar date of from_day depends on the
    # machine's UTC offset. Kept exactly as the original computed it - confirm
    # the intended window before changing.
    as_utc = datetime.datetime.fromtimestamp(stamp, datetime.timezone.utc)
    from_day = as_utc.replace(tzinfo=None) + datetime.timedelta(days=1)
    end_day = from_day + datetime.timedelta(days=15)
    return from_day, end_day


def build_price_url(template, symbol, from_day, end_day):
    """Fill the ``{%1}``..``{%7}`` placeholders of ``template``.

    Months are written zero-based (month - 1); days and years are written
    as-is.
    """
    replacements = (
        ('{%1}', symbol),
        ('{%2}', str(end_day.month - 1)),
        ('{%3}', str(end_day.day)),
        ('{%4}', str(end_day.year)),
        ('{%5}', str(from_day.month - 1)),
        ('{%6}', str(from_day.day)),
        ('{%7}', str(from_day.year)),
    )
    url = template
    for placeholder, value in replacements:
        url = url.replace(placeholder, value)
    return url


def price_change_columns(csv_text, max_days=5):
    """Compute the quoted ``"<pct>%[<close>]"`` columns from a price CSV.

    The first line of ``csv_text`` is skipped and the remaining lines are
    walked from last to first (presumably the service lists newest rows
    first - confirm). The first usable row supplies the start price (column
    4); every later row whose volume (column 5) is non-zero yields one
    column with the percentage change against the start price.

    Args:
        csv_text: Decoded CSV body.
        max_days: Maximum number of columns to produce.

    Returns:
        A list of at most ``max_days`` column strings; empty when the text
        holds no usable rows.
    """
    columns = []
    start_price = None
    for line in reversed(csv_text.split('\n')[1:]):
        if len(columns) >= max_days:
            break
        fields = line.strip().split(',')
        try:
            close_price = float(fields[4])
        except (IndexError, ValueError):
            # Blank or malformed line (e.g. an error page): ignore it.
            continue
        if start_price is None:
            if close_price == 0:
                # A zero start price cannot serve as a percentage base.
                return columns
            start_price = close_price
            continue
        try:
            volume = int(fields[5])
        except (IndexError, ValueError):
            continue
        if volume == 0:
            # Zero volume rows are not counted as a day.
            continue
        change = round((close_price - start_price) / start_price * 100, 2)
        columns.append('"' + str(change) + '%[' + str(close_price) + ']"')
    return columns


def fetch_price_csv(url):
    """Download ``url`` and return the (gunzipped) body decoded as text."""
    opener = getOpener(header)
    # The timeout keeps one stalled request from hanging the whole run.
    with opener.open(url, timeout=30) as response:
        payload = response.read()
    return ungzip(payload).decode()


def annotate_row(line):
    """Return ``line`` (no trailing newline) with price-change columns added.

    Rows that already have more than 9 columns, rows whose date cannot be
    parsed, and rows whose download fails are returned unchanged so no input
    row is ever lost.
    """
    fields = line.split(',')
    if len(fields) > 9 or len(fields) < 2:
        return line
    try:
        # Work out the date range that has to be fetched.
        from_day, end_day = query_window(fields[0].strip().strip('"\''))
    except ValueError:
        return line
    url = build_price_url(priceUrl, to_yahoo_symbol(fields[1]), from_day, end_day)
    print('抓取URL:' + url)

    # Fetch the prices from Yahoo.
    try:
        csv_text = fetch_price_csv(url)
    except Exception:
        # Best effort: any download/decoding problem leaves the row as it was.
        print('网络抓取失败')
        return line

    # Append the percentage changes.
    return line + ''.join(',' + column for column in price_change_columns(csv_text))


def process_wait_file(directory, name):
    """Annotate one '[wait]' CSV and write it next to it as '[处理完了]...'.

    Args:
        directory: Directory containing the file.
        name: File name, starting with '[wait]'.

    Returns:
        The list of lines written to the output file.
    """
    source = directory + '/' + name
    # Read the file.
    print('读取文件:' + source)
    with open(source, 'rt') as reader:
        lines = reader.readlines()

    rows = []
    for index, raw in enumerate(lines):
        line = raw.rstrip('\n')
        if index == 0:
            # Header row: add the titles of the five new columns.
            rows.append(line + ',"一天后","二天后","三天后","四天后","五天后"\n')
        else:
            rows.append(annotate_row(line) + '\n')

    # Output.
    target = directory + '/' + name.replace('[wait]', '[处理完了]')
    with open(target, 'w') as writer:
        writer.writelines(rows)
    print('处理完了\n文件地址:' + target)
    return rows


path = r'.'
files = sorted(os.listdir(path))
# Rows of the most recently processed file; rebuilt for every file so one
# file's rows never leak into the next file's output.
out = []

for f in files:
    if f.startswith('[wait]') and f.endswith('.csv'):
        out = process_wait_file(path, f)