#! /usr/bin/python2 # coding=utf-8 import urllib2 import csv import time import pymongo import mechanize from datetime import datetime from datetime import timedelta global stocks global k_ri global list_code global conn_list, conn_k list_code = [] def init_mongodb_list(): global stocks global conn_list # mongodb_link = 'mongodb://127.0.0.1:27017' # mongoClient = MongoClient(mongodb_link) conn_list = pymongo.MongoClient("localhost", 27017) conn_list.stock_list.authenticate("d", "zz") db = conn_list.stock_list stocks = db.stocks # return stocks def init_mongodb_k(): global k_ri global conn_k conn_k = pymongo.MongoClient("localhost", 27017) conn_k.db_stock.authenticate("d", "zz") db = conn_k.db_stock k_ri = db.k_ri def get_newst_date(code): date = "" l = k_ri.find({"code": code}).sort([('date', -1)]).limit(1) for i in l: code = i.get("code") date = i.get("date") print code, date # print l.get("date") # print code, date,"----------------------------------------" return date; # yahoo ''' def update_ri(code, date, open, high, low, close, volume, adj_close): print code, date, open, high, low, close, volume, adj_close k_ri.update({"code": code, "date": date}, {"$setOnInsert":{"open": open, "high": high, "low": low, "close": close, "volume": volume, "adj_close": adj_close}}, upsert = True) ''' # 163 def update_ri(code, date, close, high, low, open, pre_close, change_value, change_quote, turnover_rate, \ volume, turnover, total_market_value, market_capitalization, turnover_number): print "sql", code, date, close, high, low, open, pre_close, change_value, change_quote, turnover_rate, \ volume, turnover, total_market_value, market_capitalization, turnover_number k_ri.update({"code": code, "date": date}, {"$setOnInsert": {"open": open, "high": high, "low": low, "close": close, "pre_close": pre_close, "change_quote": change_quote, "change_value": change_value, "turnover_rate": turnover_rate, "volume": volume, "turnover": turnover, "total_market_value": total_market_value, 
"market_capitalization": market_capitalization, "turnover_number": turnover_number}}, upsert=True) def get_stock_list(): l = stocks.find() for i in l: code = i.get("code") name = i.get("name") list_code.append({"code": code, "name": name}) # print type(name), name, type(code),code list_code.append({"code": "000001", "name": "上证指数".decode('utf8')}) list_code.append({"code": "399001", "name": "深证指数".decode('utf8')}) list_code.append({"code": "399006", "name": "创业板指".decode('utf8')}) def day_plus(str): # now = datetime.now() day = datetime.strptime(str, "%Y-%m-%d") #day_diff = timedelta(days=1) #day = day + day_diff return day def day_str_change(str): day = datetime.strptime(str, "%Y-%m-%d") return day.strftime('%Y%m%d') # ''' def stock_header(code): url = 'http://quotes.money.163.com/service/chddata.html?code=' t1 = ('60', '900') t2 = ('000', '002', '300', '200') t3 = ('399001', '399006') if code.startswith(t1): str = '0' + code elif code.startswith('000001'): str = '0' + code elif code.startswith(t3): str = '1' + code elif code.startswith(t2): str = '1' + code else: str = code print code url = url + str return url # ''' ''' def stock_header(code): url = 'http://table.finance.yahoo.com/table.csv?s=' t1 = ('60', '900') t2 = ('000', '002', '300', '200') t3 = ('399001','399006') if code.startswith(t1): str = code + '.ss' elif code.startswith('000001'): str = code + '.ss' elif code.startswith(t3): str = code + '.sz' elif code.startswith(t2): str = code + '.sz' else: str = code print code url = url + str return url ''' def deal_url(str_day, url): if ("" == str_day): print url return url; day = day_plus(str_day) now = datetime.now() if (day.strftime('%Y%m%d') >= now.strftime('%Y%m%d')): print day.strftime('%Y%m%d'), now.strftime('%Y%m%d') return "" # 163 data 日期 股票代码 名称 收盘价 最高价 最低价 开盘价 前收盘 涨跌额 涨跌幅 换手率 成交量 成交金额 总市值 流通市值 成交笔数 str_url = url + '&start=' + day.strftime('%Y%m%d') + '&end=' + now.strftime('%Y%m%d') # yahoo data Date Open High Low Close volume Adj Close ''' mon1 = 
int(now.strftime('%m')) - 1 mon2 = int(day.strftime('%m')) - 1 str_url = url + '&d=%s&e=%s&f=%s&g=d&a=%s&b=%s&c=%s&ignore=.csv' % ( str(mon1), now.strftime('%d'), now.strftime('%Y'), str(mon2), day.strftime('%d'), day.strftime('%Y')) ''' # print str_url return str_url def file_to_sql(code, content): #print code, content rows = content.split('\n') rows.sort() i = 0 cnt = len(rows) - 1 print cnt for row in rows: # 忽略第一行和最后一行 网易 列名和多个空字符 if (i == cnt): #i += 1 print "end file", i break split_row = row.split(",") #print split_row # split_row[1] = int(split_row[1]) # full_data.append(split_row) # print "-----------------type--",type(split_row), split_row # print split_row[1],split_row[0] try: # yahoo # if ("" == split_row[0]): # continue # update_ri(code, split_row[0], float(split_row[1]), float(split_row[2]), float(split_row[3]), float(split_row[4]), int(split_row[5]), float(split_row[6])) # 163 full_data = [] for row_s in split_row: # print row_s, len(row_s) str = row_s.replace("\r", '').replace('None', '0') # print str if ('' == str): # print "kong" str = '0' # print row_s # print "--------------------" full_data.append(str) # print row_s,"---" # print code, len(full_data) # print full_data # print split_row,i if (16 != len(full_data)): break # print "full",full_data update_ri(code, full_data[0], float(full_data[3]), float(full_data[4]), float(full_data[5]), float(full_data[6]), \ float(full_data[7]), float(full_data[8]), float(full_data[9]), float(full_data[10]), float(full_data[11]), \ float(full_data[12]), float(full_data[13]), float(full_data[14]), float(full_data[15])) except ValueError: print '\033[1;31;40m' print split_row print "--------------------ValueError----------------------------------------------" print '\033[0m' continue i += 1 def get_day(code, url): print code, url if ("" == url): print "---newst---date---------------------------------" return # url = 'http://quotes.money.163.com/service/chddata.html?code=1000002' # url = 
'http://quotes.money.163.com/service/chddata.html?code=0601398&start=20000720&end=20150508' # url = 'http://table.finance.yahoo.com/table.csv?s=000002.sz' # url = 'http://table.finance.yahoo.com/table.csv?s=000002.sz&d=6&e=22&f=2006&g=d&a=11&b=16&c=1991&ignore=.csv' # url = 'http://hq.sinajs.cn/?list=sh600127' # http://market.finance.sina.com.cn/downxls.php?date=2016-10-28&symbol=sz300127 # print url req_header = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11', 'Accept': 'text/html;q=0.9,*/*;q=0.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3', 'Accept-Encoding': 'gzip', 'Connection': 'close', 'Referer': None # 注意如果依然不能抓取的话,这里可以设置抓取网站的host } req_header = {('User-agent','Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')} req_timeout = 500 req = urllib2.Request(url,None,req_header) #req = urllib2.Request(url) # print req # 如果不需要设置代理,下面的set_proxy就不用调用了。由于公司网络要代理才能连接外网,所以这里有set_proxy… # req.set_proxy('proxy.XXX.com:911', 'http') # socket = urllib2.urlopen(req,None,req_timeout) try: socket = urllib2.urlopen(req, None, req_timeout) # print socket content = socket.read().strip() # content = socket.read().decode('GB18030') socket.close() except urllib2.HTTPError, e: print '\033[1;31;40m' print 'The server couldn\'t fulfill the request.' print 'Error code: ', e.code print 'Error reason: ', e.reason print '\033[0m' except urllib2.URLError, e: print '\033[1;31;40m' print 'We failed to reach a server.' 
print 'Reason: ', e.reason print '\033[0m' else: # everything is fine file_to_sql(code, content) class NoHistory(object): def add(self, *a, **k): pass def clear(self): pass def browser(url): print url if ("" == url): print "---newst---date---------------------------------" return "" br = mechanize.Browser(history=NoHistory()) #options br.set_handle_equiv(True) #br.set_handle_gzip(True) br.set_handle_redirect(True) br.set_handle_referer(True) br.set_handle_robots(False) #Follows refresh 0 but not hangs on refresh > 0 br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1) br.set_debug_http(True) br.set_debug_redirects(True) br.set_debug_responses(True) #欺骗行为 br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')] #上面的代码主要用于初始化设置,最好设置一下 try: # 打开百度 r = br.open(url) #获取百度的表单,从中找到输入汉字的位置 ''' for f in br.forms(): print f br.select_form(nr = 0) #搜索关键字“火车” br.form['wd'] = "火车" br.submit() # 查看搜索结果 brr=br.response().read() #是html代码,能看到火车的搜索结果 print brr ''' content = r.read().strip() #print content ''' rows = content.split('\n') rows.sort() print rows print "---------------" print len(rows) i = 0 cnt = len(rows) - 1 for row in rows: print row print cnt ''' except: print '\033[1;31;40m' print "open err ------------------------------------" print '\033[0m' return "" return content def get_day_list(code): str_day = get_newst_date(code) print str_day url = stock_header(code) # print url url = deal_url(str_day, url) # print url #get_day(code, url) content = browser(url) #print content if("" != content): file_to_sql(code, content) if __name__ == '__main__': init_mongodb_list() print stocks.count() get_stock_list() conn_list.close() # print list_code init_mongodb_k() # get_code_k_ri() print len(list_code) #for i in list_code: # print i["code"],i["name"] for code in list_code: get_day_list(code["code"]) # get_day_list("399006") conn_k.close()
根据股票列表,下载每只股票的日 K 线数据并存入 MongoDB。
最新推荐文章于 2024-08-12 15:08:41 发布