一个户外论坛的特点:列出一些活动,有翻页功能;点击某个活动会显示该活动的信息,二楼一般显示报名名单。需要的数据:活动的信息、报名的名单、价钱、主
一个股票数据(沪深)爬虫和选股策略测试框架,数据基于雅虎YQL和新浪财经。
根据选定的日期范围抓取所有沪深两市股票的行情数据。
根据指定的选股策略和指定的日期进行选股测试。
计算选股测试实际结果(包括与沪深300指数比较)。
保存数据到JSON文件、CSV文件。
支持使用表达式定义选股策略。
支持多线程处理。
代码
main.py
from stockholm import Stockholm
import option
import os
def checkFoldPermission(path):
    """Check that the export folder exists (creating it if needed) and is writable.

    The placeholder ``'USER_HOME/tmp/stockholm_export'`` is expanded to
    ``<home>/tmp/stockholm_export`` before the check.

    Args:
        path: Folder the exported data will be written to.

    Returns:
        True when the folder exists (or was created) and a file can be
        written inside it; False otherwise. The underlying error is printed.
    """
    import tempfile  # local import: only this probe needs it

    if path == 'USER_HOME/tmp/stockholm_export':
        path = os.path.expanduser('~') + '/tmp/stockholm_export'
    try:
        # exist_ok avoids a spurious failure when the folder appears between
        # an existence check and the creation call.
        os.makedirs(path, exist_ok=True)
        # Probe with a uniquely named temporary file instead of a fixed
        # "test.txt", so a file the user already keeps in the folder is never
        # overwritten or deleted. The context manager closes (and removes)
        # the probe even if the write fails.
        with tempfile.TemporaryFile(dir=path) as probe:
            probe.write(b"test")
            probe.flush()
    except (OSError, TypeError, ValueError) as e:
        # OSError: filesystem/permission problems; TypeError/ValueError:
        # malformed path values (e.g. None or an embedded NUL).
        print(e)
        return False
    return True
def main():
    """Parse the command-line options and run Stockholm when the store path is usable."""
    args = option.parser.parse_args()
    if checkFoldPermission(args.store_path):
        print('Stockholm is starting...\n')
        Stockholm(args).run()
        print('Stockholm is done...\n')
        return
    # The store path could not be created or written to.
    print('\nPermission denied: %s' % args.store_path)
    print('Please make sure you have the permission to save the data!\n')


if __name__ == '__main__':
    main()
option.py
import argparse
import datetime
def get_date_str(offset):
    """Return today's local date shifted by *offset* days as ``YYYY-MM-DD``.

    Args:
        offset: Number of days to add to today (negative for the past).
            ``None`` is treated as 0.

    Returns:
        The shifted date formatted with ``%Y-%m-%d``.
    """
    days = 0 if offset is None else offset
    shifted = datetime.datetime.today() + datetime.timedelta(days=days)
    return shifted.strftime("%Y-%m-%d")
# Default value for every command-line option, keyed by the option's
# argparse destination name.
_default = {
    # Y/N switches
    'reload_data': 'Y',
    'gen_portfolio': 'N',
    # output format and encoding
    'output_type': 'json',
    'charset': 'utf-8',
    # date settings: test range in days, loading window, portfolio target date
    'test_date_range': 60,
    'start_date': get_date_str(-90),
    'end_date': get_date_str(None),
    'target_date': get_date_str(None),
    # 'USER_HOME' is a placeholder that the consumers of store_path expand
    'store_path': 'USER_HOME/tmp/stockholm_export',
    'thread': 10,
    'testfile_path': './portfolio_test.txt',
    'db_name': 'stockholm',
    'methods': '',
}
parser = argparse.ArgumentParser(description='A stock crawler and portfolio testing framework.')

# One row per option: (flag, value type, dest, help template).
# The dest doubles as the key into _default; the template's %s is filled
# with that default value.
_ARGUMENT_SPECS = (
    ('--reload', str, 'reload_data', 'Reload the stock data or not (Y/N), Default: %s'),
    ('--portfolio', str, 'gen_portfolio', 'Generate the portfolio or not (Y/N), Default: %s'),
    ('--output', str, 'output_type', 'Data output type (json/csv/all), Default: %s'),
    ('--charset', str, 'charset', 'Data output charset (utf-8/gbk), Default: %s'),
    ('--testrange', int, 'test_date_range', 'Test date range(days): %s'),
    ('--startdate', str, 'start_date', 'Data loading start date, Default: %s'),
    ('--enddate', str, 'end_date', 'Data loading end date, Default: %s'),
    ('--targetdate', str, 'target_date', 'Portfolio generating target date, Default: %s'),
    ('--storepath', str, 'store_path', 'Data file store path, Default: %s'),
    ('--thread', int, 'thread', 'Thread number, Default: %s'),
    ('--testfile', str, 'testfile_path', 'Portfolio test file path, Default: %s'),
    ('--dbname', str, 'db_name', 'MongoDB DB name, Default: %s'),
    ('--methods', str, 'methods', 'Target methods for back testing, Default: %s'),
)
for _flag, _value_type, _dest, _help_template in _ARGUMENT_SPECS:
    parser.add_argument(
        _flag,
        type=_value_type,
        default=_default[_dest],
        dest=_dest,
        help=_help_template % _default[_dest],
    )
def main():
    """Parse the command-line options and print the resulting namespace."""
    print(parser.parse_args())


if __name__ == '__main__':
    main()
stockholm.py
#coding:utf-8
import requests
import json
import datetime
import timeit
import time
import io
import os
import csv
import re
from pymongo import MongoClient
from multiprocessing.dummy import Pool as ThreadPool
from functools import partial
class Stockholm(object):
def __init__(self, args):
## flag of if need to reload all stock data
self.reload_data = args.reload_data
## flag of if need to generate portfolio
self.gen_portfolio = args.gen_portfolio
## type of output file json/csv or both
self.output_type = args.output_type
## charset of output file utf-8/gbk
self.charset = args.charset
## portfolio testing date range(# of days)
self.test_date_range = args.test_date_range
## stock data loading start date(e.g. 2014-09-14)
self.start_date = args.start_date
## stock data loading end date
self.end_date = args.end_date
## portfolio generating target date
self.target_date = args.target_date
## thread number
self.thread = args.thread
## data file store path
if(args.store_path == 'USER_HOME/tmp/stockholm_export'):
self.export_folder = os.path.expanduser('~') + '/tmp/stockholm_export'
else:
self.export_folder = args.store_path
## portfolio testing file path
self.testfile_path = args.testfile_path
## methods for back testing
self.methods = args.methods
## for getting quote symbols
self.all_quotes_url = 'http://money.finance.sina.com.cn/d/api/openapi_proxy.php'
## for loading quote data
self.yql_url = 'http://query.yahooapis.com/v1/public/yql'
## export file name
self.export_file_name = 'stockholm_export'
self.index_array = ['000001.SS', '399001.SZ', '000300.SS']
self.sh000001 = {'Symbol': '000001.SS', 'Name': '上证指数'}
self.sz399001 = {'Symbol': '399001.SZ', 'Name': '深证成指'}
self.sh000300 = {'Symbol': '000300.SS