Matlab - 数据准备
1. 用tushare数据源
先上代码:
% Demonstrates how to download Chinese financial market data through tushare.
% NOTE(review): pro_api and pro_bar are not defined in this file; presumably
% they come from the tushare MATLAB SDK, which must be on the MATLAB path.

% Replace the placeholder below with your own tushare pro token.
token = 'pro token, 需要的话私信我';
api = pro_api(token);

% Example: fetch the stock list.
%df_basic = api.query('stock_basic');
%disp(df_basic(1:10,:));

% Example: fetch daily bars for 000001.SZ starting 1999-01-01.
% The end_date is passed as an empty string (presumably "no upper bound" -
% confirm against the tushare documentation).
df_daily = api.query('daily', 'ts_code', '000001.SZ', 'start_date', '19990101', 'end_date', '');
% Preview at most the first 10 rows; the range is clamped so that a result
% with fewer than 10 rows (or none) does not raise an index error.
disp(df_daily(1:min(10, size(df_daily, 1)), :));

% Example: fetch intraday / minute bars.
%dd1 = pro_bar('000001.SZ', api, '19990101', '20181031');
%disp(dd1(1:10,:));

% To keep a local copy, convert the table to a timetable and save it as .mat:
%   - after readtable, call table2timetable
%   - save('fileName', 'variableToSave')
2. 用本地database数据源
%% MongoDB database
% Connect to a MongoDB server on this machine and read every document of one
% collection (here the collection is named after the ticker '000001.SZ').
server = 'localhost';
port = 27017;
dbname = 'stockDaysDbTuShare';
conn = mongo(server,port,dbname);
collection = '000001.SZ';
documents = find(conn,collection);
% Release the database connection as soon as the query has finished.
close(conn);
% Show the first document. The original statement ended with a semicolon and
% therefore displayed nothing; the guard avoids an index error when the
% collection is empty.
if ~isempty(documents)
    disp(documents(1));
end
3. 用FRED database数据源
% Data retrieval and preprocessing using the MATLAB Datafeed Toolbox FRED
% connection.
symbol = 'SP500';
% Build the end date from numeric parts so the result does not depend on
% heuristic parsing of a free-form date string.
endDate = datenum(2019, 12, 31);
c = fred;
% Request the 3652 days (about ten years) that end at endDate.
d = fetch(c, symbol, endDate-3652, endDate);
% The data is in memory now; close the connection before the handle is
% discarded by clearvars below.
close(c);
% Column 1 of d.Data is converted from serial date numbers, column 2 is used
% as the price series.
time = datetime(d.Data(:,1),'ConvertFrom','datenum');
prices = d.Data(:,2);
T = timetable(time,prices);
% Keep only the timetable in the workspace and store the raw download.
clearvars -except T
save('data1.mat', 'T')

% Data cleaning: remove rows with missing values.
T = rmmissing(T);

% Factor creation: add year / month / day columns derived from the timestamps.
[T.y,T.m,T.d] = ymd(T.time);

% Add the day of the week, where day 1 of the week is Sunday.
T.dayOfWeek = day(T.time, 'dayofweek');
4. 用Yahoo数据源
自定义函数- getMarketDataViaYahoo:
function data = getMarketDataViaYahoo(symbol, startdate, enddate, interval)
% Downloads market data from Yahoo Finance for a specified symbol and
% time range.
%
% INPUT:
% symbol    - a ticker symbol, e.g. 'AMD', 'BTC-USD' (required)
% startdate - the date from which the market data will be requested;
%             anything accepted by datetime(); defaults to '1-Jan-2018'
% enddate   - the market data will be requested till this date;
%             anything accepted by datetime(); defaults to the current time
% interval  - the market data will be returned in this interval;
%             supported intervals are '1d', '5d', '1wk', '1mo', '3mo';
%             defaults to '1d'
%
% OUTPUT:
% data      - the table built by formTable from the downloaded CSV text, or
%             [] when no session cookie is returned or the download request
%             does not succeed
%
% ERRORS:
% Raises an error when called without a symbol, when the crumb cannot be
% located in the page returned for the symbol, or when no usable crumb is
% obtained after several attempts.
%
% Example:
% data = getMarketDataViaYahoo('AMD', '1-Jan-2018', datetime('today'), '5d');
%
% Author: Artem Lenskiy, PhD
% Version: 0.91
%
% Special thanks to Patryk Dwórznik (https://github.com/dworznik) for
% a hint on JavaScript processing.
%
% Alternative approach is given here
% https://stackoverflow.com/questions/50813539/user-agent-cookie-workaround-to-web-scraping-in-matlab

    %% Validate the arguments and fill in defaults
    if nargin < 1
        error('At least one parameter is required. Specify ticker symbol.');
    end
    if nargin < 2
        startdate = '1-Jan-2018';
    end
    if nargin < 3
        enddate = datetime(); % now
    end
    if nargin < 4
        interval = '1d';
    end
    % Both bounds are sent to Yahoo as POSIX timestamps.
    startdate = posixtime(datetime(startdate));
    enddate = posixtime(datetime(enddate));

    %% Construct an URL to obtain the crumb value that is linked to the session cookie.
    % It could be important to request data for the same range, however to
    % save bandwidth and time, request data for one day.
    uri = matlab.net.URI(['https://finance.yahoo.com/quote/', upper(symbol), '/history'],...
        'period1', num2str(uint64(posixtime(datetime())), '%.10g'),...
        'period2', num2str(uint64(posixtime(datetime())), '%.10g'),...
        'interval', interval,...
        'filter', 'history',...
        'frequency', interval,...
        'guccounter', 1);
    options = matlab.net.http.HTTPOptions('ConnectTimeout', 20, ...
        'DecodeResponse', 1, 'Authenticate', 0, 'ConvertResponse', 0);

    %% Extract the crumb value
    % The idea is taken from here:
    % http://blog.bradlucas.com/posts/2017-06-02-new-yahoo-finance-quote-download-url/
    % The page is requested again whenever the crumb contains '\', since
    % requestObj.send does not correctly send URLs with slash. The number of
    % attempts is bounded so that a persistent failure cannot loop forever.
    maxAttempts = 10;
    crumb = "\";
    for attempt = 1:maxAttempts
        requestObj = matlab.net.http.RequestMessage();
        [response, ~, ~] = requestObj.send(uri, options);
        % Capture the crumb itself instead of slicing a fixed-width window,
        % so crumbs of any length are extracted completely.
        token = regexp(response.Body.Data, ...
            '"CrumbStore":\{"crumb":"(.*?)"\}', 'tokens', 'once');
        if isempty(token)
            error(['Possibly ', symbol ,' is not found']);
        end
        token = string(token);
        crumb = token(1);
        if ~contains(crumb, '\')
            break;
        end
    end
    if contains(crumb, '\')
        error('Could not obtain a usable crumb value after %d attempts.', maxAttempts);
    end

    %% Find the session cookie
    % The idea is taken from here:
    % https://stackoverflow.com/questions/40090191/sending-session-cookie-with-each-subsequent-http-request-in-matlab?rq=1
    % It is important:
    % (1) to add session cookie that matches crumb values;
    % (2) specify UserAgent
    setCookieFields = response.getFields('Set-Cookie');
    setContentFields = response.getFields('Content-Type');
    if isempty(setCookieFields)
        disp('Check ticker symbol and that Yahoo provides data for it');
        data = [];
        return;
    end
    cookieInfos = setCookieFields.convert(uri);
    requestObj = requestObj.addFields(matlab.net.http.field.CookieField([cookieInfos.Cookie]));
    % Only echo the content type back when the response actually carried one.
    if ~isempty(setContentFields)
        contentInfos = setContentFields.convert();
        requestObj = requestObj.addFields(matlab.net.http.field.ContentTypeField(contentInfos));
    end
    requestObj = requestObj.addFields(matlab.net.http.field.GenericField('User-Agent', 'Mozilla/5.0'));

    %% Send a request for data
    % Construct an URL for the specific data
    uri = matlab.net.URI(['https://query1.finance.yahoo.com/v7/finance/download/', upper(symbol) ],...
        'period1', num2str(uint64(startdate), '%.10g'),...
        'period2', num2str(uint64(enddate), '%.10g'),...
        'interval', interval,...
        'events', 'history',...
        'crumb', crumb,...
        'literal');
    options = matlab.net.http.HTTPOptions('ConnectTimeout', 20,...
        'DecodeResponse', 1, 'Authenticate', 0, 'ConvertResponse', 0);
    [response, ~, ~] = requestObj.send(uri, options);

    % Inspect the HTTP status: comparing the response object itself with a
    % char vector is always false, which let error bodies reach formTable.
    if response.StatusCode ~= matlab.net.http.StatusCode.OK
        disp('No data available');
        data = [];
    else
        data = formTable(response.Body.Data);
    end
end
%% Convert data to the table format
function procData = formTable(data)
% Parses comma-separated text into a table.
%
% INPUT:
% data     - text whose first line is a comma-separated header and whose
%            remaining lines are records; the first field of each record is
%            parsed with datetime(), every other field with str2double()
%
% OUTPUT:
% procData - table with one variable per header field (spaces removed from
%            the names); rows whose numeric fields are all NaN (for example
%            'null' entries) are dropped. An empty table is returned when
%            the text contains no lines at all.

    records = splitlines(string(data));
    % Ignore blank lines (such as the one produced by a trailing newline)
    % instead of assuming exactly one of them terminates the payload;
    % otherwise the last record is lost when the text has no final newline.
    records(strlength(strtrim(records)) == 0) = [];
    if isempty(records)
        procData = table();
        return;
    end

    header = split(records(1), ',');
    numFields = numel(header);
    numRows = numel(records) - 1;

    % Preallocate both outputs so they exist even when there are no records.
    dates = NaT(numRows, 1);
    content = zeros(numRows, numFields - 1);
    for k = 1:numRows
        items = split(records(k + 1), ',');
        dates(k) = datetime(items(1));
        content(k, :) = str2double(items(2:numFields)).';
    end

    % Some tables contain 'null' values in certain rows, that are converted
    % to NaN by str2double. Rows in which every numeric field is NaN are
    % removed (the check no longer assumes exactly six numeric columns).
    remInds = all(isnan(content), 2) & (numFields > 1);
    content(remInds, :) = [];
    dates(remInds) = [];

    % Create the table for any number of columns and name the variables
    % after the header fields with spaces stripped ('Adj Close' -> 'AdjClose').
    procData = [table(dates), array2table(content)];
    procData.Properties.VariableNames = cellstr(replace(header, ' ', '')).';
end
再通过函数调用自己要下载的数据,如5099.KL
% Example call: request data for ticker '5099.KL' from 1 July 2013 to
% 31 July 2018. The interval argument is omitted, so getMarketDataViaYahoo
% falls back to its default of '1d'.
StockData = getMarketDataViaYahoo('5099.KL', '1-July-2013', '31-July-2018');