Note:
1.The data we gained from yfinance contains 5 columns: open, close, high, low, volume.
2. The latest API allows us visit 100 records at most per round.
OK, let’s take Airbnb as example (code.ANBN)
Step1. import pkgs
import yfinance
import requests
import pandas as pd
from pandas_datareader import data as pdr
import time
import simplejson as json
from datetime import date
import math
from datetime import timedelta
Step2. run the function
def data_access(d0,d1,ticker):
delta = d1 - d0
dat = pd.DataFrame()
for i in range(1,math.ceil(delta.days/100)+1):
print(i)
bg = str(d0) +" 03:59:59"
ed = str(d0 + timedelta(days=99)) +" 03:59:59"
print(bg,ed)
timeArray_bg = time.strptime(bg, "%Y-%m-%d %H:%M:%S")
timeArray_ed = time.strptime(ed, "%Y-%m-%d %H:%M:%S")
timestamp_bg = str(int(time.mktime(timeArray_bg)))
timestamp_ed = str(int(time.mktime(timeArray_ed)))
link = 'https://finance.yahoo.com/quote/' + ticker + '/history?period1='+timestamp_bg+'&period2='+timestamp_ed+'&interval=1d&frequency=1d&filter=history'
r = requests.get(link, headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'})
pandas_data = pd.read_html(r.text, parse_dates=True)
dt = pandas_data[0:1][0]
dt = dt.iloc[:-1].set_index("Date")
dat = dt.append(dat)
d0 = d0 + timedelta(days=100)
return dat
Step3. pass the parameter
date_begin = date(2020, 12, 20)
date_end = date(2021, 11, 12)
ticker_code = 'ABNB'
Step4. generate the data
rawdata = data_access(date_begin,date_end,ticker_code)
filename = ticker_code + "_" + str(date_begin) + "_" + str(date_end)
rawdata.to_excel('%s.xlsx' %(filename))
Output