# Data source: https://srh.bankofchina.com/search/whpj/search_cn.jsp
import time
import requests
from urllib.parse import urlencode
import datetime
from datetime import timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
# --- pandas / numpy display configuration ---
# Show all columns
pd.set_option('display.max_columns', None)
# Show all rows
pd.set_option('display.max_rows', None)
pd.set_option('display.width',120)
# Precision settings for printed output
# pd.set_option('precision',3)
np.set_printoptions(precision=3)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
# --- request configuration for the Bank of China FX search endpoint ---
url = 'https://srh.bankofchina.com/search/whpj/search_cn.jsp'
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36 Edg/94.0.992.47',
'Content-Type': 'application/x-www-form-urlencoded',
'Host': 'srh.bankofchina.com',
'Origin': 'https://srh.bankofchina.com',
'Referer': 'https://srh.bankofchina.com/search/whpj/search_cn.jsp',
}
# Form fields for the quote search; empty dates mean "latest quotes".
data = {
'erectDate': '', # start date, e.g. 2023-08-01
'nothing': '', # end date
'pjname': '港币',  # currency name (HKD); change to query another currency
# 'page': 2,
'head': 'head_620.js',
'bottom': 'bottom_591.js',
}
# Pre-encode once; matches the explicit urlencoded Content-Type header above.
data = urlencode(data)
def get_rate():
    """Fetch the latest HKD quotes from Bank of China and archive them.

    POSTs the pre-encoded module-level form ``data`` to ``url``, parses the
    second HTML table on the result page into a DataFrame, drops the empty
    trailing columns and NaN rows, and writes one Excel snapshot per call
    under ``./rate/rate_<YYYY-MM-DD>/``.

    Returns:
        pd.DataFrame: quote table (columns include 现汇买入价 spot-buy,
        现汇卖出价 spot-sell, 发布时间 release time), newest row first.
    """
    resp = requests.post(url, data, headers=headers)
    # Table 0 is page chrome; table 1 holds the actual quote rows.
    df_list = pd.read_html(resp.text)[1]
    # The page pads the table with empty "Unnamed" columns; errors='ignore'
    # keeps this robust if the site's layout changes.  A single dropna()
    # suffices (the original called it twice).
    df_list = df_list.drop(
        columns=['Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10'],
        errors='ignore',
    ).dropna()
    # One snapshot file per request, grouped into a per-day folder.
    path = f'./rate/rate_{time.strftime("%Y-%m-%d", time.localtime())}'
    os.makedirs(path, exist_ok=True)  # race-free vs. exists()+makedirs()
    df_list.to_excel(f'{path}/{time.time()}.xlsx')
    # NOTE(review): strftime uses local time but the format ends in "Z"
    # (UTC marker) — timestamps may be mislabeled; confirm intent.
    print(datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ"),
          f" CR_rolling:{df_list['现汇买入价'][0]}, {df_list['现汇卖出价'][0]}, [{min(df_list['现汇买入价'])}, {max(df_list['现汇买入价'])}]")
    return df_list
def plot_buying_rate():
    """Continuously poll get_rate() and live-plot buy/sell rates.

    Seeds the series from the latest quote table (reversed so the oldest
    quote plots first), then re-polls every 30 s, appending a point only
    when the release time (发布时间) changes.  On any failure (network
    outage, parse error) it logs and backs off for an hour instead of
    crashing.  Runs forever; interrupt to stop.
    """
    pf = get_rate()
    # Site lists newest quote first; reverse for chronological plotting.
    buying_rate = pf['现汇买入价'].to_list()[::-1]
    selling_rate = pf['现汇卖出价'].to_list()[::-1]
    release_time = pf['发布时间'][0]
    while True:
        try:
            plt.plot(buying_rate, '--b.')
            plt.plot(selling_rate, '--r.')
            plt.pause(30)  # sleep AND service the GUI event loop
            pf = get_rate()
            if pf['发布时间'][0] == release_time:
                continue  # no new quote published yet
            buying_rate.append(pf['现汇买入价'][0])
            selling_rate.append(pf['现汇卖出价'][0])
            release_time = pf['发布时间'][0]
            print(f'min:{min(buying_rate)}, max:{max(buying_rate)}, max:{max(selling_rate)}')
        except Exception as e:
            # Best-effort poller: log what went wrong (the original
            # swallowed it silently) and retry after an hour.
            print(f'{datetime.datetime.now()} poll failed: {e!r}; retrying in 1h')
            time.sleep(3600)
def merge_record():
    """Merge all archived .xlsx snapshots under ./rate into one workbook.

    Recursively collects every .xlsx file written by get_rate(), concatenates
    them, de-duplicates on release time (发布时间), sorts chronologically,
    plots buy/sell rates, and writes the result to du_merged_data.xlsx.
    """
    folder_path = 'rate'
    # Recursively gather every Excel snapshot (one file per get_rate call).
    xlsx_files = []
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith('.xlsx'):
                xlsx_files.append(os.path.join(root, file))
    # Read everything first, then concat once — repeated pd.concat in a
    # loop is quadratic in the number of files.
    frames = [pd.read_excel(file) for file in tqdm(xlsx_files)]
    merged_data = pd.concat(frames) if frames else pd.DataFrame()
    # Keep one row per release time, in chronological order.
    du = merged_data.drop_duplicates('发布时间')
    # 'Unnamed: 0' is the index column to_excel wrote; ignore if absent.
    du = du.drop(columns=['Unnamed: 0'], errors='ignore').dropna()
    du = du.sort_values('发布时间')
    du.plot('发布时间', ['现汇买入价', '现汇卖出价'])
    du.to_excel('du_merged_data.xlsx', index=False)
if __name__ == '__main__':
    # Default entry point: live-plot mode.  Swap the calls below to
    # consolidate previously archived snapshots instead.
    plot_buying_rate()
    # merge_record()