网页表格数据
import json

import pandas as pd
import requests
from bs4 import BeautifulSoup
# HTTP headers sent with each POST issued by get_board_fn().
# NOTE(review): the Cookie value looks like it was copied from a browser
# session and may need refreshing -- confirm it is still required.
headers = {
    "Accept": "*/*",
    "authority": "www3.hkexnews.hk",
    "content-type": "application/x-www-form-urlencoded",
    # Three cookies joined with "; " exactly as they appear on the wire.
    "Cookie": "; ".join(
        (
            "AMCV_DD0356406298B0640A495CB8@AdobeOrg=179643557|MCIDTS|19439"
            "|MCMID|17476624355030571581838522540278947371|vVersion|5.5.0",
            "s_cc=true",
            "WT_FPC=id=23.43.249.170-2679618240.31022185"
            ":lv=1679465202144:ss=1679465192255",
        )
    ),
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54"
    ),
}
# Form fields posted (url-encoded) by get_board_fn() to the search page.
# The target is an .aspx page, whose hidden state fields are named with TWO
# leading underscores and no surrounding whitespace; the previous keys
# "_VIEWSTATE" and " __VIEWSTATEGENERATOR" were typos of those names.
# NOTE(review): the state tokens and dates below are hard-coded snapshots;
# they presumably have to be refreshed from a live page -- confirm.
form_data = {
    "__VIEWSTATE": "/wEPDwUJNjIxMTYzMDAwZGQ79IjpLOM+JXdffc28A8BMMA9+yg==",
    "__VIEWSTATEGENERATOR": "EC4ACD6F",
    "__EVENTVALIDATION": "/wEdAAdtFULLXu4cXg1Ju23kPkBZVobCVrNyCM2j+bEk3ygqmn1KZjrCXCJtWs9HrcHg6Q64ro36uTSn/Z2SUlkm9HsG7WOv0RDD9teZWjlyl84iRMtpPncyBi1FXkZsaSW6dwqO1N1XNFmfsMXJasjxX85jz8PxJxwgNJLTNVe2Bh/bcg5jDf8=",
    "today": "20230321",
    "sortBy": "stockcode",
    "sortDirection": "asc",
    "alertMsg": "",
    # Date string submitted as the shareholding date to search for.
    "txtShareholdingDate": "20230320",
    "btnSearch": "搜尋",
}
def get_board_fn():
    """Scrape the result table of the search page for the 'sz' and 'sh' variants.

    For each variant the module-level ``form_data`` is POSTed with the
    module-level ``headers``, the response HTML is parsed, and every table
    row that contains ``<td>`` cells is turned into a dict.

    Returns:
        list of dict: one dict per data row, rows of the 'sz' request first,
        then 'sh'. Each dict is built by pairing consecutive non-empty text
        lines of the row (first of a pair is the key, second the value).

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if a response does not contain the expected table.
    """
    all_ = []
    for ex in ['sz', 'sh']:
        url = 'https://www3.hkexnews.hk/sdw/search/mutualmarket_c.aspx?t={}'.format(ex)
        # A timeout keeps the script from hanging forever on a stalled
        # connection; raise_for_status surfaces HTTP errors at their source
        # instead of as a confusing parse failure further down.
        response = requests.post(url, headers=headers, data=form_data, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        table = soup.find('table', class_='table table-scroll table-sort table-mobile-list')
        if table is None:
            raise ValueError('result table not found in response for t={}'.format(ex))

        for tr in table.find_all('tr'):
            # Only rows that actually contain <td> cells carry data.
            if not tr.find_all('td'):
                continue
            cells = [part for part in tr.text.split('\n') if part != '']
            # Pair the fragments as key, value, key, value, ...; zip() stops at
            # the shorter slice, so a short or odd-length row no longer raises
            # IndexError.
            all_.append(dict(zip(cells[0::2], cells[1::2])))
    return all_
def get_board_fn2(url):
    """Parse the second HTML table at *url* into a list of dicts.

    The table is read with ``pandas.read_html`` (UTF-8) and the table at
    index 1 is used. Each string cell is split at its first ``':'``; the
    text before the colon becomes the key and everything after it the value.

    Args:
        url: location handed to ``pandas.read_html``.

    Returns:
        list of dict: one dict per table row.

    Raises:
        IndexError: if fewer than two tables are found at *url*.
    """
    all_ = []
    table = pd.read_html(url, encoding='utf-8')[1]
    for row in table.values.tolist():
        line = {}
        for cell in row:
            # Non-string cells (e.g. NaN for an empty cell) have no
            # "label:value" text to split, so they are skipped.
            if not isinstance(cell, str):
                continue
            # partition() splits on the FIRST colon only, so values that
            # contain ':' are kept whole, and a cell without any colon yields
            # an empty value instead of raising IndexError.
            label, _, value = cell.partition(':')
            line[label] = value
        all_.append(line)
    return all_
if __name__ == '__main__':
    # Script entry point: run the scraper and print what it returned as JSON
    # so that running the file produces visible output. ensure_ascii=False
    # keeps non-ASCII text readable instead of \uXXXX escapes.
    rows = get_board_fn()
    print(json.dumps(rows, ensure_ascii=False, indent=2))
参考:
- Python中post提交数据格式攻略 - 知乎
- Python发送form表单请求 - 知乎
- https://www.cnblogs.com/xiaoyunxiaogang/p/16913374.html