# 1. 数据来源:http://www.chinaclear.cn/zdjs/tjyb1/center_tjbg.shtml
# 2. 获取内容:2009年至今的“主要指标概览”数据
# 3. 以下为参考代码,可直接使用
import requests
import re
import datetime
def get_month_range(start_day, end_day):
    """Return every month from start_day's month through end_day's month.

    Both endpoints are inclusive and only the ``year`` and ``month``
    attributes of the arguments are read; the day component is ignored.

    Args:
        start_day: Object with integer ``year`` and ``month`` attributes
            (for example ``datetime.date``) giving the first month.
        end_day: Object with integer ``year`` and ``month`` attributes
            giving the last month.

    Returns:
        A chronologically ordered list of strings shaped like
        ``'2009年04月'`` (month zero-padded to two digits). The list is
        empty when end_day's month precedes start_day's month.
    """
    # Put both endpoints on one linear "months since year 0" axis so that
    # crossing a year boundary is plain integer arithmetic.
    first_index = start_day.year * 12 + (start_day.month - 1)
    last_index = end_day.year * 12 + (end_day.month - 1)

    month_range = []
    for index in range(first_index, last_index + 1):
        year, zero_based_month = divmod(index, 12)
        month_range.append('%s年%02d月' % (year, zero_based_month + 1))
    return month_range
def spider(date_list):
for i in date_list:
date = int(i.replace('年','').replace('月',''))
if date >=200904 and date < 201001 :
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
get_data = r'<tr style=.*?>.*?<td width="277" .*?><font .*?>.*?</font>.*?<p .*?><span .*?>(.*?)</span></p>.*?</font></td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[3]
end_investors = data[2]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5]
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7]
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201001:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
get_data = r'<tr style=.*?>.*?<td .*?>.*?<p align="right" .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
month = data[0]
new_investors = data[3]
end_investors = data[2]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5]
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7]
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date >201001 and date <=201311:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
get_data = r'<tr style="height:13.5pt">.*?<td .*?>.*?<p .*?><span .*?>.*?</span></p>.*?</td>.*?<td .*?>.*?<p .*? align="right"><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[3]
end_investors = data[2]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5]
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7]
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201312:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style="height: 13.5pt;">.*?<td width="19%" .*?>.*?<p align="right" .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[3]
end_investors = data[2]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5].replace('<span> </span>','')
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7].replace('<span> </span>','')
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201401:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="19%" .*?>.*?<p .*? align="right"><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[3]
end_investors = data[2]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5].replace('<span> </span>','')
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7].replace('<span> </span>','')
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date > 201401 and date < 201410:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="20%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[3]
end_investors = data[2]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5].replace('<span> </span>', '')
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7].replace('<span> </span>', '')
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date >= 201410 and date <= 201412:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="20%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[4]
end_investors = data[2]
registered_securities_number = data[5]
registered_securities_totalparvalue = data[6]
registered_securities_totalmarketvalue = data[7]
non_restricted_market_value = data[8]
total_number_of_transfers = data[10]
total_amount_of_transfer = data[11]
total_settlement = data[12]
net_settlement = data[13]
print(month, new_investors, end_investors, registered_securities_number, registered_securities_totalparvalue,registered_securities_totalmarketvalue, non_restricted_market_value, total_number_of_transfers,total_amount_of_transfer, total_settlement, net_settlement)
elif date >= 201501 and date <201503:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data1 = r'<tr style=.*?>.*?<td .*?>.*?<p .*? align=.*?><span .*?>.*?</span></p>.*?</td>.*?<td .*?>.*?<p .*? align=.*?><span .*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?<p .*? align=.*?><span .*?>.*?</span></p>.*?</td>.*?</tr>'
pattern1 = re.compile(get_data1, re.I | re.S | re.M)
data1 = pattern1.findall(html)
get_data = r'<tr style=.*?>.*?<td .*?>.*?<p .*? align="left"><span .*?>.*?</span></p>.*?</td>.*?.*?<td .*?>.*?<p .*? align="right"><span .*?>(.*?)</span></p>.*?</td>.*?.*?<td .*?>.*?<p .*? align="right"><span .*?>.*?</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
month = data1[0]
new_investors = data[3]
end_investors = data[1]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5]
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7]
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number, registered_securities_totalparvalue,registered_securities_totalmarketvalue, non_restricted_market_value, total_number_of_transfers,total_amount_of_transfer, total_settlement, net_settlement)
elif date == 201503:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td .*?>.*?<p .*?><span .*?>.*?</span></p>.*?</td>.*?<td .*?>.*?<p .*? align=.*?><span .*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?<p .*? align=.*?><span .*?>.*?</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
month = data[0]
new_investors = data[3]
end_investors = data[1]
registered_securities_number = data[4]
registered_securities_totalparvalue = data[5]
registered_securities_totalmarketvalue = data[6]
non_restricted_market_value = data[7]
total_number_of_transfers = data[9]
total_amount_of_transfer = data[10]
total_settlement = data[11]
net_settlement = data[12]
print(month, new_investors, end_investors, registered_securities_number, registered_securities_totalparvalue,registered_securities_totalmarketvalue, non_restricted_market_value, total_number_of_transfers,total_amount_of_transfer, total_settlement, net_settlement)
elif date > 201503 and date <=201506:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="100" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[5]
registered_securities_totalparvalue = data[6]
registered_securities_totalmarketvalue = data[7]
non_restricted_market_value = data[8]
total_number_of_transfers = data[10]
total_amount_of_transfer = data[11]
total_settlement = data[12]
net_settlement = data[13]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201507:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="158" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201508:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="100" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201509:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="158" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date > 201509 and date <= 201511:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="26%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201512:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="27%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201601:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="29%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date > 201601 and date <=201607:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="26%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3]
registered_securities_totalparvalue = data[4]
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date ==201608:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td .*? width="142" noWrap="">.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3].replace('<span> </span>','').strip()
registered_securities_totalparvalue = data[4].replace('<span> </span>','').strip()
registered_securities_totalmarketvalue = data[5].replace('<span> </span>','').strip()
non_restricted_market_value = data[6].replace('<span> </span>','').strip()
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date >= 201609 and date<=201610:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="26%" .*?>.*?<p .*?><span .*?>(.*?)</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3].replace(' ','').replace('<span>','').replace('</span>','').strip()
registered_securities_totalparvalue = data[4].replace(' ','').replace('<span>','').replace('</span>','').strip()
registered_securities_totalmarketvalue = data[5].replace(' ','').replace('<span>','').replace('</span>','').strip()
non_restricted_market_value = data[6].replace(' ','').replace('<span>','').replace('</span>','').strip()
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201611:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="26%" .*?><span style=.*?><font .*?>.*?</font></span>.*?<p align="right" .*?><span style=.*?><font .*?>(.*?)</font></span></p>.*?<span style=.*?><font .*?>.*?</font></span></td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3].replace(' ','').replace('<span style="font-size: 10.5pt;">','').replace('</span>','').strip()
registered_securities_totalparvalue = data[4].replace(' ','').replace('<span style="font-size: 10.5pt;">','').replace('</span>','').strip()
registered_securities_totalmarketvalue = data[5].replace(' ','').replace('<span style="font-size: 10.5pt;">','').replace('</span>','').strip()
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201612:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr style=.*?>.*?<td width="26%" .*?><span style=.*?>.*?</span>.*?<p align="right" .*?><span .*?>(.*?)</span></p>.*?<span style=.*?>.*?</span></td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2].replace('<span>','').strip()
registered_securities_number = data[3].replace('<span>','').replace(' ','').replace('<span style="font-size: 10.5pt;">','').replace('</span>','').strip()
registered_securities_totalparvalue = data[4].replace('<span>','').replace(' ','').replace('<span style="font-size: 10.5pt;">','').replace('</span>','').strip()
registered_securities_totalmarketvalue = data[5].replace('<span>','').replace(' ','').replace('<span style="font-size: 10.5pt;">','').replace('</span>','').strip()
non_restricted_market_value = data[6]
total_number_of_transfers = data[8]
total_amount_of_transfer = data[9]
total_settlement = data[10]
net_settlement = data[11]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date == 201701:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = '<tbody>.*?<tr style=.*?>.*?<td .*?>.*?</td>.*?<td .*?>.*?<p .*?><span style=.*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?</td>.*?</tr>.*?<tr style=.*?>.*?<td .*?>.*?</td>.*?<td .*?>.*?<p .*?><span style=.*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?</td>.*?</tr>.*?</tbody>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
month = data[0][0]
new_investors = data[0][1].replace('<span>', '').replace('</span>', '').replace(' ', '').strip()
get_data_one = '<tr .*?>.*?<td .*?>.*?<p .*?>.*?<span .*?>.*?</span>.*?</p>.*?</td>.*?<td .*?>.*?<p .*?>.*?<span style=.*?>.*?<span>.*?</span>(.*?)</span>.*?</p>.*?</td>.*?</tr>'
pattern_one = re.compile(get_data_one, re.I | re.S | re.M)
data_one = pattern_one.findall(html)[1:-1]
# print(data_one)
end_investors = data_one[0]
registered_securities_number = data_one[1]
registered_securities_totalparvalue = data_one[2]
registered_securities_totalmarketvalue = data_one[3]
non_restricted_market_value = data_one[4]
total_number_of_transfers = data_one[5]
total_amount_of_transfer = data_one[6]
total_settlement = data_one[6]
get_data_second ='<td .*?>.*?<p .*?>.*?<span .*?>.*?<span>.*?</span>.*?<span>.*?</span>(.*?)</span>.*?</p>.*?</td>'
pattern_second = re.compile(get_data_second , re.I | re.S | re.M)
net_settlement = pattern_second.findall(html)[-2]
print(month,new_investors,end_investors,registered_securities_number,registered_securities_totalparvalue,registered_securities_totalmarketvalue,non_restricted_market_value,total_number_of_transfers,total_amount_of_transfer,total_settlement,net_settlement)
elif date > 201701 and date <= 201705:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url,headers=Headers, data=data)
response.encoding= 'utf-8'
html = response.text
# print(html)
get_data = '<tbody>.*?<tr style=.*?>.*?<td .*?>.*?</td>.*?<td .*?>.*?<p .*?><span style=.*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?</td>.*?</tr>.*?<tr style=.*?>.*?<td .*?>.*?</td>.*?<td .*?>.*?<p .*?><span style=.*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?</td>.*?</tr>.*?</tbody>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
month = data[0][0]
get_data1 = '<tr .*?>.*?<td .*?>.*?<p .*?>.*?<span .*?>.*?</span>.*?</p>.*?</td>.*?<td .*?>.*?<p .*?>.*?<span style=.*?>.*?<span>.*?</span>(.*?)</span>.*?</p>.*?</td>.*?</tr>'
pattern1 = re.compile(get_data1, re.I | re.S | re.M)
data1 = pattern1.findall(html)[1:]
new_investors = data1[0]
end_investors = data1[1]
registered_securities_number = data1[2]
registered_securities_totalparvalue = data1[3]
registered_securities_totalmarketvalue = data1[4]
non_restricted_market_value = data1[5]
total_number_of_transfers = data1[6]
total_amount_of_transfer = data1[7]
total_settlement = data1[8]
net_settlement = data1[9]
print(month,new_investors,end_investors,registered_securities_number,registered_securities_totalparvalue,registered_securities_totalmarketvalue,non_restricted_market_value,total_number_of_transfers,total_amount_of_transfer,total_settlement,net_settlement)
elif date >= 201706 and date <201709:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
get_data = '<tr style=.*?>.*?<td .*?>.*?<p .*?><span style=.*?>.*?</span></p>.*?</td>.*?<td .*?>.*?<p .*?><span style=.*?>(.*?)</span></p>.*?</td>.*?<td .*?>.*?<p .*?><span style=.*?>.*?</span></p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3].replace('<span>', '').replace('</span>', '').replace(' ', '').strip()
registered_securities_totalparvalue = data[4].replace('<span>', '').replace('</span>', '').replace(' ', '').strip()
registered_securities_totalmarketvalue = data[5].replace('<span>', '').replace('</span>', '').replace(' ', '').strip()
non_restricted_market_value = data[6].replace('<span>', '').replace('</span>', '').replace(' ', '').strip()
total_number_of_transfers = data[7]
total_amount_of_transfer = data[8]
total_settlement = data[9]
net_settlement = data[10]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
elif date >= 201709:
Headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Cache-Control': 'max-age=0',
'Connection': 'keep-alive',
'Content-Length': '122',
'Content-Type': 'application/x-www-form-urlencoded',
'Cookie': 'JSESSIONID=00005q0oN93pCb5mAK5eZQGAa7t:1amj63rte',
'Host': 'www.chinaclear.cn',
'Origin': 'http://www.chinaclear.cn',
'Referer': 'http://www.chinaclear.cn/cms-search/monthview.action?action=china',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.193 Safari/537.36'
}
data = {
'riqi': '{0}'.format(i),
'channelFidStr': 'e990411f19544e46be84333c25b63de6',
'channelIdStr': 'bd095cc08f744c089b159a3bb744b9d0'
}
url = 'http://www.chinaclear.cn/cms-search/monthview.action?action=china'
response = requests.post(url, headers=Headers, data=data)
response.encoding = 'utf-8'
html = response.text
# print(html)
get_data = r'<tr .*?>.*?<td .*?>.*?<p .*?>.*?<span .*?>.*?</span>.*?</p>.*?</td>.*?<td .*?>.*?<p .*?>.*?<span .*?>(.*?)</span>.*?</p>.*?</td>.*?</tr>'
pattern = re.compile(get_data, re.I | re.S | re.M)
data = pattern.findall(html)
# print(data)
month = data[0]
new_investors = data[1]
end_investors = data[2]
registered_securities_number = data[3].replace('<span> ','')
registered_securities_totalparvalue = data[4].strip().replace('<span style="font-size:10.5pt;font-family:宋体;color:#424242;">','').replace('<span style="font-size: 10.5pt; font-family: 宋体; color: rgb(66, 66, 66);">','').replace('<span style="font-size:10.5pt;font-family: 宋体;color:#424242;">','').replace(' <span style="font-size: 9.0pt;font-family:宋体;color:#424242;">','')
registered_securities_totalmarketvalue = data[5]
non_restricted_market_value = data[6]
total_number_of_transfers = data[7]
total_amount_of_transfer = data[8]
total_settlement = data[9]
net_settlement = data[10]
print(month, new_investors, end_investors, registered_securities_number,registered_securities_totalparvalue, registered_securities_totalmarketvalue,non_restricted_market_value, total_number_of_transfers, total_amount_of_transfer, total_settlement,net_settlement)
else:
pass
if __name__ == '__main__':
    # Script entry point. The two bounds below yield month labels from
    # 2005年01月 through 2020年09月 (both inclusive); only the year and month
    # of each bound are used by get_month_range.
    range_start = datetime.date(2005, 1, 31)
    range_end = datetime.date(2020, 9, 1)
    date_list = get_month_range(range_start, range_end)
    # Fetch and print the monthly overview figures for every label.
    spider(date_list)