#!/usr/bin/python3
# coding: utf-8
'''
@auth: levAndreev
@time: 2022/4/7
'''
import requests
from bs4 import BeautifulSoup
import numpy as np
import os
import sys
# Request headers sent with every page fetch.  Only a (dated, spoofed)
# User-Agent is included; despite the name, no Referer header is set —
# presumably one existed once.  NOTE(review): confirm the site still
# accepts this legacy UA string.
Hostreferer = {
'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
}
def getHtml(url):
    """Fetch `url` with the module's spoofed headers and return the body text."""
    response = requests.get(url, headers=Hostreferer)
    return response.text
def get17500(page, last):
    """Scrape one result page and collect rows newer than sequence `last`.

    Returns (rows, done): `rows` holds "seq\tnumbers\n" strings in ascending
    order (newest rows are fetched first, so each is inserted at the front);
    `done` is True when no further pages need to be fetched.
    """
    rows = []
    url = f'https://www.17500.cn/widget/_ssq/ssqfanjiang/p/{page}.html'
    soup = BeautifulSoup(getHtml(url), 'html.parser')
    table_rows = soup.findAll('tr')
    # Drop the header row and the trailing pager row when present.
    if table_rows:
        del table_rows[0]
    if table_rows:
        table_rows.pop()
    if not table_rows:
        return rows, True
    for row in table_rows:
        seq_text = row.contents[0].text
        if int(seq_text) <= last:
            # Reached data we already have — stop paging.
            return rows, True
        rows.insert(0, seq_text + '\t' + row.contents[3].text + '\n')
        print(rows[0][:-1])
    return rows, False
def lastSeq(path):
    """Return the newest draw sequence stored in `path` and strip the file's
    trailing 3-byte terminator (newline + "`;") so new rows can be appended.

    Creates the file and returns 0 when it does not exist; returns 0 and
    leaves the file untouched when it is too small to contain data.

    BUG FIXES vs original: the original truncated the file BEFORE parsing,
    so a malformed tail left the file corrupted; and `line[-2]` raised
    IndexError when the tail held fewer than two lines.  Now the sequence is
    parsed first and the truncate only happens on success.
    """
    if not os.path.exists(path):
        open(path, 'w').close()
        return 0
    with open(path, 'rb+') as f:
        size = os.path.getsize(path)
        if size < 3:
            return 0
        # Only the tail matters: the last data line plus the "`;" line.
        f.seek(-min(size, 100), os.SEEK_END)
        tail = f.readlines()
        if len(tail) < 2:
            return 0  # terminator only, no data line (original: IndexError)
        # Sequence numbers occupy the first 7 bytes of the last data line.
        last = int(str(tail[-2][0:7], "utf-8"))
        f.seek(-3, os.SEEK_END)
        f.truncate()
    return last
def main(argv):
    """Scrape new draws and append them to the data file (default ./d2.txt).

    argv[0], when given, overrides the output path.

    BUG FIX: lastSeq() truncates the file past its final newline, but the
    original restored that newline only when no new rows were fetched
    (`if len(data)<1`) — so every incremental append glued the first new row
    onto the previous last line.  The newline is now restored on every
    append to an existing file.
    """
    dest = './d2.txt'
    if len(argv) > 0:
        dest = argv[0]
    last = lastSeq(dest)
    data = []
    for page in range(1, 100):
        ret, end = get17500(page, last)
        data = ret + data  # older pages go in front, keeping ascending order
        if end:
            break
    suff = ''
    if last == 0:
        # Fresh file: open the JS template literal.
        suff = 'var history_data=`\n'
    else:
        # Existing file was truncated past its final newline; restore it.
        data.insert(0, '\n')
    with open(dest, 'a+') as f:
        f.write(f'{suff}{"".join(data)}`;')
    print(f'write to {dest} ok.')
main(sys.argv[1:])
##############################################
#!/usr/bin/python3
# coding: utf-8
import requests
from bs4 import BeautifulSoup
import numpy as np
import os
import sys
# Request headers for page fetches (second script's copy).  Only a spoofed
# legacy User-Agent is sent; no Referer despite the name.
Hostreferer = {
'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
}
def getHtml(url):
    """Download `url` using the module headers and return the response text."""
    resp = requests.get(url, headers=Hostreferer)
    return resp.text
# Zone boundary tables: flat lists of inclusive (low, high) pairs over the
# ball numbers 1..33.  s1 defines four zones (used by scope()); s2 defines
# three — nothing in this file reads s2, though scope2() below looks like
# its abandoned consumer.
s1=[1,9,10,19,20,29,30,33]
s2=[1,11,12,22,23,33]
def scope2(it, bounds=(1, 11, 12, 22, 23, 33)):
    """Map a ball number to its zone index against the s2-style table.

    `bounds` is a flat sequence of inclusive (low, high) pairs; the default
    matches the module-level s2 table.  Returns the index of the first pair
    containing int(it), or 0 when none matches.

    NOTE(review): the original body was `return ["3"][int(it)]` followed by
    an unreachable `return 0` — it raised IndexError for anything but 0 and
    never consulted s2.  Rewritten to mirror scope(); nothing in this file
    calls scope2, so confirm the intended semantics with the author.
    """
    value = int(it)
    for i in range(0, len(bounds) - 1, 2):
        if bounds[i] <= value <= bounds[i + 1]:
            return i // 2
    return 0
def scope(it, bounds=(1, 9, 10, 19, 20, 29, 30, 33)):
    """Map a ball number to its zone index.

    `bounds` is a flat sequence of inclusive (low, high) pairs; the default
    reproduces the module-level s1 table (four zones over 1..33), so existing
    single-argument callers are unaffected.  Returns the index of the first
    pair containing int(it), or 0 when no pair matches.

    Generalized from the original, which read the hard-coded global s1.
    """
    value = int(it)
    for i in range(0, len(bounds) - 1, 2):
        if bounds[i] <= value <= bounds[i + 1]:
            return i // 2
    return 0
def getitem(line):
    """Count how many numbers in `line` fall into each of the four zones.

    `line` is a space-separated number string; the final token (the blue
    ball, prefixed "+" upstream) is excluded from the count.
    Returns a 4-element list of per-zone counts.
    """
    counts = [0, 0, 0, 0]
    for token in line.split(' ')[:-1]:
        counts[scope(token)] += 1
    return counts
def get17500(page, last):
    """Scrape one result page and collect rows newer than sequence `last`.

    Each collected entry is "seq\tnumbers | zone-counts\n"; entries are kept
    in ascending sequence order.  Returns (rows, done) where `done` is True
    when no more pages need fetching.
    """
    collected = []
    url = f'https://www.17500.cn/widget/_ssq/ssqfanjiang/p/{page}.html'
    soup = BeautifulSoup(getHtml(url), 'html.parser')
    rows = soup.findAll('tr')
    # Strip the header row and the trailing pager row when present.
    if rows:
        del rows[0]
    if rows:
        rows.pop()
    if not rows:
        return collected, True
    for row in rows:
        # Mark the blue ball with a leading "+" token, then compute the
        # per-zone red-ball counts for the summary column.
        line = row.contents[3].text.replace('+', ' +')
        sc = ' '.join(str(i) for i in getitem(line))
        seq_text = row.contents[0].text
        if int(seq_text) <= last:
            return collected, True  # reached already-stored data
        collected.insert(0, seq_text + '\t' + line + f' | {sc}\n')
    return collected, False
def lastSeq2(path):
    """Return the newest draw sequence stored in `path` and strip the file's
    trailing 3-byte terminator (newline + "`;") so new rows can be appended.

    Creates the file and returns 0 when it does not exist; returns 0 and
    leaves the file untouched when it is too small to hold data.

    BUG FIXES vs original: it guarded only n<1, so a 1- or 2-byte file made
    f.seek(-3, os.SEEK_END) raise OSError; it truncated BEFORE parsing, so a
    malformed tail corrupted the file; and `line[-2]` raised IndexError on a
    tail with fewer than two lines.
    """
    if not os.path.exists(path):
        open(path, 'w').close()
        return 0
    with open(path, 'rb+') as f:
        size = os.path.getsize(path)
        if size < 3:
            return 0
        # Only the tail matters: the last data line plus the "`;" line.
        f.seek(-min(size, 100), os.SEEK_END)
        tail = f.readlines()
        if len(tail) < 2:
            return 0  # terminator only, no data line
        # Sequence numbers occupy the first 7 bytes of the last data line.
        seq = int(str(tail[-2][0:7], 'utf-8'))
        f.seek(-3, os.SEEK_END)
        f.truncate()
    return seq
def main(argv):
    """Scrape new draws and append them to the data file (default ./2d).

    argv[0], when given, overrides the output path.

    BUG FIX: lastSeq2() truncates the file past its final newline, but this
    copy of main never restored it (the first script at least did so for
    empty fetches), so every append glued the first new row onto the
    previous last line — and an empty fetch wrote a bare "`;" onto it.  The
    newline is now restored on every append to an existing file.
    """
    dest = './2d'
    if len(argv) > 0:
        dest = argv[0]
    last = lastSeq2(dest)
    data = []
    for page in range(1, 100):
        ret, end = get17500(page, last)
        data = ret + data  # older pages go in front, keeping ascending order
        if end:
            break
    suff = ''
    if last == 0:
        # Fresh file: open the JS template literal.
        suff = 'var history_data=`\n'
    else:
        # Existing file was truncated past its final newline; restore it.
        data.insert(0, '\n')
    with open(dest, 'a+') as f:
        f.write(f'{suff}{"".join(data)}`;')
    print(f'write to {dest} ok.')
main(sys.argv[1:])
# Stray trailing residue — not valid Python (`01-04` is a syntax error);
# commented out to keep the file importable:
# 01-04
# 1648