这几天整个机房都在刷51nod的题。于是,我就做了一点点python的插件供我们逍遥
1 自动抓取做题记录
每天都要用手统计做题记录,还不如让py抓呢。
于是,我就看了看51nod的api。发现找不到。
我试着玩了一下,发现把每个页面地址的.html后缀去掉,就可以直接拿到原网页的数据。于是就可以写出下面这个脚本(虽然一开始有锅,后来就改掉了):
import requests
import time
# One shared HTTP session so every request reuses the same connection pool.
r = requests.session()


def find_lastAC(pid, uid):
    """Return the earliest accepted-submission time of ``uid`` on ``pid``.

    Despite the name, this yields the *earliest* qualifying timestamp, which
    callers use to decide whether a problem was first solved today.

    The paginated ``UserProblemSubmitList`` endpoint is fetched page by page
    until a page no longer contains the ``JudgeViews`` marker.  Each record is
    located by its ``FinishedTime`` key and parsed with fixed offsets.

    Args:
        pid: Problem id (anything ``str()`` can render into the URL).
        uid: User id; compared numerically against the id found in a record.

    Returns:
        The smallest ``FinishedTime`` value (taken as 13 digits, presumably
        epoch milliseconds) among records whose score is at least 40 and
        whose third following ``"Id":`` field equals ``uid``.  If no record
        qualifies, the sentinel ``10029272457492`` is returned.
    """
    earliest = 10029272457492  # sentinel returned when nothing qualifies
    base_url = ('https://www.51nod.com/Challenge/UserProblemSubmitList?problemId='
                + str(pid) + '&userId=' + str(uid) + '&page=')
    page_no = 1
    text = r.get(base_url + str(page_no)).text
    while text.find('JudgeViews') != -1:
        pos = text.find('FinishedTime')
        while pos != -1:
            score_at = text.find('Score', pos)
            score_end = text.find(',', score_at)
            # The submitter's id is taken from the third '"Id":' after the
            # record start (assumes the response layout - TODO confirm).
            id_at = pos
            for _ in range(3):
                id_at = text.find('"Id":', id_at + 1)
            id_end = text.find(',', id_at)
            # len('Score":') == 7, len('"Id":') == 5, len('FinishedTime":') == 14
            if (int(text[score_at + 7:score_end]) >= 40
                    and int(text[id_at + 5:id_end]) == int(uid)):
                earliest = min(earliest, int(text[pos + 14:pos + 27]))
            pos = text.find('FinishedTime', pos + 1)
        page_no += 1
        text = r.get(base_url + str(page_no)).text
    return earliest
# Epoch seconds of today's local midnight.  Building it with mktime (instead
# of subtracting hours/minutes/seconds from time.time()) drops the sub-second
# fraction and stays correct on days when the UTC offset changes.
today = time.localtime()
nowtime = time.mktime(
    (today.tm_year, today.tm_mon, today.tm_mday, 0, 0, 0, 0, 0, -1)
)

# Read the user list: one "real name,user id" pair per line.
# 'utf-8-sig' also accepts plain UTF-8 and strips a BOM if an editor added one.
with open('Userlist.csv', 'r', encoding='utf-8-sig') as f:
    s = f.read()
uid = s.split('\n')
print('检测到:', uid, '\n\n')
beginning='1959,2050,2056,2057,2059,2086,2087,2088,2090,2091,2092,2093,2094,2101,2103,2104,2110,2111,2112,2118,2119,2121,2135,2148,2149,2153,2393,2394,2395,2396,2397,2398,2399,2400,2401,2402,2403,2404,2405,2406,2407,2408,2409,2410,2411,2412,2413,2414,2415,2416,2417,2418,2419,2420,2578,2579,2580,2581,3025,3027,3031,3032,3037,3130,3131,3132,3133,3134,3135,3136,3193,3194,3195,3196,3197,3206,3259,3260,3261,3262,3263,3265,3266,3270,3271,3273,3274,3276,3277,3278,3287,3314,3342,3372,3415,3428,3429,1015,1504,1505,1507,1508,1545,1887,1888,1911,1912,1915,1916,1917,1922,1941,1950,1956,1957,2074,2089,2102,2105,2115,2116,2122,2130,2131,2134,2136,2137,2138,2140,2144,2145,2150,2151,2159,2174,2175,2176,2279,3033,3034,3035,3056,3191,3192,3208,3211,3264,3267,3268,3275,3281,3299,3309,3328,3388,3405,3420,1008,1182,1283,1590,1591,1874,1879,1882,1891,1909,1926,1928,1951,1955,2058,2060,2061,2062,2064,2106,2108,2128,2129,2133,2139,2141,2142,2143,2152,2160,2165,2166,2171,2172,2173,2177,2178,2350,2353,2377,2381,2443,2444,2452,2643,2644,2645,2738,3036,3041,3058,3100,3198,3199,3207,3212,3214,3216,3285,3286,3288,3288,3290,3291,3291,3292,3293,3294,3295,3296,3298,3301,3302,3304,3317,3323,3348,3354,3381,3386,3394,3410,1012,1138,1878,1896,1897,1905,1936,1954,2063,2068,2075,2076,2113,2114,2167,2169,2170,2179,2185,2186,2187,2189,2282,2455,2456,2509,2592,2638,2639,2640,2641,2642,2646,3028,3029,3042,3043,3044,3047,3055,3057,3059,3200,3201,3202,3204,3205,3213,3280,3289,3313,3320,3324,3330,3395,1003,1004,1009,1011,1049,1080,1082,1083,1344,1894,1902,1918,1927,1958,2066,2069,2070,2072,2073,2080,2081,2107,2163,2164,2168,2180,2181,2182,2183,2188,3159,3203,3210,3346,3411,3413,3414,3422,'
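# Hard-coded table of beginner/basic problem IDs; problems listed here are excluded from the daily count below.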
# For every user in `uid`, scan their submission list and write one CSV row:
# user id, real name, number of problems first solved today, nickname, and
# the ids of those problems.
with open('Sublist.csv', 'w+') as f:
    f.write('Uid,Name,Sub,Nickname\n')
    for line in uid:
        tuid = line.split(',')  # [real name, user id]
        if len(tuid) == 1:  # blank line or line without a comma
            continue
        page_url = ('https://www.51nod.com/Challenge/UserSubmitList?userID='
                    + str(tuid[1]) + '&page=')

        # The nickname is sliced out of the first page of results.
        ans = r.get(page_url + str(1)).text
        ni = ans.find('Name')
        niend = ans.find(',', ni)
        name = ans[ni + 7:niend - 1]
        print('User:', line, '----->', name, '\n----------------')

        tot = 0
        sawa = ""
        counted = set()  # problem ids already counted for this user
        for i in range(1, 100000):
            ans = r.get(page_url + str(i)).text
            loca = ans.find('FinishedTime')
            if loca == -1:  # a page without records ends the listing
                break
            while loca != -1:
                here = loca
                loca = ans.find('FinishedTime', here + 1)
                # Only submissions finished today are considered.
                if int(ans[here + 14:here + 27]) < nowtime * 1000:
                    continue
                l1 = ans.find('ProblemId', here)
                pid = ans[l1 + 11:l1 + 15]  # assumes 4-digit problem ids
                # Skip beginner problems and problems already counted, so a
                # problem accepted twice today is neither counted twice nor
                # looked up again.
                if pid in counted or beginning.find(pid) != -1:
                    continue
                l2 = ans.find('Score', here)
                l2_end = ans.find(',', l2)
                # Count it only if the earliest accepted submission is from
                # today and this submission scored at least 40.
                if (find_lastAC(pid, tuid[1]) >= nowtime * 1000
                        and int(ans[l2 + 7:l2_end]) >= 40):
                    l3 = ans.find('Title', here)
                    l3_end = ans.find(',', l3)
                    print(pid, ans[l3 + 8:l3_end - 1])
                    sawa = sawa + pid + ','
                    tot = tot + 1
                    counted.add(pid)
        f.write(tuid[1] + ',' + tuid[0] + ',' + str(tot) + ',' + name + ',' + sawa + '\n')
        print('-----tot:' + str(tot) + '------\n')
# Pause before exiting (presumably so the console output stays visible).
time.sleep(50)
用户列表以csv格式读入,结果以csv格式写出。需要注意编码问题,最好用UTF-8。
读入用户范例:
XXX,215946 (XXX是真实姓名,后面是用户编号;括号里的说明只是注释,不要写进文件)
XXX,XXXXXX
···
保存至Userlist.csv即可(文件名要与代码里的'Userlist.csv'一致,在区分大小写的系统上尤其要注意)。
2 抓取用户做题记录
怎么说,就是统计用户已经做过多少道题,以及哪些题。
主要通过用户主页的做题情况统计。
依然以Userlist.csv读入用户,结果也以csv写出。
import requests
import time
# Read the user list: one "real name,user id" pair per line.
# 'utf-8-sig' also accepts plain UTF-8 and strips a BOM if an editor added one.
with open('Userlist.csv', 'r', encoding='utf-8-sig') as f:
    s = f.read()
uid = s.split('\n')
print('检测到:', uid, '\n\n')

r = requests.session()

# For every user, count accepted problems per level and list their ids.
with open('Sublist.csv', 'w+') as f:
    # CSV column layout of the report
    f.write('Uid,Name,基础题,1级题,2级题,3级题,4级题,5级题,6级题,7级题,8级题,9级题,难题,TOT-Pre,TOT-Pro,Sub\n')
    for line in uid:
        tuid = line.split(',')  # [real name, user id]
        if len(tuid) == 1:  # blank line or line without a comma
            continue
        level = [0] * 13  # accepted-problem counters indexed by level number

        # Pass 1: the user's main problem index.
        ans = r.get('http://www.51nod.com/Challenge/UserIndex?userId=' + str(tuid[1])).text
        print('User:', line)
        tot = 0
        sawa = ""
        loca = ans.find('UserProblemSimplify')
        while loca != -1:
            l1 = ans.find('IsAccepted', loca)
            l2 = ans.find('ProblemId', loca)
            l3 = ans.find('Level', loca)
            l4 = ans.find(',', l3)
            loca = ans.find('UserProblemSimplify', loca + 1)
            # 't' is the first character of a true IsAccepted value.
            if ans[l1 + 12:l1 + 13] == 't':
                tot = tot + 1
                sawa = sawa + ans[l2 + 11:l2 + 15] + ','
                level[int(ans[l3 + 7:l4])] += 1
        sawa = sawa + '|,'  # separator between the two id lists

        # Pass 2: the beginner problem index (counted but not split by level).
        ans = r.get('http://www.51nod.com/Challenge/BeginnerIndex?userId=' + str(tuid[1])).text
        tot2 = 0
        loca = ans.find('UserProblemSimplify')
        while loca != -1:
            l1 = ans.find('IsAccepted', loca)
            l2 = ans.find('ProblemId', loca)
            loca = ans.find('UserProblemSimplify', loca + 1)
            if ans[l1 + 12:l1 + 13] == 't':
                tot2 = tot2 + 1
                sawa = sawa + ans[l2 + 11:l2 + 15] + ','

        per_level = ','.join(str(level[k]) for k in range(1, 12))
        f.write(tuid[1] + ',' + tuid[0] + ',' + per_level + ','
                + str(tot2) + ',' + str(tot) + ',' + sawa + '\n')
        print('-----tot:' + str(tot) + '------\n')
# Pause before exiting (presumably so the console output stays visible).
time.sleep(50)
3 批量修改题目标签
可以发现,51nod题目是可以直接修改标签的。所以,我们就可以写一个post来修改标签。hhh过于颓废,仅供玩耍,请使用后改回来
通过抓包获取接口,找到接口传递的方法,就可以直接修改。
import requests
from selenium import webdriver
import time
def c(cookies_str):
    """Parse a browser-style cookie string into a ``{name: value}`` dict.

    Args:
        cookies_str: Text such as ``"a=1; b=2"``.  Surrounding whitespace,
            including a trailing newline from a file read, is ignored.

    Returns:
        A dict mapping each cookie name to its value.  Only the first ``=``
        separates name from value, so values that themselves contain ``=``
        (for example base64 padding) are kept intact.  Empty segments are
        skipped.
    """
    cookies_dict = {}
    for segment in cookies_str.split(';'):
        name, _, value = segment.strip().partition('=')
        if name:
            cookies_dict[name] = value
    return cookies_dict
# Rewrites the topic-tag list of every problem in a fixed id range by
# re-posting the current tags without tag id 612.
# NOTE(review): leading indentation is missing in this listing; the loop
# bodies below have to be inferred and the code is left untouched.
# NOTE(review): this modifies data on a live site; the author asks that the
# change be reverted after use.
# NOTE(review): the handle returned by open() is never closed explicitly.
d=open(r'C:\Users\admin\Desktop\cookies.txt','r').read() # put the cookie string in this file, or change the path
# Modification does not work without being logged in.
# Look up how to export cookies from your browser yourself.
cookies=c(d)
s=requests.session()
# NOTE(review): `tot` is assigned here but not used in the visible script.
tot=0
for i in range(1000,1500): # range of problem ids to modify
print(i)
txt=s.get('http://www.51nod.com/Challenge/Problem?problemId='+str(i),cookies=cookies).text
# Keep only the text between the "Topics" key and the "User" key.
rt=txt.find('"Topics"')
op=txt.find('"User"')
nf=txt[rt:op]
loca=nf.find('"Id"')
loca_end=nf.find(',',loca)
# Tag ids that will be sent back for this problem.
l=[]
while loca!=-1:
#print(nf[loca+5:loca_end])
if int(nf[loca+5:loca_end])!=612: # drop the tag whose id is 612
l.append(int(nf[loca+5:loca_end])) # be sure to change it back after use!
loca=nf.find('"Id"',loca+1)
loca_end=nf.find(',',loca)
#l.append(612)  uncomment to add the tag with id 612 instead
s.post('http://www.51nod.com/ProblemTopicWriter/Rework',data={'LinkId':i,'Topic':l},cookies=cookies)
#print('-----')
做个好人!
这个post还是挺好玩的qaq
4 既然实现了标签修改的post,那么我们就给用户发亿点点消息吧
一样,使用post。找到post接口,就可以发送了。
同样需要cookies。保存目录是一样的。
import requests
import time
def c(cookies_str):
    """Parse a browser-style cookie string into a ``{name: value}`` dict.

    Args:
        cookies_str: Text such as ``"a=1; b=2"``.  Surrounding whitespace,
            including a trailing newline from a file read, is ignored.

    Returns:
        A dict mapping each cookie name to its value.  Only the first ``=``
        separates name from value, so values that themselves contain ``=``
        (for example base64 padding) are kept intact.  Empty segments are
        skipped.
    """
    cookies_dict = {}
    for segment in cookies_str.split(';'):
        name, _, value = segment.strip().partition('=')
        if name:
            cookies_dict[name] = value
    return cookies_dict
# Posts the same private message to one fixed user id in an endless loop and
# prints the running average send rate.
# NOTE(review): leading indentation is missing in this listing; the code is
# left untouched.
# NOTE(review): the loop has no exit condition, delay or error handling, so
# it floods both the recipient and the service. Do not run it against
# accounts or servers you do not own.
# NOTE(review): the handle returned by open() is never closed explicitly.
d=open(r'C:\Users\admin\Desktop\cookies.txt','r').read()
cookies=c(d)
s=requests.session()
# Start time, used for the rate printed below.
st=time.time()
# Number of POST requests issued so far.
tot=0
while 1: # post in a loop
s.post('http://www.51nod.com/MessageWriter/Append',data={'ReceiveId':209919,'Content':'WB AK IOI!'},cookies=cookies)
# ReceiveId is the id of the user receiving the message; Content is the message text.
tot=tot+1
t=time.time()
print(tot/(t-st)) # rate, in messages per second
注意,若直接运行,可能速度不会拉满,这要看python最大可以使用的内存和cpu了。
若不想调整,可以多开几个窗口,这样速度更快!
测试:我们学校带宽500Mbps,这破玩意电脑能跑到20Mbps!大约能达到100条消息每秒。
提醒:整人挺快乐,事后麻烦至!
虽然我这么无聊