# 快下班了快下班了,话不多说先放代码,不懂的可以留言联系!重在找规律!!!!
"""
@author: Cjp
@file: cssfan.py
@time: 2020/9/11 16:37
"""
import re
from operator import itemgetter
import requests
from lxml import etree
def _collect_declarations(class_name, page_source):
    """Return the CSS declarations found for *class_name* in *page_source*.

    Every single-line piece of text that contains the class name followed
    (anywhere later on the same line) by a ``{ ... }`` body is considered.
    Each body is split on ``:`` into a property name and a value.  The value
    has surrounding whitespace removed and then any leading/trailing ``e`` /
    ``m`` characters removed, so ``1em`` becomes ``1``.

    Bodies that are empty or contain no ``:`` are ignored instead of raising
    ``IndexError``.  When the same property appears more than once, the last
    occurrence wins.
    """
    # Escape the class name so that it is always matched literally.
    pattern = re.escape(class_name) + '.*{( .* )?}'
    declarations = {}
    for body in re.findall(pattern, page_source):
        parts = body.split(':')
        if len(parts) < 2:
            # Unmatched optional group ('') or a body without a colon.
            continue
        declarations[parts[0].strip()] = parts[1].strip().strip('em')
    return declarations


def gao(url):
    """Fetch one puzzle page and return the list of numbers decoded from it.

    The page is requested with a fixed User-Agent and Cookie header.  Every
    ``div.col-md-1`` directly under a ``div.row`` is treated as one number
    whose child ``<div>`` elements are digits.  For each digit the CSS
    declarations attached to its class decide what happens:

    * ``opacity`` present  -> the digit is skipped (treated as hidden);
    * ``content`` present  -> the quoted content is converted to ``int`` and
      appended to the result directly;
    * ``left`` present     -> the digit is placed at its running position
      plus the ``left`` offset;
    * otherwise            -> the digit stays at its running position.

    Placed digits are sorted by position, concatenated and converted to
    ``int``.  The resulting list is printed and returned.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if a ``content``/``left`` value or the assembled digit
            string is not a valid integer.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
        'Cookie': '_ga=GA1.2.2076166588.1599182485; __gads=ID=c94e2e5e9c6d1406:T=1599184036:S=ALNI_Ma_JRzKSRhVrnobJU2CRfVfW3AS7A; footprints=eyJpdiI6InU5eklsSU9NSkhCWFVIaEkyUnM0cGc9PSIsInZhbHVlIjoiaG9wUEJqejI0MXlvZDZzc3c0T2NSRHhoTVVkcEZHcTJqTDZYdldqQTA4ZkY5bk1KYVRPR3l5dEFJZlNmM1FXNyIsIm1hYyI6Ijc2NDMzOTEwMDhmMGE0YzM5ZDAxNThhMzAwMzZhZjFlNWZiZGUzNWU0MWZjNTIyNzU3ZWRhZmY2ODdhNmJhMWMifQ%3D%3D; Hm_lvt_020fbaad6104bcddd1db12d6b78812f6=1599182485,1599184017,1600066558; _gid=GA1.2.1388436899.1600066559; _gat_gtag_UA_75859356_3=1; XSRF-TOKEN=eyJpdiI6IkNnVGc2RWRJMnIxWnc3SDNpNGFYRXc9PSIsInZhbHVlIjoiQzNCZzg2NVYwQ2pCVGloREpva1M4RFBZbGIwaTRWQTBTQXJPNXFLc0RONDdPYWhUVjlKdzZuWGNcL09uNUZvSmEiLCJtYWMiOiI3YzRlNGM0M2EwYzE2NmY2MDk5OTZmNWMzYWRlMzI2OWRmZDNlNzEzMzUxMmExNGE2NDkzM2YzNTk4NmVlMDczIn0%3D; glidedsky_session=eyJpdiI6IkxoSGlhRFhIcVhvQVBRMmltN2Z3SFE9PSIsInZhbHVlIjoiQ1p6TlBwWnNEYXFxRnFoNXRacVRBaVcyeE9QdFp6Y1BKaG8xbGw0RVp4bzFDSHloSExiS05FaHZNMUtMclJnTCIsIm1hYyI6ImVjZWQzMzZhZGI4YzYzMWNiZmNkZWVlNGFiZDQxMGRkZDkzNzg4OWQwZTY5NTYzMGE5YzRiNzA0NWU4YmEwMDQifQ%3D%3D; Hm_lpvt_020fbaad6104bcddd1db12d6b78812f6=1600067459',  # replace with the cookies of your own account
    }
    response = requests.get(url, headers=headers, timeout=20)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently returning an empty list.
    response.raise_for_status()

    # Decode once; the same text feeds both the HTML parser and the CSS
    # rule lookup for every digit.
    page_source = response.content.decode()
    html = etree.HTML(page_source)

    items = []
    for cell in html.xpath('//div[@class="row"]/div[@class="col-md-1"]'):
        placed = []
        position = 1  # 1-based running position of the next visible digit
        for digit_div in cell.xpath('./div'):
            class_attr = digit_div.xpath('./@class')
            # A div without a class attribute has no class rules to look up.
            if class_attr:
                rules = _collect_declarations(class_attr[0], page_source)
            else:
                rules = {}

            if 'opacity' in rules:
                # Hidden element: contributes nothing and takes no position.
                # The opacity value itself is not inspected.
                continue
            if 'content' in rules:
                # The displayed value comes from the CSS content string.
                items.append(int(rules['content'].strip('"')))
                continue

            if 'left' in rules:
                # Position of the element after it has been shifted.
                slot = position + int(rules['left'])
            else:
                # Element keeps its original position.
                slot = position
            position += 1
            placed.append({
                'num': ''.join(digit_div.xpath('./text()')),
                'valu': slot,
            })

        if placed:
            placed.sort(key=itemgetter('valu'))
            items.append(int(''.join(entry['num'] for entry in placed)))

    # All numbers decoded from this page.
    print(items)
    return items
def main():
    """Decode pages 1 through 1000 of the puzzle and print the results.

    Each page URL is passed to ``gao``; the numbers it returns are gathered
    into one list.  After all pages are processed, the full list is printed,
    followed by the sum of its elements.
    """
    numbers = []
    for page in range(1, 1001):
        url = 'http://www.glidedsky.com/level/web/crawler-css-puzzle-1?page={}'.format(page)
        numbers.extend(gao(url))
    print(numbers)
    total = sum(numbers)
    print('合为:', total)
# Start the scrape only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()