http_get.py
# -*- coding: UTF-8 -*-
import datetime
import random
import string
import threading
import time

import requests


def get(url):
    """Send one like request and print the response body."""
    res = requests.get(url)
    print(res.text)


def random_str(randomlength):
    """Return a random alphanumeric string of the given length (used as a fake user id)."""
    str_list = [random.choice(string.digits + string.ascii_letters) for i in range(randomlength)]
    return ''.join(str_list)


def get_url():
    """Build a like URL for company id 70 with a random user and a millisecond timestamp."""
    base_url = 'http://jingjinji.mschina2014.com/index/index/_like.html?id=%s&u=%s&t=%s'
    user = random_str(26)
    return base_url % ('70', user, int(time.time() * 1000))


def start():
    """Send `count` like requests, 0.1 s apart, logging progress."""
    num = 0
    count = 10
    c = count
    for i in range(count):
        get(get_url())
        num += 1
        time.sleep(0.1)
        c -= 1
        print(threading.current_thread().name + ' ---> %s requests left, please wait ---> %s' % (c, datetime.datetime.now()))
    print(threading.current_thread().name + ' ---> liked %s times in total ---> %s' % (num, datetime.datetime.now()))


def multi_thread(thread_num):
    # Pass the function itself; `target=start()` would run start() in the
    # main thread and hand Thread its None return value.
    for i in range(thread_num):
        threading.Thread(target=start, args=()).start()


if __name__ == "__main__":
    multi_thread(3)
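The like endpoint takes three query parameters: id (the company), u (a 26-character "user"), and t (a millisecond timestamp). A slightly more defensive variant of a single request, with a timeout and status check, might look like the sketch below; the parameter semantics are inferred from the URL above.

# Sketch: one like request with a timeout and status check, assuming the
# id/u/t parameters work as inferred from the URL in get_url() above.
import random
import string
import time

import requests

def send_like(company_id='70'):
    params = {
        'id': company_id,
        'u': ''.join(random.choice(string.digits + string.ascii_letters) for _ in range(26)),
        't': int(time.time() * 1000),
    }
    res = requests.get('http://jingjinji.mschina2014.com/index/index/_like.html',
                       params=params, timeout=10)
    res.raise_for_status()  # surface HTTP errors instead of printing blindly
    return res.text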
Analyzing the like mini-program
get_click_nums.py
# -*- coding: UTF-8 -*-
import datetime
import time
import uuid

import pymysql
import requests
from bs4 import BeautifulSoup

conn = pymysql.connect(
    host="ip",  # fill in the database host
    user="root",
    password="root",
    database="play",
    charset="utf8")

# Note: click_nums is stored as a string, which is why the analysis queries
# below cast it before sorting.
sql = '''INSERT INTO compants (id, data_id, name, click_nums, says, create_date) VALUES ('%s', '%s', '%s', '%s',
'%s', '%s') '''


def get_contents():
    """Fetch the voting page that lists all companies."""
    url = 'http://jingjinji.mschina2014.com/index/index/like.html'
    return requests.get(url)


def parse_xml(contents):
    """Parse the page HTML and hand every <li> entry to process_data."""
    soup = BeautifulSoup(contents.text, 'lxml')
    data = soup.find_all('li')
    process_data(data)


def process_data(datas):
    for data in datas:
        try:
            name = data.contents[0].text
            say = data.contents[2].text
            click_nums = data.contents[4].next.text.split('票')[0]  # text before '票' (votes)
            id = data.contents[4].contents[1].attrs['data-id']
            if int(id) > 47:
                sql_exec = sql % (uuid.uuid4(), id, name, click_nums, say, str(datetime.datetime.now()))
                print(sql_exec)
                mysql(sql_exec)
        except Exception as e:
            # Entries that don't match the expected structure are skipped.
            print(e)


def mysql(sql):
    cursor = conn.cursor()
    cursor.execute(sql)
    conn.commit()


if __name__ == "__main__":
    # Snapshot the vote counts once a minute.
    while True:
        parse_xml(get_contents())
        time.sleep(60)
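One caveat with the collector: the INSERT is built with % string formatting, so a scraped name or slogan containing a quote breaks the statement and lands in the except branch. A sketch of the same insert using pymysql's parameter binding instead, reusing the conn defined above:

# Sketch: the same insert via pymysql parameter binding, so quotes in
# scraped text cannot break the statement.
safe_sql = ('INSERT INTO compants (id, data_id, name, click_nums, says, create_date) '
            'VALUES (%s, %s, %s, %s, %s, %s)')

def mysql_safe(row):
    with conn.cursor() as cursor:
        cursor.execute(safe_sql, row)  # pymysql escapes each value
    conn.commit()

# Usage: mysql_safe((str(uuid.uuid4()), id, name, click_nums, say,
#                    str(datetime.datetime.now())))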
requirement.txt
requests~=2.24.0
PyMySQL~=0.10.1
beautifulsoup4~=4.9.3
lxml~=4.6.2
start.sh
nohup python3 -u get_click_nums.py >nohup.click.log &
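nohup plus the trailing & keep the collector alive after the shell session closes, and python3 -u disables output buffering so each print reaches nohup.click.log immediately.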
Data analysis
-- Latest vote count per company (click_nums is a string, so cast before sorting)
select t.data_id, t.`name`, t.says, max(t.click_nums) from compants t
group by t.data_id, t.`name`, t.says order by cast(max(t.click_nums) as unsigned) desc;
-- Full vote history for company 70 (a plain filter, so WHERE rather than HAVING)
select t.data_id, t.name, t.says, t.click_nums, t.create_date from compants t
where t.data_id = 70 order by t.data_id, t.create_date;
-- First and last time each vote count was observed for company 70
select t.data_id, t.name as company, t.says as slogan, t.click_nums as likes, min(t.create_date) as 'first seen (US time)', max(t.create_date) as 'last seen (US time)' from compants t group by t.data_id, t.name, t.says, t.click_nums having t.data_id = 70 order by t.data_id;
-- All observation times per vote count for company 70
select t.data_id, t.name as company, t.says as slogan, t.click_nums as likes, group_concat(t.create_date) as 'like times (US time)' from compants t group by t.data_id, t.name, t.says, t.click_nums having t.data_id = 70 order by t.data_id;
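The history query can also be run from Python over the pymysql connection already defined in get_click_nums.py; a minimal sketch:

# Sketch: pull the vote history for company 70 through the existing `conn`.
with conn.cursor() as cursor:
    cursor.execute(
        'SELECT data_id, name, click_nums, create_date FROM compants '
        'WHERE data_id = %s ORDER BY create_date', ('70',))
    for data_id, name, click_nums, create_date in cursor.fetchall():
        print(data_id, name, click_nums, create_date)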
2. Shanxi Natives in Beijing
http_get.py
# -*- coding: UTF-8 -*-
import threading

import pymysql
import requests
from bs4 import BeautifulSoup

# Paste the saved HTML of the album list page here; the live fetch via
# get_contents() is commented out in start() below.
static_contents = ''''''

conn = pymysql.connect(
    host="144.34.165.106",
    user="root",
    password="root",
    database="xiangqin",
    charset="utf8")

sql = '''INSERT INTO person_title (msgid, id, title, url, small_images, create_time, remark, flag) VALUES ('%s', '%s', '%s', '%s',
'%s', '%s', '%s', '%s') '''
person_info_sql = '''INSERT INTO person_info (msgid, name, wechat_no, age, height, body_weight, hometown, address_bj, profession, education, monthly_salary, self_description, object_description, images, views, messages, remark, flag) VALUES
('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')'''


def mysql(sql):
    cursor = conn.cursor()
    cursor.execute(sql)
    conn.commit()
def get_contents(url):
    """Fetch a page and print + return its HTML."""
    res = requests.get(url)
    print(res.text)
    return res.text


def get_url():
    """URL of the WeChat album that lists all matchmaking posts."""
    return 'https://mp.weixin.qq.com/mp/appmsgalbum?__biz=MzIxMjc5NjA5OA==&action=getalbum&album_id=1561178087793459204&scene=173&subscene=10000&sessionid=0&enterid=1624410485&from_msgid=2247505611&from_itemidx=2&count=30&nolastread=1#wechat_redirect'


def get(url):
    res = requests.get(url)
    return res.text


def get_object_lists(base_contents):
    pass  # placeholder, unused


def parse_xml(contents, num, msgid):
    """num == 0: parse the album list page; num == 1: parse one profile page."""
    soup = BeautifulSoup(contents, 'lxml')
    if num == 0:
        data = soup.find_all('li')
        process_data(data)
    if num == 1:
        data = soup.find_all('section')
        img_data = soup.find_all('img')
        process_data_info(data, img_data, msgid)
def process_data_info(datas, img_data, msgid):
    '''
    Sample profile text as rendered on a post page; the split markers used
    below must match these labels exactly:
    姓名:曹鸿
    微信号:微信公众号后台内回复 1722获取联系方式
    年龄:1996-11-03
    籍贯:太原: 清徐
    在京所在地:海淀区
    身高:163
    体重:63
    职业:高校招生就业
    最高学历:本科
    月薪:/
    我是一个这样的人:
    可以大大咧咧做个小憨憨,也可以聪明睿智怼死人!。
    宿舍,食堂,办公室。
    要说忙,也挺忙;要说不忙,也挺闲。
    工作霸占了生活,天气允许还可以打个羽毛球,减减肥。
    我希望你是一个这样的人:
    我的理想型是双眼皮大眼睛,个子高180+。偏爱制服系男生,对军人,医生没有抵抗力。最主要是喜欢狗狗。
    能在北京定居的,属猴,猪,牛的,星座最好是天蝎座。
    '''
    try:
        # Locate the <section> indexes of the three free-text markers.
        self_start_i = None
        self_end_i = None
        object_end_i = None
        for i in range(20):
            if datas[i].text == '我是一个这样的人:' and i > 9:
                self_start_i = i
            if datas[i].text == '我希望你是一个这样的人:' and i > 9:
                self_end_i = i
            if datas[i].text == '如果你也想在此找对象仅限山西在京老乡长按识别下方二维码认真填写信息' and i > 9:
                object_end_i = i
            if self_start_i is not None and self_end_i is not None and object_end_i is not None:
                break
        # Fixed-position fields: name, WeChat id, age, hometown, Beijing
        # district, height, weight, profession, education, salary.
        name = datas[0].text.split('姓名:')[1]
        wechat_no = datas[1].text.split('微信号:')[1]
        age = datas[2].text.split('年龄:')[1]
        hometown = datas[3].text.split('籍贯:')[1]
        address_bj = datas[4].text.split('在京所在地:')[1]
        height = datas[5].text.split('身高:')[1]
        body_weight = datas[6].text.split('体重:')[1]
        profession = datas[7].text.split('职业:')[1]
        education = datas[8].text.split('最高学历:')[1]
        monthly_salary = datas[9].text.split('月薪:')[1]
        # Free-text blocks: self description and desired-partner description.
        self_description = ''
        for i in range(self_end_i - self_start_i):
            self_description = self_description + datas[self_start_i + i].text
        object_description = ''
        for i in range(object_end_i - self_end_i):
            object_description = object_description + datas[self_end_i + i].text
        # Profile photos: everything before the fixed QR-code footer image.
        image_end_i = None
        for i in range(5):
            if i > 0 and img_data[i]['data-src'] == 'https://mmbiz.qpic.cn/mmbiz_png/WmIsW0lJdmFX2LkFhYng5QhJY4ujiaibIYE6Bsh9p9JPCm5vBpRjcSpldKLQzEjHFQIbIjlbyZcpyQftjcPHGkOw/640?wx_fmt=png':
                image_end_i = i
            if image_end_i is not None:
                break
        images = ''
        for i in range(image_end_i - 2):
            images += img_data[i + 1]['data-src'] + ';'
        views = ''
        messages = ''
        remark = ''
        flag = 1
        # Insert into MySQL.
        sql_exec = person_info_sql % (
            msgid, name, wechat_no, age, height, body_weight, hometown, address_bj, profession, education, monthly_salary,
            self_description, object_description, images, views, messages, remark, flag)
        # print(sql_exec)
        mysql(sql_exec)
        print('Inserted person_info row for %s' % name)
    except Exception as e:
        print(e)
def process_data(datas):
    for data in datas:
        try:
            msgid = data['data-msgid']
            title = data['data-title']
            # Only posts titled '山西在京老乡找对象第N弹' are profiles.
            if not title.startswith('山西在京老乡找对象'):
                continue
            id = title.split('山西在京老乡找对象第')[1].split('弹')[0]
            url = data['data-link']
            if url is not None:
                # Parse the linked profile page and insert into person_info.
                parse_xml(get(url), 1, msgid)
            small_images = ''
            create_time = ''
            remark = ''
            flag = 1
            # Insert the title row into MySQL.
            sql_exec = sql % (msgid, id, title, url, small_images, create_time, remark, flag)
            # print(sql_exec)
            mysql(sql_exec)
            print('Inserted person_title row for %s' % title)
        except Exception as e:
            print(e)
            continue
def start():
    # contents = get_contents(get_url())
    contents = static_contents
    parse_xml(contents, 0, '')


def multi_thread(thread_num):
    # Pass the function itself; `target=start()` would run start() in the
    # main thread and hand Thread its None return value.
    for i in range(thread_num):
        threading.Thread(target=start, args=()).start()


if __name__ == "__main__":
    multi_thread(3)
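The commented-out get_contents call in start() suggests the album page was captured manually rather than fetched live (mp.weixin.qq.com pages generally need the right session and headers). A sketch of loading a browser-saved copy into static_contents, assuming a hypothetical album.html:

# Sketch: load a browser-saved copy of the album page (album.html is a
# hypothetical filename) into static_contents before calling start().
with open('album.html', encoding='utf-8') as f:
    static_contents = f.read()

Note that with static input all three threads parse identical content and would insert duplicate rows, so a single start() call is enough here.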