from os.path import dirname, abspath
import sys
path = dirname(dirname(dirname(abspath(__file__))))
sys.path.append(path)
import json
import hashlib
from mitmproxy import http, ctx
from group_control.simulation.mitm_conf import host_channel_mapping, channel_name_num_mapping, channel_name_pkg_mapping
from api.models.sp_raw.scrapy_raw_url_task import ScrapyRawUrlTask
from group_control.request_urls.mitm.mysql_conf import LocalScrapyRawUrlTask
class MitmLocalPacketCapture(object):
    """mitmproxy addon that captures ad-request traffic and persists each
    request's details (URL, headers, cookies, body) to a local MySQL table.

    Run with: mitmdump -s mitm_local.py
    """

    def __init__(self):
        # Online table kept for reference / manual tests; this addon only
        # writes to the local table.
        self.online_database = ScrapyRawUrlTask
        self.local_database = LocalScrapyRawUrlTask

    def request(self, flow: http.HTTPFlow):
        # Request hook intentionally unused; capture happens on response.
        pass

    def response(self, flow: http.HTTPFlow):
        """mitmproxy response hook: persist the request when its URL maps
        to a known ad channel."""
        url = flow.request.url
        print(url)
        channel = self.get_channel(url)
        if channel:
            print('正在开始解析数据')
            self.request_capture(flow, channel)

    @staticmethod
    def _clean_keys(mapping):
        """Copy a header/cookie mapping into a plain dict, stripping the
        leading ':' of HTTP/2 pseudo-header keys (e.g. ':authority')."""
        cleaned = dict()
        for k, v in mapping.items():
            if k.startswith(':'):
                k = k.replace(':', '')
            cleaned[k] = v
        return cleaned

    def request_capture(self, flow, platform):
        """Serialize the request of *flow* and save it under *platform*.

        os: 2 = Android (default), 1 = iOS (detected from the User-Agent).
        The md5 of the serialized request is stored for de-duplication, so
        the key insertion order below must not change.
        """
        url = flow.request.url
        # ctx.log.alert(url)
        headers = self._clean_keys(flow.request.headers)
        cookies = self._clean_keys(flow.request.cookies)

        # Case-insensitive User-Agent lookup (original only checked the two
        # spellings 'user-agent' / 'User-Agent').
        device_os = 2
        for key, value in headers.items():
            if key.lower() == 'user-agent' and 'iphone' in value.lower():
                device_os = 1
                break

        # NOTE: insertion order matters — it feeds the md5 dedup key.
        request_detailed = {
            'url': url,
            'headers': headers,
            'cookies': cookies,
            'os': device_os,
            'source_app': channel_name_pkg_mapping.get(platform),
            'platform': platform,
        }
        data = flow.request.get_text()
        if data:
            request_detailed['data'] = data

        # Serialize once; the same string is both stored and hashed.
        serialized = json.dumps(request_detailed)
        etl = hashlib.md5(serialized.encode(encoding='utf-8')).hexdigest()
        res = self.local_database(platform=platform, os=device_os,
                                  request_url=serialized,
                                  request_url_md5=etl)
        res.save()
        # NOTE(review): "count" actually prints the saved model's repr.
        msg = "[platform: {}] send data successfully, count: {}".format(
            platform, res)
        print(msg)

    def get_channel(self, url):
        """Return the numeric channel id for *url*, or None when the URL
        matches no known ad host."""
        for host in host_channel_mapping.keys():
            if host in url:
                print("get a ad url {}".format(url))
                return channel_name_num_mapping[host_channel_mapping[host]]
        # Hard-coded special case absent from the mapping tables.
        if 'gfp.veta.naver.com' in url:
            return 40
# mitmproxy discovers and loads addons through this module-level list.
addons = [
    MitmLocalPacketCapture()
]
if __name__ == '__main__':
    # Manual smoke test: write a single row into the online table.
    task = ScrapyRawUrlTask(channel=40, os=2, request_url='这只是一个测试而已')
    task.save()
启动中间人命令:mitmdump -s mitm_local.py
Peewee绑定数据库表结构
from peewee import TextField, IntegerField, CharField, DateTimeField
from api.models.mysql_db import SpRawBaseModel
from peewee import *
from group_control.request_urls.mitm.mysql_local_db import SpRawLocalBaseModel
class LocalScrapyRawUrlTask(SpRawLocalBaseModel):
    """Peewee model for the local `scrapy_raw_url_task` table; one row per
    captured ad request."""

    channel = IntegerField(null=True)       # numeric channel id
    os = IntegerField(null=True)            # 1 = iOS, 2 = Android
    geo = CharField(null=True)
    request_url = TextField()               # JSON-serialized request detail
    request_url_md5 = CharField()           # md5 of request_url, for de-dup
    total_crawl_times = IntegerField(null=True)
    total_crawl_ads_times = IntegerField(null=True)
    total_new_ads_num = IntegerField(null=True)
    total_updated_ads_num = IntegerField(null=True)
    is_available = IntegerField(null=True)
    source_app = CharField(null=True)       # package name of the source app
    created_at = DateTimeField()
    updated_at = DateTimeField()
    bk_int = IntegerField(null=True)        # spare/backup columns
    bk_string = CharField(null=True)

    class Meta:
        db_table = 'scrapy_raw_url_task'
        indexes = (
            # BUG FIX: the index referenced the misspelled 'is_avaliable';
            # the declared field (and column) is 'is_available'.
            (('channel', 'is_available'), False),
            (('os', 'request_url_md5'), False),
        )
连接Mysql数据库
from playhouse.pool import PooledMySQLDatabase
from peewee import Model
def get_database(database, connections=20, host='127.0.0.1', user='root',
                 passwd='123456', port=3306):
    """Return a pooled MySQL connection handle for *database*.

    Connection parameters default to the local development instance but may
    now be overridden by callers (backward compatible — existing calls such
    as get_database("sp_raw") behave exactly as before).

    WARNING: the default credentials are hard-coded; move them to external
    configuration for anything beyond local development.
    """
    return PooledMySQLDatabase(
        host=host,
        user=user,
        passwd=passwd,
        port=port,
        database=database,
        max_connections=connections,
    )
# Shared pool for the local `sp_raw` schema.
sp_raw_database = get_database("sp_raw")
# Base model: binds every local sp_raw model to the pooled connection.
class SpRawLocalBaseModel(Model):
    class Meta:
        database = sp_raw_database