梳理思路:
1、 获取 7month 和 8month fullreport 报表中 所有offer;输出结果:offerid, totalClickCount;
2、 分析数据:去除 7month 与 8month 的 totalClickCount 均为 0 的 offer;
result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)] 获取有效的offer数据data2;
3、 获取点击系统自动跑量列表中offer数据data3,输出结果:offerid,dataSourceid;
4、 分析数据 根据data2有效offer,获取到有效的offer对应的每个dataSourceid;
result = pd.merge(data2, data3, on='offerId', how='left')
5、 计算出删除后,dataSourceid应该保留的个数及id;
6、 计算出删除offer的个数及offer_id;
操作方法:
一、 获取 7month 和 8month fullreport 报表中 所有offer;
class getFullreportOffer_8month
class getFullreportOffer_7month
class getFullreportOffer_8month:
    """Download the full report for 2023-08 and save offer ids with click totals.

    Relies on names defined elsewhere in the file: ``requests``, ``header``,
    ``thread_pool_executor``, ``np``, ``pd`` and the output path ``filename8``.
    """

    def _process(self, page):
        """Fetch one report page.

        Returns a tuple ``(offer_ids, click_totals)`` of two parallel lists
        taken from the ``result.dataList`` entries of the JSON response.
        """
        first_day = "2023-08-01"
        last_day = "2023-08-31"
        query = (
            f"···?page={page!s}&pageSize=100"
            f"&fromDate={first_day}&toDate={last_day}"
            "&fromHour=00&toHour=23&timezone=%2B00:00"
            "&affiliateIds=&sourceIds=&sorting=rejectionTotal&sortType=desc"
            "&ruleIds=&businessTypes=&accountTypes=&columns=offer_id"
        )
        payload = requests.get(url=query, headers=header, verify=False).json()
        rows = payload['result']['dataList']
        offer_ids = [row['offerId'] for row in rows]
        click_totals = [row['totalClickCount'] for row in rows]
        return offer_ids, click_totals

    def _process_multithread(self, list_):
        """Fetch every page in ``list_`` through the shared thread pool.

        Returns the per-page results in the same order as ``list_``.
        """
        futures = []
        for page in list_:
            futures.append(thread_pool_executor.submit(self._process, page))
        return [future.result() for future in futures]

    def run(self):
        """Download pages 1..32, flatten them and write the CSV to ``filename8``."""
        pages = np.arange(1, 33)
        all_offer_ids = []
        all_click_totals = []
        for page_offer_ids, page_click_totals in self._process_multithread(pages):
            all_offer_ids.extend(page_offer_ids)
            all_click_totals.extend(page_click_totals)
        frame = pd.DataFrame({
            'offerId': all_offer_ids,
            'totalClickCount8': all_click_totals,
        })
        frame.to_csv(filename8, index=False)
class getFullreportOffer_7month:
    """Download the full report for 2023-07 and save offer ids with click totals.

    Relies on names defined elsewhere in the file: ``requests``, ``header``,
    ``thread_pool_executor``, ``np``, ``pd`` and the output path ``filename7``.
    """

    def _process(self, page):
        """Fetch one report page and return ``(offer_ids, click_totals)``.

        Both lists are built from the ``result.dataList`` entries of the JSON
        response and are index-aligned.
        """
        period_start = "2023-07-01"
        period_end = "2023-07-31"
        report_url = "".join([
            "···?page=", str(page),
            "&pageSize=100&fromDate=", period_start,
            "&toDate=", period_end,
            "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds="
            "&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes="
            "&accountTypes=&columns=offer_id",
        ])
        response = requests.get(url=report_url, headers=header, verify=False)
        entries = response.json()['result']['dataList']
        ids = [entry['offerId'] for entry in entries]
        clicks = [entry['totalClickCount'] for entry in entries]
        return ids, clicks

    def _process_multithread(self, list_):
        """Submit one ``_process`` task per page and collect results in order."""
        pending = [
            thread_pool_executor.submit(self._process, page_no)
            for page_no in list_
        ]
        collected = []
        for task in pending:
            collected.append(task.result())
        return collected

    def run(self):
        """Download pages 1..33, flatten them and write the CSV to ``filename7``."""
        page_numbers = np.arange(1, 34)
        merged_ids = []
        merged_clicks = []
        for ids, clicks in self._process_multithread(page_numbers):
            merged_ids.extend(ids)
            merged_clicks.extend(clicks)
        columns = {
            'offerId': merged_ids,
            'totalClickCount7': merged_clicks,
        }
        pd.DataFrame(columns).to_csv(filename7, index=False)
二、分析数据:去除 7month 与 8month 的 totalClickCount 均为 0 的 offer;
result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)] 获取有效的offer数据data2;
class dataAnalysis:
    """Combine the July and August reports and keep offers that had clicks."""

    def get_datas(self):
        """Write the offers with clicks in at least one month to ``filename``.

        The two monthly CSVs are outer-joined on ``offerId``; a row is kept
        when ``totalClickCount7`` or ``totalClickCount8`` is greater than 0.
        """
        july = pd.read_csv(filename7)
        august = pd.read_csv(filename8)
        combined = july.merge(august, on='offerId', how='outer')
        clicked_in_july = combined['totalClickCount7'] > 0
        clicked_in_august = combined['totalClickCount8'] > 0
        active = combined.loc[clicked_in_july | clicked_in_august]
        active.to_csv(filename, index=False)
三、获取点击系统自动跑量列表中offer数据data3,输出结果:offerid,dataSourceid;
class get_click_offer_datasourceid:
    """Collect the data-source ids attached to each offer in the click system.

    Relies on names defined elsewhere in the file: ``requests``, ``header``,
    ``thread_pool_executor``, ``np``, ``pd`` and the output path
    ``filenameclick``.
    """

    @staticmethod
    def _collect_source_ids(record):
        """Return the data-source ids referenced by one API record.

        ``dataSourceId`` is added unless it equals ``"-1"``.  ``dataSourceIds``
        is, unless it equals ``"-1"``, split on commas when it contains one and
        added as a single entry otherwise.  If a field is missing or has an
        unusable type, ``"NaN"`` is appended after whatever was collected
        before the failure.
        """
        ids = []
        try:
            single = record['dataSourceId']
            if single != "-1":
                ids.append(single)
            multiple = record['dataSourceIds']
            if multiple != "-1":
                if "," in multiple:
                    ids.extend(multiple.split(','))
                else:
                    ids.append(multiple)
        except (KeyError, TypeError, AttributeError):
            # Only data-shape problems are tolerated here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            ids.append("NaN")
        return ids

    def process(self, page):
        """Fetch one page of records.

        Returns a tuple ``(offer_ids, source_id_lists)`` where the second list
        holds, per offer, the list produced by ``_collect_source_ids``.
        """
        url = f"····?pageNum={page!s}&pageSize=10"
        # NOTE(review): verify=False disables TLS certificate checks; confirm
        # this is intentional for the target host.
        payload = requests.get(url=url, headers=header, verify=False).json()
        offer_ids = []
        source_id_lists = []
        for record in payload['result']['records']:
            ids = self._collect_source_ids(record)
            offer_ids.append(record['offerId'])
            source_id_lists.append(ids)
        return offer_ids, source_id_lists

    def process_multithread(self, list_):
        """Run ``process`` for every page in ``list_`` on the shared thread pool.

        Results are returned in the same order as ``list_``.
        """
        tasks = [thread_pool_executor.submit(self.process, page) for page in list_]
        return [task.result() for task in tasks]

    def run(self):
        """Download pages 1..456, flatten them and write ``filenameclick``."""
        pages = np.arange(1, 457)
        all_offer_ids = []
        all_source_id_lists = []
        for page_offer_ids, page_source_ids in self.process_multithread(pages):
            all_offer_ids.extend(page_offer_ids)
            all_source_id_lists.extend(page_source_ids)
        frame = pd.DataFrame({
            'offerId': all_offer_ids,
            'dataSources': all_source_id_lists,
        })
        frame.to_csv(filenameclick, index=False)
四、分析数据 根据data2有效offer,获取到有效的offer对应的每个dataSourceid;
result = pd.merge(data1, data2, on='offerId', how='left')
class effectiveOffer:
    """Attach data-source ids to the offers that had clicks."""

    def get_datas(self):
        """Left-join the valid offers with the click-system list on ``offerId``.

        Reads ``filename`` and ``filenameclick`` and writes the joined table
        to ``filenameoffer``.
        """
        valid_offers = pd.read_csv(filename)
        click_offers = pd.read_csv(filenameclick)
        joined = valid_offers.merge(click_offers, on='offerId', how='left')
        joined.to_csv(filenameoffer, index=False)
五、计算出删除后,dataSourceid应该保留的个数及id;
class offerdatasource:
    """List the distinct data-source ids still referenced by valid offers."""

    def get_datasource(self):
        """Print the distinct data-source ids, then how many there are.

        Each non-null ``dataSources`` cell of ``filenameoffer`` holds the text
        form of a Python list, which is parsed back with ``ast.literal_eval``.
        """
        frame = pd.read_csv(filenameoffer)
        cells = frame['dataSources'].dropna(axis=0).values
        collected = [
            source_id
            for cell in cells
            for source_id in ast.literal_eval(cell)
        ]
        distinct = list(set(collected))
        print(distinct)
        print(len(distinct))
六、计算出删除offer的个数及offer_id;
class deleteOfferid:
    """Report the click-system offers that are not among the valid offers."""

    def get_delete_offerid(self):
        """Print the number of offers to delete, then their ids.

        Reads the valid-offer CSV (``filename``) and the click-system CSV
        (``filenameclick``) and keeps every ``offerId`` of the latter that is
        absent from the former, preserving order and duplicates.
        """
        valid_offers = pd.read_csv(filename)  # original note: 3547 (presumably the row count at the time)
        click_offers = pd.read_csv(filenameclick)  # original note: 4544 (presumably the row count at the time)
        # Build the lookup once: a set makes each membership test a hash
        # lookup instead of a scan over the whole valid-offer array.
        valid_ids = set(valid_offers['offerId'].values)
        # Present in the click-system list, missing from the valid offers.
        to_delete = [
            offer_id
            for offer_id in click_offers['offerId'].values
            if offer_id not in valid_ids
        ]
        print(len(to_delete))
        print(to_delete)