删除无点击数据offer数据分析使用-CSDN博客

本文链接：https://blog.csdn.net/jingzhaopan/article/details/132456580

梳理思路：

1、获取 7month 和 8month fullreport 报表中所有offer；输出结果：offerid， totalClickCount；

2、分析数据：去除 7month totalClickCount=0 且 8month totalClickCount=0 的offer（即两个月都没有点击的offer）；

result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)] 获取有效的offer数据data2；

3、获取点击系统自动跑量列表中offer数据data3，输出结果：offerid，dataSourceid;

4、分析数据根据data2有效offer，获取到有效的offer对应的每个dataSourceid；

result = pd.merge(data1, data2, on='offerId', how='left')（注：此代码中的 data1 为第2步得到的有效offer数据，data2 为第3步得到的自动跑量列表数据）

5、计算出删除后，dataSourceid应该保留的个数及id；

6、计算出删除offer的个数及offer_id;

操作方法：

一、获取 7month 和 8month fullreport 报表中所有offer；

class getFullreportOffer_8month

class getFullreportOffer_7month

class getFullreportOffer_8month:
    """Collect per-offer click totals from the full report for 2023-08-01..2023-08-31.

    Relies on names that are not defined in this block and are expected to
    exist at module level: ``requests``, ``header``, ``thread_pool_executor``,
    ``np``, ``pd`` and ``filename8``.
    """

    def _process(self, page):
        """Fetch one report page and split it into two parallel lists.

        Args:
            page: Page number; it is converted with ``str`` and placed in the
                ``page`` query parameter (100 rows are requested per page).

        Returns:
            A tuple ``(offer_ids, click_counts)`` holding the ``offerId`` and
            ``totalClickCount`` value of every row in
            ``response['result']['dataList']``, in response order.
        """
        first_day = "2023-08-01"
        last_day = "2023-08-31"
        # The endpoint prefix is elided as "···" in this source; the query
        # string fragments are kept exactly as written.
        url = (
            "···?page=" + str(page)
            + "&pageSize=100&fromDate=" + first_day
            + "&toDate=" + last_day
            + "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds=&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes=&accountTypes=&columns=offer_id"
        )
        # NOTE(review): TLS verification is switched off and no timeout is
        # passed, so a stalled server blocks the worker thread indefinitely.
        payload = requests.get(url=url, headers=header, verify=False).json()
        rows = payload['result']['dataList']
        offer_ids = [row['offerId'] for row in rows]
        click_counts = [row['totalClickCount'] for row in rows]
        return offer_ids, click_counts

    def _process_multithread(self, list_):
        """Run ``_process`` for every page in ``list_`` on the shared executor.

        All tasks are submitted before any result is awaited, and results are
        returned in the same order as ``list_``.
        """
        futures = []
        for page in list_:
            futures.append(thread_pool_executor.submit(self._process, page))
        return [future.result() for future in futures]

    def run(self):
        """Download pages 1 through 32 and write the combined table to ``filename8``.

        The CSV has the columns ``offerId`` and ``totalClickCount8`` and no
        index column.
        """
        pages = np.arange(1, 33)
        all_offer_ids = []
        all_click_counts = []
        for offer_ids, click_counts in self._process_multithread(pages):
            all_offer_ids.extend(offer_ids)
            all_click_counts.extend(click_counts)

        frame = pd.DataFrame({
            'offerId': all_offer_ids,
            'totalClickCount8': all_click_counts,
        })
        frame.to_csv(filename8, index=False)

class getFullreportOffer_7month:
    """Collect per-offer click totals from the full report for 2023-07-01..2023-07-31.

    Relies on names that are not defined in this block and are expected to
    exist at module level: ``requests``, ``header``, ``thread_pool_executor``,
    ``np``, ``pd`` and ``filename7``.
    """

    def _process(self, page):
        """Fetch one report page and split it into two parallel lists.

        Args:
            page: Page number; it is converted with ``str`` and placed in the
                ``page`` query parameter (100 rows are requested per page).

        Returns:
            A tuple ``(offer_ids, click_counts)`` holding the ``offerId`` and
            ``totalClickCount`` value of every row in
            ``response['result']['dataList']``, in response order.
        """
        first_day = "2023-07-01"
        last_day = "2023-07-31"
        # The endpoint prefix is elided as "···" in this source; the query
        # string fragments are kept exactly as written.
        url = (
            "···?page=" + str(page)
            + "&pageSize=100&fromDate=" + first_day
            + "&toDate=" + last_day
            + "&fromHour=00&toHour=23&timezone=%2B00:00&affiliateIds=&sourceIds=&sorting=rejectionTotal&sortType=desc&ruleIds=&businessTypes=&accountTypes=&columns=offer_id"
        )
        # NOTE(review): TLS verification is switched off and no timeout is
        # passed, so a stalled server blocks the worker thread indefinitely.
        payload = requests.get(url=url, headers=header, verify=False).json()
        rows = payload['result']['dataList']
        offer_ids = [row['offerId'] for row in rows]
        click_counts = [row['totalClickCount'] for row in rows]
        return offer_ids, click_counts

    def _process_multithread(self, list_):
        """Run ``_process`` for every page in ``list_`` on the shared executor.

        All tasks are submitted before any result is awaited, and results are
        returned in the same order as ``list_``.
        """
        futures = []
        for page in list_:
            futures.append(thread_pool_executor.submit(self._process, page))
        return [future.result() for future in futures]

    def run(self):
        """Download pages 1 through 33 and write the combined table to ``filename7``.

        The CSV has the columns ``offerId`` and ``totalClickCount7`` and no
        index column.
        """
        pages = np.arange(1, 34)
        all_offer_ids = []
        all_click_counts = []
        for offer_ids, click_counts in self._process_multithread(pages):
            all_offer_ids.extend(offer_ids)
            all_click_counts.extend(click_counts)

        frame = pd.DataFrame({
            'offerId': all_offer_ids,
            'totalClickCount7': all_click_counts,
        })
        frame.to_csv(filename7, index=False)

二、分析数据：去除 7month totalClickCount=0 且 8month totalClickCount=0 的offer（即两个月都没有点击的offer）；

result.loc[(result['totalClickCount7'] > 0) | (result['totalClickCount8'] > 0)] 获取有效的offer数据data2；

class dataAnalysis:
    """Keep only the offers that received clicks in at least one of the two report CSVs."""

    def get_datas(self):
        """Merge the two monthly click tables and save the offers with clicks.

        Reads ``filename7`` and ``filename8`` (module-level paths not defined
        in this block), outer-joins them on ``offerId`` and writes to
        ``filename`` every row whose ``totalClickCount7`` or
        ``totalClickCount8`` is greater than zero. An offer missing from one
        file gets NaN in that column, which does not satisfy ``> 0``.
        """
        july = pd.read_csv(filename7)
        august = pd.read_csv(filename8)
        merged = pd.merge(july, august, on='offerId', how='outer')

        has_clicks_7 = merged['totalClickCount7'] > 0
        has_clicks_8 = merged['totalClickCount8'] > 0
        active = merged.loc[has_clicks_7 | has_clicks_8]
        active.to_csv(filename, index=False)

三、获取点击系统自动跑量列表中offer数据data3，输出结果：offerid，dataSourceid;

class get_click_offer_datasourceid:
    """Export every offer of the paged click-system listing with its data-source ids.

    Relies on names that are not defined in this block and are expected to
    exist at module level: ``requests``, ``header``, ``thread_pool_executor``,
    ``np``, ``pd`` and ``filenameclick``.
    """

    @staticmethod
    def _extract_data_source_ids(record):
        """Return the data-source ids attached to one listing record.

        ``record['dataSourceId']`` is included unless it equals ``"-1"``.
        ``record['dataSourceIds']`` is included unless it equals ``"-1"``;
        a comma-separated value is split into its parts.

        If a field is missing or has an unexpected type, the string ``"NaN"``
        is appended as a placeholder after whatever was collected so far.
        Only ``KeyError``, ``TypeError`` and ``AttributeError`` are handled:
        those are the errors the lookups, the ``in`` test and ``split`` can
        raise. Anything else (for example ``KeyboardInterrupt``) propagates
        instead of being silently swallowed.
        """
        ids = []
        try:
            single_id = record['dataSourceId']
            if single_id != "-1":
                # Described as the real-time data package in the original notes.
                ids.append(single_id)
            multi_ids = record['dataSourceIds']
            if multi_ids != "-1":
                if "," in multi_ids:
                    # Several data packages are listed in one string.
                    ids.extend(multi_ids.split(','))
                else:
                    ids.append(multi_ids)
        except (KeyError, TypeError, AttributeError):
            ids.append("NaN")
        return ids

    def process(self, page):
        """Fetch one listing page (10 records per page).

        Args:
            page: Page number; converted with ``str`` into the ``pageNum``
                query parameter.

        Returns:
            A tuple ``(offer_ids, data_source_lists)`` of two parallel lists:
            the ``offerId`` of each record in
            ``response['result']['records']`` and the list of data-source ids
            extracted for that record.
        """
        offer_lit = []
        datanumber_lit = []
        # The endpoint prefix is elided as "····" in this source.
        url = "····?pageNum="+str(page)+"&pageSize=10"
        # NOTE(review): TLS verification is switched off and no timeout is
        # passed, so a stalled server blocks the worker thread indefinitely.
        res = (requests.get(url=url, headers=header, verify=False).json())['result']['records']
        for result in res:
            value_lit = self._extract_data_source_ids(result)
            offer_lit.append(result['offerId'])
            datanumber_lit.append(value_lit)
        return offer_lit, datanumber_lit

    def process_multithread(self, list_):
        """Run ``process`` for every page in ``list_`` on the shared executor.

        All tasks are submitted first; results are then collected in the
        order of ``list_``.
        """
        task_list = [thread_pool_executor.submit(self.process, page) for page in list_]
        response_list = [task.result() for task in task_list]
        return response_list

    def run(self):
        """Download pages 1 through 456 and write the result to ``filenameclick``.

        The CSV has the columns ``offerId`` and ``dataSources`` (each cell is
        the list of ids for that offer) and no index column.
        """
        offer_lit_total = []
        datanumber_lit_total = []
        lit = np.arange(1, 457)
        for offer_ids, source_lists in self.process_multithread(lit):
            offer_lit_total.extend(offer_ids)
            datanumber_lit_total.extend(source_lists)

        lis_dic = {
            'offerId': offer_lit_total,
            'dataSources': datanumber_lit_total,
        }
        result = pd.DataFrame(lis_dic)

        result.to_csv(filenameclick, index=False)

四、分析数据根据data2有效offer，获取到有效的offer对应的每个dataSourceid；

result = pd.merge(data1, data2, on='offerId', how='left')（注：此代码中的 data1 为第二步得到的有效offer数据，data2 为第三步得到的自动跑量列表数据）

class effectiveOffer:
    """Attach the click-system data-source columns to the offers that had clicks."""

    def get_datas(self):
        """Left-join the offer table with the click listing and save the result.

        Reads ``filename`` (left table) and ``filenameclick`` (right table),
        joins them on ``offerId`` keeping every row of the left table, and
        writes the result to ``filenameoffer`` without an index column. The
        three paths are module-level names not defined in this block.
        """
        active_offers = pd.read_csv(filename)
        click_listing = pd.read_csv(filenameclick)
        joined = active_offers.merge(click_listing, how='left', on='offerId')
        joined.to_csv(filenameoffer, index=False)

五、计算出删除后，dataSourceid应该保留的个数及id；

class offerdatasource:
    """Report the distinct data-source ids still referenced by the remaining offers."""

    @staticmethod
    def _unique_source_ids(cells):
        """Return the distinct ids found in an iterable of list literals.

        Args:
            cells: Iterable of strings, each the textual form of a Python
                list (for example ``"['12', '7']"``), parsed with
                ``ast.literal_eval``.

        Returns:
            The distinct ids as a list, ordered by their string form so the
            output is the same on every run. The string ``"NaN"`` is left
            out: ``get_click_offer_datasourceid.process`` appends it as a
            placeholder when a record's ids cannot be read, so it is not a
            real data-source id and must not be counted.
        """
        unique_ids = set()
        for cell in cells:
            unique_ids.update(ast.literal_eval(cell))
        unique_ids.discard("NaN")
        return sorted(unique_ids, key=str)

    def get_datasource(self):
        """Print the distinct data-source ids in ``filenameoffer`` and how many there are.

        Rows whose ``dataSources`` cell is empty are skipped. ``filenameoffer``
        is a module-level path not defined in this block.
        """
        data = pd.read_csv(filenameoffer)
        cells = data['dataSources'].dropna()
        ll = self._unique_source_ids(cells.values)
        print(ll)
        print(len(ll))

六、计算出删除offer的个数及offer_id;

class deleteOfferid:
    """Report the offers that appear in the click listing but not among the offers with clicks."""

    @staticmethod
    def _offers_missing_from(candidates, reference):
        """Return the items of ``candidates`` that do not occur in ``reference``.

        Order and duplicates of ``candidates`` are preserved. ``reference``
        is turned into a set once, so each membership test is a single hash
        lookup instead of a scan over the whole reference sequence.
        """
        known = set(reference)
        return [offer for offer in candidates if offer not in known]

    def get_delete_offerid(self):
        """Print how many offers are to be deleted, followed by their ids.

        An offer is reported when its ``offerId`` is present in
        ``filenameclick`` and absent from ``filename``. Both paths are
        module-level names not defined in this block.
        """
        data1 = pd.read_csv(filename)           # 3547 (figure noted by the author)
        data2 = pd.read_csv(filenameclick)      # 4544 (figure noted by the author)
        # tolist() yields plain Python values, which hash consistently and
        # print without numpy scalar wrappers.
        kept_ids = data1['offerId'].tolist()
        listed_ids = data2['offerId'].tolist()
        # Present in the click listing, absent from the kept offers.
        d = self._offers_missing_from(listed_ids, kept_ids)

        print(len(d))
        print(d)