[ltp][eas]eas_big_to_small.c

11 篇文章 2 订阅
8 篇文章 2 订阅

目录

 

 

测试用例图示

测试用例源码  

测试程序在corner case时的bug

如何解决bug


 

测试用例图示

测试用例源码  

  1 /*
  2  * Copyright (c) 2018 Google, Inc.
  3  *
  4  * SPDX-License-Identifier: GPL-2.0-or-later
  5  *
  6  * Task starts out as a CPU hog and then becomes small.
  7  * Task placement and downmigration latency are verified.
  8  */
  9 
 10 #define _GNU_SOURCE
 11 #include <errno.h>
 12 #include <pthread.h>
 13 #include <sched.h>
 14 #include <time.h>
 15 
 16 #include "tst_test.h"
 17 #include "tst_safe_file_ops.h"
 18 #include "tst_safe_pthread.h"
 19 
 20 #include "trace_parse.h"
 21 #include "util.h"
 22 
 23 #define TRACE_EVENTS "sched_switch"
 24 
 25 static int task_tid;
 26 
 27 #define MAX_DOWNMIGRATE_LATENCY_US 100000
 28 #define MAX_INCORRECT_CLUSTER_PCT 10
 29 #define BURN_SEC 3
 30 static void *task_fn(void *arg LTP_ATTRIBUTE_UNUSED)
 31 {
 32     task_tid = gettid();
 33 
 34     printf("Big task executing for %ds...\n", BURN_SEC);
 35     burn(BURN_SEC * USEC_PER_SEC, 0);
 36 
 37     printf("Changing to small task...\n");
 38     SAFE_FILE_PRINTF(TRACING_DIR "trace_marker", "SMALL TASK");
 39     burn(BURN_SEC * USEC_PER_SEC, 1);
 40 
 41     return NULL;
 42 }
 43 
 44 static int parse_results(void)
 45 {
 46     int i, pct, rv = 0;
/*task每次sched_switch in的是时间戳*/
 47     unsigned long long exec_start_us = 0;
/*小task迁移到小核后,在大核的运行时间*/
 48     unsigned long long too_big_cpu_us = 0;
/*大task在小核上运行的时间*/
 49     unsigned long long too_small_cpu_us = 0;
/*大task运行时间*/
 50     unsigned long long big_task_us = 0;
/*小task运行时间*/
 51     unsigned long long small_task_us = 0;
/*小task开始运行时间戳*/
 52     unsigned long long smalltask_ts_usec = 0;
/*小task开始运行后,第一次从大核迁移到小核,开始运行的时间戳*/
 53     unsigned long long downmigrate_ts_usec = 0;
/*小task迁移到小核前,在大核运行时间*/
 54     unsigned long long downmigrate_latency_usec = 0; 
 55     cpu_set_t cpuset;
 56 
 57     if (find_cpus_with_capacity(0, &cpuset)) {
 58         printf("Failed to find the CPUs in the little cluster.\n");
 59         return -1;
 60     }
 61 
 62     for (i = 0; i < num_trace_records; i++) {
 63         unsigned long long segment_us;
 64         struct trace_sched_switch *t = trace[i].event_data;
 65 
 66         if (trace[i].event_type == TRACE_RECORD_TRACING_MARK_WRITE &&
 67                !strcmp(trace[i].event_data, "SMALL TASK")) {
 68             smalltask_ts_usec = TS_TO_USEC(trace[i].ts);
 69             continue;
 70         }
 71 
 72         if (trace[i].event_type != TRACE_RECORD_SCHED_SWITCH)
 73             continue;
 74 
 75         if (t->next_pid == task_tid) {   /*task sched_switch in trace event*/
 76             /* Start of task execution segment. */
 77             if (exec_start_us) {
 78                 printf("Trace parse fail: double exec start\n");
 79                 return -1;
 80             }
/*记录task sched_switch in的时间戳*/
 81             exec_start_us = TS_TO_USEC(trace[i].ts);
/*
 *如果小task已经开始运行,downmigrate_ts_usec还没被赋值,且task正在小核运行,
 *表示小task才被迁移到小核运行,记录trace时间戳作为downmigrate_ts_usec
 */
 82             if (smalltask_ts_usec && !downmigrate_ts_usec &&
 83                 CPU_ISSET(trace[i].cpu, &cpuset))
 84                 downmigrate_ts_usec = exec_start_us;
 85             continue;
 86         }
 87         if (t->prev_pid != task_tid)    /*task sched_switch out trace event*/
 88             continue;
 89         /* End of task execution segment. */
/*记录task从上一个sched_switch in到此刻sched_switch out时间,即task此次运行时长*/
 90         segment_us = TS_TO_USEC(trace[i].ts);
 91         segment_us -= exec_start_us;
 92         exec_start_us = 0;
/*继续处理switch out trace event,表示此次task在小核运行*/
 93         if (CPU_ISSET(trace[i].cpu, &cpuset)) {
 94             /* Task is running on little CPUs. */
 95             if (!smalltask_ts_usec)
/*如果小task还没开始运行,则表示此次是大task在小核运行*/
 96                 too_small_cpu_us += segment_us;
 97         } else {
 98             /* Task is running on big CPUs. */
 99             if (smalltask_ts_usec) {    /*小task已经开始执行*/
100                 /*
101                  * Downmigration is accounted separately, so
102                  * only record mis-scheduled time here if it
103                  * happened after downmigration.
104                  */
/*表示小task迁移到小核后,在大核运行的时间*/
105                 if (downmigrate_ts_usec)
106                     too_big_cpu_us += segment_us;
107             }
108         }
/*根据小task开始运行的时间戳,统计大小task各自运行的时长,此处是 "SMALL TASK"trace marker
 *的时间戳,并非小task真实开始运行的时间戳,如果这个trace marker在大task sched_switch out
 *之前,则大task最后一次运行的时长统计会被遗漏
 */
109         if (smalltask_ts_usec)
110             small_task_us += segment_us;
111         else
112             big_task_us += segment_us;
113     }
114 
/*大task在小核运行时间/大task运行时间<10% ? pass : fail*/
115     pct = (too_small_cpu_us * 100) / big_task_us;
116     rv |= (pct > MAX_INCORRECT_CLUSTER_PCT);
117     printf("Time incorrectly scheduled on small when task was big: "
118            "%lld usec (%d%% of big task CPU time)\n", too_small_cpu_us,
119            pct);
/*小task在大核运行时间(downmigrate后)/小task运行时间<10% ? pass : fail*/
120     pct = (too_big_cpu_us * 100) / small_task_us;
121     rv |= (pct > MAX_INCORRECT_CLUSTER_PCT);
122     printf("Time incorrectly scheduled on big when task was small, after "
123            "downmigration: %lld usec (%d%% of small task CPU time)\n",
124            too_big_cpu_us, pct);
125 
126     if (downmigrate_ts_usec) {
/*小task从大核迁移到小核的延时时长*/
127         downmigrate_latency_usec = downmigrate_ts_usec -
128             smalltask_ts_usec;
129         printf("Downmigration latency: %lld usec\n",
130                downmigrate_latency_usec);
131     } else {
132         printf("Task never downmigrated!\n");
133         downmigrate_latency_usec = UINT_MAX;
134     }
135 
136     return (rv || downmigrate_latency_usec > MAX_DOWNMIGRATE_LATENCY_US);
137 }
138 
139 static void run(void)
140 {
141     pthread_t task_thread;
142 
143     tst_res(TINFO, "Maximum incorrect cluster time percentage: %d%%",
144         MAX_INCORRECT_CLUSTER_PCT);
145     tst_res(TINFO, "Maximum downmigration latency: %d usec",
146         MAX_DOWNMIGRATE_LATENCY_US);
147 
148     /* configure and enable tracing */
149     SAFE_FILE_PRINTF(TRACING_DIR "tracing_on", "0");
150     SAFE_FILE_PRINTF(TRACING_DIR "buffer_size_kb", "16384");
151     SAFE_FILE_PRINTF(TRACING_DIR "set_event", TRACE_EVENTS);
152     SAFE_FILE_PRINTF(TRACING_DIR "trace", "\n");
153     SAFE_FILE_PRINTF(TRACING_DIR "tracing_on", "1");
154 
155     SAFE_PTHREAD_CREATE(&task_thread, NULL, task_fn, NULL);
156     SAFE_PTHREAD_JOIN(task_thread, NULL);
157 
158     /* disable tracing */
159     SAFE_FILE_PRINTF(TRACING_DIR "tracing_on", "0");
160     LOAD_TRACE();
161 
162     if (parse_results())
163         tst_res(TFAIL, "Task placement/migration latency goals "
164             "not met.\n");
165     else
166         tst_res(TPASS, "Task placement/migration latency goals "
167             "met.\n");
168 }
169 
170 static struct tst_test test = {
171     .test_all = run,
172     .cleanup = trace_cleanup,
173 };
                

测试程序在corner case时的bug

当35行小task运行完,切换到小task前,会在trace中做一个mark:SMALL TASK,标示小task开始运行。
但偶尔会出现如下情况,SMALL_TASK marker在大task sched_switch out之前,对应109~112行代码,会遗漏大task的统计时间:

     kworker/1:1-9884  [001] d..2 27284.732544: sched_switch: prev_comm=kworker/1:1 prev_pid=9884 prev_prio=120 prev_state=I ==> next_comm=swapper/1 next_pid=0 next_prio=120
 eas_big_to_smal-10059 [006] .... 27284.761959: tracing_mark_write: SMALL TASK
 eas_big_to_smal-10059 [006] d..2 27284.761978: sched_switch: prev_comm=eas_big_to_smal prev_pid=10059 prev_prio=120 prev_state=S ==> next_comm=swapper/6 next_pid=0 next_prio=120
          <idle>-0     [000] d..2 27284.762113: sched_switch: prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=kworker/u16:0 next_pid=10044 next_prio=120

此时case测试失败,因为大task统计时间变短,(大task在小核运行时间/大task运行时间) = 88%,log如下:

$:./eas_big_to_small                                                                                                                                                                                          
external/ltp/lib/tst_test.c:1217: INFO: Timeout per run is 0h 05m 00s
external/ltp/testcases/kernel/sched/eas/eas_big_to_small.c:144: INFO: Maximum incorrect cluster time percentage: 10%
external/ltp/testcases/kernel/sched/eas/eas_big_to_small.c:146: INFO: Maximum downmigration latency: 100000 usec
Big task executing for 3s...
Changing to small task...
Time incorrectly scheduled on small when task was big: 30633 usec (88% of big task CPU time)
Time incorrectly scheduled on big when task was small, after downmigration: 0 usec (0% of small task CPU time)
Downmigration latency: 111429 usec
external/ltp/testcases/kernel/sched/eas/eas_big_to_small.c:164: FAIL: Task placement/migration latency goals not met.

通过添加debug log,发现big_task_us统计错误,印证了代码逻辑:

  1 diff --git a/testcases/kernel/sched/eas/eas_big_to_small.c b/testcases/kernel/sched/eas/eas_big_to_small.c
  2 index 54f02993a..3e346ccbb 100644
  3 --- a/testcases/kernel/sched/eas/eas_big_to_small.c
  4 +++ b/testcases/kernel/sched/eas/eas_big_to_small.c
  5 @@ -50,6 +50,7 @@ static int parse_results(void)
  6     unsigned long long big_task_us = 0;
  7     unsigned long long small_task_us = 0;
  8     unsigned long long smalltask_ts_usec = 0;
  9 +   unsigned long long smalltask_tm_usec = 0;
 10     unsigned long long downmigrate_ts_usec = 0;
 11     unsigned long long downmigrate_latency_usec = 0;
 12     cpu_set_t cpuset;
 13 @@ -62,37 +63,49 @@ static int parse_results(void)
 14     for (i = 0; i < num_trace_records; i++) {
 15         unsigned long long segment_us;
 16         struct trace_sched_switch *t = trace[i].event_data;
 17 +       unsigned long long trace_ts_usec = TS_TO_USEC(trace[i].ts);
 18 
 19         if (trace[i].event_type == TRACE_RECORD_TRACING_MARK_WRITE &&
 20                !strcmp(trace[i].event_data, "SMALL TASK")) {
 21 -           smalltask_ts_usec = TS_TO_USEC(trace[i].ts);
 22 +           smalltask_tm_usec = trace_ts_usec;
 23 +           printf("smalltask_tm_usec: %lld\n", smalltask_tm_usec);
 24             continue;
 25         }
 26 
 27         if (trace[i].event_type != TRACE_RECORD_SCHED_SWITCH)
 28             continue;
 29 
 30 +       printf("ts=%lld tss=%d.%d cpu=%d task=%s prev_pid=%d next_pid=%d\n", TS_TO_USEC(trace[i].ts), trace[i].ts.sec, trace[i].ts.usec, trace[i].cpu, trace[i].task, t->prev_pid, t->next_pid);
 31 +
 32         if (t->next_pid == task_tid) {
 33 +           if (!smalltask_ts_usec && smalltask_tm_usec && trace_ts_usec > smalltask_tm_usec) {
 34 +               smalltask_ts_usec = trace_ts_usec;
 35 +               printf("smalltask_ts_usec: %lld\n", smalltask_ts_usec);
 36 +           }
 37             /* Start of task execution segment. */
 38             if (exec_start_us) {
 39                 printf("Trace parse fail: double exec start\n");
 40                 return -1;
 41             }
 42 -           exec_start_us = TS_TO_USEC(trace[i].ts);
 43 +           exec_start_us = trace_ts_usec;
 44 +           printf("exec_start_us=%lld\n", exec_start_us);
 45             if (smalltask_ts_usec && !downmigrate_ts_usec &&
 46 -               CPU_ISSET(trace[i].cpu, &cpuset))
 47 +               CPU_ISSET(trace[i].cpu, &cpuset)) {
 48                 downmigrate_ts_usec = exec_start_us;
 49 +               printf("downmigrate_ts_usec: %lld\n", downmigrate_ts_usec);
 50 +           }
 51             continue;
 52         }
 53         if (t->prev_pid != task_tid)
 54             continue;
 55         /* End of task execution segment. */
 56 -       segment_us = TS_TO_USEC(trace[i].ts);
 57 +       segment_us = trace_ts_usec;
 58 +       printf("exec_start_us=%lld end_us=%lld\n", exec_start_us, segment_us);
 59         segment_us -= exec_start_us;
 60         exec_start_us = 0;
 61         if (CPU_ISSET(trace[i].cpu, &cpuset)) {
 62             /* Task is running on little CPUs. */
 63 -           if (!smalltask_ts_usec)
 64 +           if (!smalltask_tm_usec)
 65                 too_small_cpu_us += segment_us;
 66         } else {
 67             /* Task is running on big CPUs. */
 68 @@ -112,6 +125,7 @@ static int parse_results(void)
 69             big_task_us += segment_us;
 70     }
 71 
 72 +   printf("too_small_cpu_us=%lld big_task_us=%lld too_big_cpu_us=%lld small_task_us=%lld\n", too_small_cpu_us, big_task_us, too_big_cpu_us, small_task_us);
 73     pct = (too_small_cpu_us * 100) / big_task_us;
 74     rv |= (pct > MAX_INCORRECT_CLUSTER_PCT);
 75     printf("Time incorrectly scheduled on small when task was big: "

如何解决bug

在trace中我们发现,SMALL_TASK marker并非小task开始执行的真实时间戳,(sched_switch in)才是真实的时间戳,修正smalltask_ts_usec为sched_switch对应的时间戳,问题不在复现。

From c2fb6c9f2ede80e2026c323e6c50060ca7fdaa7d Mon Sep 17 00:00:00 2001
From: ***
Date: Tue, 27 Oct 2020 10:55:58 +0800
Subject: [PATCH] eas: eas_big_to_small: fix big task runing time in corner
 case

Bug: 171682472

when big task run out of time 3s, sometimes the "SMALL TASK" trace
marker sign before trace event of big task schedule out in this way:

eas_big_to_smal [big core] .... 27284.761959: SMALL TASK
eas_big_to_smal [big core] d..2 27284.761978: sched_switch:
  prev_comm=eas_big_to_small ==> next_comm=swapper/6

this case will lose big task segment and caused wrong pct of time:
"Time incorrectly scheduled on small when task was big: 30633 usec
  (88% of big task CPU time)".

find the first sched_switch of task switch in after "SMALL TASK",
and mark this time stamp as samll task start is accurate.

Signed-off-by: ***
Change-Id: I073b7a58a5e1188fe60f3c623e74cc7b1b4ca73b
---
 testcases/kernel/sched/eas/eas_big_to_small.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/testcases/kernel/sched/eas/eas_big_to_small.c b/testcases/kernel/sched/eas/eas_big_to_small.c
index 36f9598..1dd4270 100644
--- a/testcases/kernel/sched/eas/eas_big_to_small.c
+++ b/testcases/kernel/sched/eas/eas_big_to_small.c
@@ -50,6 +50,7 @@ static int parse_results(void)
    unsigned long long big_task_us = 0;
    unsigned long long small_task_us = 0;
    unsigned long long smalltask_ts_usec = 0;
+   unsigned long long smalltask_tm_usec = 0;
    unsigned long long downmigrate_ts_usec = 0;
    unsigned long long downmigrate_latency_usec = 0;
    cpu_set_t cpuset;
@@ -62,10 +63,11 @@ static int parse_results(void)
    for (i = 0; i < num_trace_records; i++) {
        unsigned long long segment_us;
        struct trace_sched_switch *t = trace[i].event_data;
+       unsigned long long trace_ts_usec = TS_TO_USEC(trace[i].ts);
 
        if (trace[i].event_type == TRACE_RECORD_TRACING_MARK_WRITE &&
               !strcmp(trace[i].event_data, "SMALL TASK")) {
-           smalltask_ts_usec = TS_TO_USEC(trace[i].ts);
+           smalltask_tm_usec = trace_ts_usec;
            continue;
        }   
 
@@ -73,12 +75,14 @@ static int parse_results(void)
            continue;
 
        if (t->next_pid == task_tid) {
+           if (!smalltask_ts_usec && smalltask_tm_usec && trace_ts_usec > smalltask_tm_usec)
+               smalltask_ts_usec = trace_ts_usec;
            /* Start of task execution segment. */
            if (exec_start_us) {
                printf("Trace parse fail: double exec start\n");
                return -1; 
            }   
-           exec_start_us = TS_TO_USEC(trace[i].ts);
+           exec_start_us = trace_ts_usec;
            if (smalltask_ts_usec && !downmigrate_ts_usec &&
                CPU_ISSET(trace[i].cpu, &cpuset))
                downmigrate_ts_usec = exec_start_us;
@@ -87,12 +91,12 @@ static int parse_results(void)
        if (t->prev_pid != task_tid)
            continue;
        /* End of task execution segment. */
-       segment_us = TS_TO_USEC(trace[i].ts);
+       segment_us = trace_ts_usec;
        segment_us -= exec_start_us;
        exec_start_us = 0;
        if (CPU_ISSET(trace[i].cpu, &cpuset)) {
            /* Task is running on little CPUs. */
-           if (!smalltask_ts_usec)
+           if (!smalltask_tm_usec)
                too_small_cpu_us += segment_us;
        } else {
            /* Task is running on big CPUs. */
-- 
2.7.4

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值