8/31/2011 10:51:45 AM
QEMU is a dynamic translator. When it first encounters a piece of code, it converts it to the host instruction set.
动态翻译
Usually dynamic translators are very complicated and highly CPU dependent.
高度依赖CPU
QEMU uses some tricks which make it relatively easily portable and simple while achieving good performances.
The basic idea is to split every x86 instruction into fewer simpler instructions.
将x86指令分割成一些简单的指令
Each simple instruction is implemented by a piece of C code (see `target-i386/op.c').
Then a compile time tool (`dyngen') takes the corresponding object file (`op.o') to generate a dynamic code generator which concatenates
the simple instructions to build a function (see `op.h:dyngen_code()').
编译成一个函数
In essence, the process is similar to [1], but more work is done at compile time.
A key idea to get optimal performances is that constant parameters can be passed to the simple operations. For that purpose,
dummy ELF relocations are generated with gcc for each constant parameter. Then, the tool (`dyngen')
can locate the relocations and generate the appropriate C code to resolve them when building the dynamic code.
That way, QEMU is no more difficult to port than a dynamic linker.
To go even faster, GCC static register variables are used to keep the state of the virtual CPU.
--------------------------------------------------------------------------------
Since QEMU uses fixed simple instructions,
no efficient register allocation can be done.
However, because RISC CPUs have a lot of registers,
most of the virtual CPU state can be put in registers without doing complicated register allocation.
寄存器优化
--------------------------------------------------------------------------------
Good CPU condition codes emulation (EFLAGS register on x86) is a critical point to get good performances.
QEMU uses lazy condition code evaluation: instead of computing the condition codes after each x86 instruction,
it just stores one operand (called CC_SRC), the result (called CC_DST) and the type of operation (called CC_OP).
CC_OP is almost never explicitly set in the generated code because it is known at translation time.
In order to increase performances, a backward pass is performed on the generated simple instructions (see target-i386/translate.c:optimize_flags()). When it can be proved that the condition codes are not needed by the next instructions, no condition codes are computed at all.
After each translated basic block is executed, QEMU uses the simulated Program Counter (PC) and other CPU state information (such as the CS segment base value)
to find the next basic block.
// 查找下一个块
In order to accelerate the most common cases where the new simulated PC is known, QEMU can patch a basic block so that it jumps directly to the next one.
快速找到下一个块
The most portable code uses an indirect jump. An indirect jump makes it easier to make the jump target modification atomic.
On some host architectures (such as x86 or PowerPC), the JUMP opcode is directly patched so that the block chaining has no overhead.
间接跳转
--------------------------------------------------------------------------------
现在在来产生中间代码
/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
basic block 'tb'. If search_pc is TRUE, also generate PC
information for each intermediate instruction. */
产生的指令放在指令buf中
参数放在参数buf中
将所有的
num_temps = 0;
memset(temps, 0, sizeof(temps));
置为0
2K的short
TCG的标签定义 最多不能超过256个
/* A TCG code label.  While the label is unresolved, u.first_reloc
 * presumably chains the relocations to patch once its address is known;
 * after resolution, has_value is set and u.value holds the address
 * (inferred from the union member names — confirm in tcg.c). */
typedef struct TCGLabel {
    int has_value;                  /* non-zero once the label has been set */
    union {
        tcg_target_ulong value;         /* resolved value (valid when has_value != 0) */
        TCGRelocation *first_reloc;     /* pending relocation list (valid when has_value == 0) */
    } u;
} TCGLabel;
int first_free_temp[TCG_TYPE_COUNT * 2];
free temps index
if (temp_local)
k += TCG_TYPE_COUNT;
idx = s->first_free_temp[k];
这个index 能不能找到
在first_free_temp数组中 通过类型下标(可能修改)找index
tcg_temp_alloc
注意tcg最多只能有512个tmp
alloc 只是验证一下是否超过这个值而已
tcg_temp_new_internal
首先查找需要的类型在释放链表中是否存在
如果存在就直接就直接取第一个元素
否则就走正常流程
local和非local 之分
翻译成x86的指令执行
动态翻译出来的指令无法下断点?
bootloader的第一条指令是怎么走得?
/* NOTE(review): pasted from a memory-pool implementation (mblk-style
 * buffer header), not from QEMU's translator code.  Field meanings are
 * inferred from the names — confirm against the original source. */
typedef struct
{
    void * next;        /* next block in the doubly-linked list (assumed) */
    void * prev;        /* previous block in the doubly-linked list (assumed) */
    int refCnt;         /* reference count — presumably released at 0; verify */
    int size;           /* block size — units (bytes?) not visible here */
    char * cluster;     /* backing cluster / storage pointer (assumed) */
}MBLK;
/* NOTE(review): memory-pool descriptor accompanying MBLK above.
 * Semantics inferred from field names — confirm against the source. */
typedef struct
{
    LIST blkLst;        /* list of blocks owned by this pool */
    int num;            /* total number of blocks (assumed) */
    int allocCnt;       /* number of blocks currently allocated (assumed) */
    int isPending;      /* non-zero while a waiter is pending (assumed) */
    sem_t usage;        /* counting semaphore tracking availability (assumed) */
    sem_t lock;         /* semaphore used as a mutex protecting the pool (assumed) */
}MPOOL;
net_client_init
const char *net_clients[MAX_NET_CLIENTS];
指定了2个网络客户端
/* init network clients */
if (nb_net_clients == 0) {
/* if no clients, we use a default config */
net_clients[nb_net_clients++] = "nic";
#ifdef CONFIG_SLIRP
net_clients[nb_net_clients++] = "user";
#endif
}
如果没有就指定了2个默认的客户端,叫做nic 和user
for(i = 0;i < nb_net_clients; i++) {
if (net_client_parse(net_clients[i]) < 0)
exit(1);
}
接着对这2个网络客户端进行初始化
解析VLan的id
if (get_param_value(buf, sizeof(buf), "vlan", p)) {
vlan_id = strtol(buf, NULL, 0);
}
没有
static int init_timer_alarm(void)
{
struct qemu_alarm_timer *t = NULL;
int i, err = -1;
for (i = 0; alarm_timers[i].name; i++) {
t = &alarm_timers[i];
err = t->start(t);
if (!err)
break;
}
if (err) {
err = -ENOENT;
goto fail;
}
alarm_timer = t;
return 0;
fail:
return err;
}
现在有一系列的定时器 找一个能用的
{"dynticks", ALARM_FLAG_DYNTICKS, dynticks_start_timer,
dynticks_stop_timer, dynticks_rearm_timer, NULL},
/* HPET - if available - is preferred */
{"hpet", 0, hpet_start_timer, hpet_stop_timer, NULL, NULL},
/* ...otherwise try RTC */
{"rtc", 0, rtc_start_timer, rtc_stop_timer, NULL, NULL},
3个时钟源
名字 标志 方法
if (timer_create(CLOCK_REALTIME, &ev, &host_timer))
创建了一个实时时钟
容易进入到时钟中断里
main_loop_wait
主循环等待
/* A QEMU "bottom half": a deferred callback run from the main loop. */
struct QEMUBH {
    QEMUBHFunc *cb;     /* callback to invoke */
    void *opaque;       /* user data handed to cb */
    int scheduled;      /* non-zero when queued for execution */
    int idle;           /* idle BHs are only polled ~every 10ms (see qemu_bh_update_timeout) */
    int deleted;        /* marked for removal; skipped when scanning */
    QEMUBH *next;       /* singly-linked list link */
};
/* Head of the global singly-linked list of all bottom halves. */
static QEMUBH *first_bh = NULL;
所有的bh以链表的形式串接起来
/* Shrink *timeout according to pending bottom halves: a scheduled idle
 * BH caps the wait at 10ms, while a scheduled non-idle BH demands
 * immediate dispatch, so the timeout drops to 0 and scanning stops. */
static void qemu_bh_update_timeout(int *timeout)
{
    QEMUBH *cur;

    for (cur = first_bh; cur != NULL; cur = cur->next) {
        if (cur->deleted || !cur->scheduled)
            continue;

        if (!cur->idle) {
            /* non-idle bottom halves will be executed immediately */
            *timeout = 0;
            break;
        }

        /* idle bottom halves will be polled at least every 10ms */
        *timeout = MIN(10, *timeout);
    }
}
更新bh超时时间,注意idle的至少是10ms
#define MAX_CHAR_PIPES 8
static CharPipeState _s_charpipes[ MAX_CHAR_PIPES ];
8个字符管道
/* One node in a singly-linked chain of byte buffers; each node embeds a
 * CBuffer control block (consumed via cbuffer_read_peek / cbuffer_read_step)
 * and a fixed-size data area. */
typedef struct BipBuffer {
    struct BipBuffer* next;          /* next buffer in the chain */
    CBuffer cb[1];                   /* embedded control block */
    char buff[ BIP_BUFFER_SIZE ];    /* fixed-size payload storage */
} BipBuffer;
单链表构成buffer链表
/* this models each half of the charpipe */
typedef struct CharPipeHalf {
    CharDriverState cs[1];          /* embedded char driver for this end */
    BipBuffer* bip_first;           /* head of queued data buffers */
    BipBuffer* bip_last;            /* tail of queued data buffers */
    struct CharPipeHalf* peer;      /* NULL if closed */
} CharPipeHalf;
里面有2个cs
/* Poll every live charpipe half and every attached charbuffer.
 * A pipe half is live while its peer pointer is non-NULL; a charbuffer
 * is live while its endpoint pointer is non-NULL. */
void
charpipe_poll( void )
{
    int  n;

    /* poll the charpipes */
    for (n = 0; n < MAX_CHAR_PIPES; n++) {
        CharPipeState*  pipe = &_s_charpipes[n];

        if (pipe->a->peer != NULL)
            charpipehalf_poll(pipe->a);

        if (pipe->b->peer != NULL)
            charpipehalf_poll(pipe->b);
    }

    /* poll the charbuffers */
    for (n = 0; n < MAX_CHAR_BUFFERS; n++) {
        CharBuffer*  buf = &_s_charbuffers[n];

        if (buf->endpoint != NULL)
            charbuffer_poll(buf);
    }
}
/** This models a charbuffer, an object used to buffer
 ** the data that is sent to a given endpoint CharDriverState
 ** object.
 **
 ** On the other hand, any can_read() / read() request performed
 ** by the endpoint will be passed to the CharBuffer's corresponding
 ** handlers.
 **/
typedef struct CharBuffer {
    CharDriverState cs[1];          /* embedded char driver presented to clients */
    BipBuffer* bip_first;           /* head of the queue of buffered data */
    BipBuffer* bip_last;            /* tail of the queue of buffered data */
    CharDriverState* endpoint;      /* NULL if closed */
    char closing;                   /* set while shutting down (assumed — confirm) */
} CharBuffer;
有自己的cs 还有节点 很强大
/* Drain cbuf's queued bip buffers into its endpoint CharDriverState.
 * Fully-consumed buffers are unlinked and freed; draining stops as soon
 * as the endpoint accepts less than what was available (back-pressure)
 * or when no buffers remain.  No-op when the endpoint is closed. */
static void
charbuffer_poll( CharBuffer* cbuf )
{
    CharDriverState* dst = cbuf->endpoint;

    if (dst == NULL)
        return;

    for (;;) {
        BipBuffer*  head = cbuf->bip_first;
        uint8_t*    data;
        int         avail;
        int         written;

        if (head == NULL)
            break;

        avail = cbuffer_read_peek( head->cb, &data );
        if (avail == 0) {
            /* this buffer is exhausted: unlink it and release it */
            cbuf->bip_first = head->next;
            if (cbuf->bip_first == NULL)
                cbuf->bip_last = NULL;
            bip_buffer_free(head);
            continue;
        }

        written = qemu_chr_write( dst, data, avail );
        if (written < 0)            /* just to be safe */
            written = 0;
        else if (written > avail)
            written = avail;

        cbuffer_read_step( head->cb, written );

        if (written < avail)        /* endpoint is full; retry on a later poll */
            break;
    }
}
现在知道char_buffer是如何进行poll的吗?
CBuffer 是怎么读的?
cbuffer_read_peek 返回起始有效地址
avail = cbuffer_read_peek( bip->cb, &base );
if (avail == 0) {
cbuf->bip_first = bip->next;
if (cbuf->bip_first == NULL)
cbuf->bip_last = NULL;
bip_buffer_free(bip);
continue;
}
释放
那就是说
charbuffer_poll( CharBuffer* cbuf )
就是看charbuffer是不是有剩余buffer ?
有就直接调用peer的方法
看看采集芯片是否支持qcif的采集分辨率
charpipe_poll
字符管道poll
ALARM_FLAG_DYNTICKS
是否有dynticks
如果有dynticks 需要rearm一下
/* rearm timer, if not periodic */
if (alarm_timer->flags & ALARM_FLAG_EXPIRED) {
alarm_timer->flags &= ~ALARM_FLAG_EXPIRED;
qemu_rearm_alarm_timer(alarm_timer);
}
/* Start the VM if it is not already running: enable the CPU tick
 * counters, flag the machine as running, fire the state-change
 * notifiers, re-arm the host alarm timer, and wake all vCPU threads. */
void vm_start(void)
{
    if (vm_running)
        return;

    cpu_enable_ticks();
    vm_running = 1;
    vm_state_notify(1, 0);
    qemu_rearm_alarm_timer(alarm_timer);
    resume_all_vcpus();
}
/* NOTE(review): this is a duplicate paste of the vm_start() listing —
 * the notes quote the same function twice. */
void vm_start(void)
{
    if (!vm_running) {
        cpu_enable_ticks();
        vm_running = 1;
        vm_state_notify(1, 0);
        qemu_rearm_alarm_timer(alarm_timer);
        resume_all_vcpus();
    }
}
/* A vCPU may execute only when neither its stop request (env->stop)
 * nor its stopped state (env->stopped) is set.
 * Returns 1 when runnable, 0 otherwise. */
static int cpu_can_run(CPUState *env)
{
    return (env->stop || env->stopped) ? 0 : 1;
}
判断一个CPU是否能够运行?