不同数据类型的内部秘密----编程内幕(2)

程序员小迷

于 2024-05-12 12:06:55 发布

阅读量461

点赞数 8

分类专栏：编程语言 / 你想知道的C语言 / 小话c语言　文章标签：编程内幕 数据类型 面试 c++ 汇编

本文链接：https://blog.csdn.net/cxsjabcabc/article/details/77870427

版权

编程语言同时被 3 个专栏收录

29 篇文章 0 订阅

订阅专栏

你想知道的C语言

29 篇文章 12 订阅

订阅专栏

小话c语言

29 篇文章 11 订阅

订阅专栏

Q： char类型是如何被当成int处理的？

A: 我们可以看看char类型变量在何时才会被当做int处理.

#include <stdio.h>

/*
 * Demo: store the character 'a' in a char variable and print it with "%c".
 * Returns 0.
 */
int main()
{
    char ch;
    ch = 'a';
    
    /* printf is variadic, so ch undergoes default argument promotion to int
     * when it is passed here. */
    printf("%c\n", ch);
    return 0;
}

汇编代码如下：

hello`main:
    0x100000f60 <+0>:  pushq  %rbp
    0x100000f61 <+1>:  movq   %rsp, %rbp
    0x100000f64 <+4>:  subq   $0x10, %rsp
    0x100000f68 <+8>:  leaq   0x43(%rip), %rdi          ; "%c\n"
    0x100000f6f <+15>: movl   $0x0, -0x4(%rbp)
->  0x100000f76 <+22>: movb   $0x61, -0x5(%rbp)
    0x100000f7a <+26>: movsbl -0x5(%rbp), %esi
    0x100000f7e <+30>: movb   $0x0, %al
    0x100000f80 <+32>: callq  0x100000f92               ; symbol stub for: printf

movb $0x61, -0x5(%rbp)是把字符'a'保存到ch变量中. movb是单个字节的拷贝，显然此时ch并没有当做int来处理.

而在后面： movsbl -0x5(%rbp), %esi是带符号扩展的传送指令，它把1字节的ch符号扩展为4字节后放到了int大小的寄存器esi中，说明ch在作为printf的可变参数传递时被提升为int.

再看一个例子：

#include <stdio.h>

/*
 * Demo: add a char to an int and print the sum with "%d".
 * Returns 0.
 */
int main()
{
    char ch;
    int i = 0xF;
    
    ch = 'a';
    /* ch is promoted to int before the addition (integer promotion). */
    i = i + ch;
    /* With 'a' == 0x61 (as in the accompanying listing) this prints 112. */
    printf("%d\n", i);

    return 0;
}

hello`main:
    0x100000f40 <+0>:  pushq  %rbp
    0x100000f41 <+1>:  movq   %rsp, %rbp
    0x100000f44 <+4>:  subq   $0x10, %rsp
    0x100000f48 <+8>:  leaq   0x57(%rip), %rdi          ; "%d\n"
    0x100000f4f <+15>: movl   $0x0, -0x4(%rbp)
    0x100000f56 <+22>: movl   $0xf, -0xc(%rbp)
    0x100000f5d <+29>: movb   $0x61, -0x5(%rbp)
->  0x100000f61 <+33>: movl   -0xc(%rbp), %eax
    0x100000f64 <+36>: movsbl -0x5(%rbp), %ecx
    0x100000f68 <+40>: addl   %ecx, %eax
    0x100000f6a <+42>: movl   %eax, -0xc(%rbp)
    0x100000f6d <+45>: movl   -0xc(%rbp), %esi
    0x100000f70 <+48>: movb   $0x0, %al
    0x100000f72 <+50>: callq  0x100000f84               ; symbol stub for: printf

在执行i = i + ch时，对应的指令是：

movsbl -0x5(%rbp), %ecx

addl   %ecx, %eax

此时，证明了char被提升为int.

由上面可见，char被提升为int是在char参与算术运算（例如和int一起计算）或者作为可变参数传递（例如传给printf）时才会产生，如果char变量只是单独赋值或存储，又有什么必要提升为int呢？

Q: unsigned int和int究竟有何区别？

A：

#include <stdio.h>

/*
 * Demo: assign constants to an int and to an unsigned int.
 * Produces no output; returns 0.
 */
int main()
{
    int i;
    unsigned int j;
    
    /* Both variables receive a plain constant store. */
    i = 1;
    j = 2;

    return 0;
}

hello`main:
    0x100000f90 <+0>:  pushq  %rbp
    0x100000f91 <+1>:  movq   %rsp, %rbp
    0x100000f94 <+4>:  xorl   %eax, %eax
    0x100000f96 <+6>:  movl   $0x0, -0x4(%rbp)
->  0x100000f9d <+13>: movl   $0x1, -0x8(%rbp)
    0x100000fa4 <+20>: movl   $0x2, -0xc(%rbp)
    0x100000fab <+27>: popq   %rbp
    0x100000fac <+28>: retq

两条movl语句并没有太大区别，看起来int i和unsigned int j在cpu看来并没有什么区别，都是4字节，对应不同地址而已.

下面我们用int和unsigned int比较大小：

#include <stdio.h>

/*
 * Demo: compare an int with an unsigned int and print the result of the
 * comparison (0 or 1) with "%d".
 * Returns 0.
 */
int main()
{
    int i;
    unsigned int j;
    
    i = 1;
    /* -1 converted to unsigned int wraps to the maximum value
     * (0xffffffff in the accompanying listing). */
    j = -1;
    
    /* i is converted to unsigned int for the comparison (usual arithmetic
     * conversions), so 1 > 0xffffffff is false and 0 is printed. */
    printf("%d\n", i > j);
    
    return 0;
}

    0x100000f56 <+22>: movl   $0x1, -0x8(%rbp)
    0x100000f5d <+29>: movl   $0xffffffff, -0xc(%rbp)   ; imm = 0xFFFFFFFF 
    0x100000f64 <+36>: movl   -0x8(%rbp), %eax
    0x100000f67 <+39>: cmpl   -0xc(%rbp), %eax
->  0x100000f6a <+42>: seta   %cl
    0x100000f6d <+45>: andb   $0x1, %cl
    0x100000f70 <+48>: movzbl %cl, %esi
    0x100000f73 <+51>: movb   $0x0, %al
    0x100000f75 <+53>: callq  0x100000f88               ; symbol stub for: printf

当断点停在如上位置时，

(lldb) register read rflags
  rflags = 0x0000000000000213

RFLAGS寄存器 b0: CF = 1, b6: ZF = 0.

所以seta %cl保存到cl寄存器的数值为0: 只有CF = 0, ZF = 0的时候cl才会是1.

注意： seta指令是根据无符号数比较的结果来置位的. 这里印证了int和unsigned int在一起操作时，int会被转换成unsigned int.

所以最终printf输出的结果为0.

对此规则，可能有人会提出异议，但它基于一个基本的准则：两个数据操作，向数据更长不会丢失数值的方向去转换；当两者长度相同而符号不同时（如int和unsigned int），C语言规定统一转换为无符号类型，因此本例中的-1变成了0xFFFFFFFF.

Q： 1左移32位是多少？

A：

#include <stdio.h>

/*
 * Demo: left-shift the int value 1 by 32 bits and print the result with "%d".
 * Returns 0.
 */
int main()
{
    int i = 1;
    int j;
    
    /* NOTE(review): if int is 32 bits wide (as the 32-bit shll in the
     * accompanying listing suggests), shifting by 32 is undefined behavior
     * in C; the printed value is not portable. */
    j = i << 32;
    printf("%d\n", j);
    
    return 0;
}

    0x100000f5f <+15>: movl   $0x20, %ecx
    0x100000f64 <+20>: movl   $0x0, -0x4(%rbp)
    0x100000f6b <+27>: movl   $0x1, -0x8(%rbp)
    0x100000f72 <+34>: movl   -0x8(%rbp), %eax
    0x100000f75 <+37>: shll   %cl, %eax
    0x100000f77 <+39>: movl   %eax, -0xc(%rbp)
->  0x100000f7a <+42>: movl   -0xc(%rbp), %esi
    0x100000f7d <+45>: movb   $0x0, %al
    0x100000f7f <+47>: callq  0x100000f92               ; symbol stub for: printf

可以看到shll左移%cl: 0x20即32位. 有一部分书籍说，左移位数超过数据类型的比特长度时，会采用对比特长度取模的方式得到最终左移的位数，并认为这是编译器的行为. 其实不然，这是指令集的行为. 另外需要注意：在C标准中，移位位数大于或等于类型的比特长度属于未定义行为，因此在不同平台或不同优化级别下，结果可能并不相同.

如下为Intel指令集手册的原文：

Shifts the bits in the first operand (destination operand) to the left or right 
by the number of bits specified in the second operand (count operand). Bits shifted 
beyond the destination operand boundary are first shifted into the CF flag, then 
discarded. At the end of the shift operation, the CF flag contains the last bit
shifted out of the destination operand.
The destination operand can be a register or a memory location. The count operand 
can be an immediate value or the CL register. The count is masked to 5 bits 
(or 6 bits if in 64-bit mode and REX.W is used). The count range is limited to 0 to 31
(or 63 if 64-bit mode and REX.W is used). A special opcode encoding is provided for a count of 1.

微风不燥，阳光正好，你就像风一样经过这里，愿你停留的片刻温暖舒心。

我是程序员小迷（致力于C、C++、Java、Kotlin、Android、Shell、JavaScript、TypeScript、Python等编程技术的技巧经验分享），若作品对您有帮助，请关注、分享、点赞、收藏、在看、喜欢，您的支持是我们为您提供帮助的最大动力。

欢迎关注。助您在编程路上越走越好！

程序员小迷

关注

8
点赞
踩
10

收藏

觉得还不错? 一键收藏
1
评论
不同数据类型的内部秘密----编程内幕(2)

可以看到shll左移%cl: 0x20即32位. 有一部分书籍说，左移语句对于超过数据大小比特长度会采用模比特长度的方式得到最终左移的位数，并认为这是编译器的行为. 其实不然，这是指令集的行为.此时，证明了char被提升为int.
复制链接

扫一扫