Busybox支持中文的解决办法

最新推荐文章于 2023-03-13 18:39:09 发布

wuruixn

最新推荐文章于 2023-03-13 18:39:09 发布

阅读量1.1k

点赞数

分类专栏： Linux-app

Linux-app 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

在嵌入式 Linux 系统中，busybox 是最常用的文件系统构建工具。但是从 busybox 1.17.0 开始，如果不修改源码，ls 命令就无法显示中文：即使内核已经配置了中文支持，在 shell 下用 ls 命令也看不到中文文件名，这是因为 busybox 1.17.0 及以后的版本对非 ASCII 字符的输出做了限制。下面介绍如何修改，使 busybox 1.17.0 及以上版本支持中文。需要修改两个文件：printable_string.c 和 unicode.c。先来分析 ls 命令为什么无法显示中文，请看 printable_string.c 未修改的代码：

[cpp]view plaincopy 
   
 const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)  
 {  /* Returns str itself if nothing needs converting, else a converted copy; fills *stats when stats is non-NULL. */
     static char *saved[4];  /* converted copies handed out earlier; freed in rotation below */
     static unsigned cur_saved; /* = 0 */  
   
     char *dst;  
     const char *s;  
   
     s = str;  
     while (1) {  /* scan for the first byte that forces a conversion */
         unsigned char c = *s;  
         if (c == '\0') {  
             /* 99+% of inputs do not need conversion */  
             if (stats) {  
                 stats->byte_count = (s - str);  
                 stats->unicode_count = (s - str);  
                 stats->unicode_width = (s - str);  
             }  
             return str;  
         }  
         if (c < ' ')  /* ASCII control character */
             break;  
         if (c >= 0x7f)  /* DEL or any non-ASCII byte, e.g. every byte of UTF-8 encoded Chinese text */
             break;  
         s++;  
     }  
   
 #if ENABLE_UNICODE_SUPPORT  
     dst = unicode_conv_to_printable(stats, str);  /* conversion is delegated to unicode.c */
 #else  
     {  
         char *d = dst = xstrdup(str);  /* work on a private copy */
         while (1) {  
             unsigned char c = *d;  
             if (c == '\0')  
                 break;  
                 if (c < ' ' || c >= 0x7f)  /* same test as above: non-ASCII bytes are overwritten with '?' */
                    *d = '?';  
             d++;  
         }  
         if (stats) {  /* one byte == one character == one column in this branch */
             stats->byte_count = (d - dst);  
             stats->unicode_count = (d - dst);  
             stats->unicode_width = (d - dst);  
         }  
     }  
 #endif  
   
     free(saved[cur_saved]);  /* drop the copy previously stored in this slot */
     saved[cur_saved] = dst;  /* remember the new copy so a later call can free it */
     cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);  /* next slot, wrapping around (saved has 4 entries) */
   
     return dst;  
 }  

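从上面代码的第23、24行（`if (c >= 0x7f) break;`）以及第37、38行（`if (c < ' ' || c >= 0x7f) *d = '?';`）可以看出：只要字符串中出现大于等于 0x7F 的字节，就会跳出“无需转换、直接返回”的循环而进入转换流程；在没有启用 Unicode 支持时，这些字节会被直接替换成“?”。而 UTF-8 编码的中文字符每个字节都大于 0x7F，所以就算 Linux 内核设置了支持中文，中文也无法显示出来，全部被“?”代替了。修改后的代码如下（改动之处就是这两处对 c >= 0x7f 的判断）：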

[cpp]view plaincopy 
   
 /*
  * Return str in a form that is safe to print.
  *
  * Modified for Chinese support: bytes >= 0x80 (for example the bytes of
  * UTF-8 encoded Chinese text) are accepted as printable. Only ASCII
  * control characters (below ' ') and DEL (0x7f) still trigger conversion.
  *
  * stats: if non-NULL, receives the byte count, character count and width
  *        of the returned string.
  * str:   NUL-terminated input string.
  *
  * Returns str itself when nothing has to be converted. Otherwise returns
  * a converted copy that is remembered in saved[] and freed by a later
  * call, so the caller must not free it.
  *
  * NOTE: on the "no conversion" path all three stats fields are set to the
  * byte length. For multibyte text this is larger than the real character
  * count and display width.
  */
 const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
 {
     static char *saved[4];
     static unsigned cur_saved; /* = 0 */

     char *dst;
     const char *s;

     s = str;
     while (1) {
         unsigned char c = *s;
         if (c == '\0') {
             /* 99+% of inputs do not need conversion */
             if (stats) {
                 stats->byte_count = (s - str);
                 stats->unicode_count = (s - str);
                 stats->unicode_width = (s - str);
             }
             return str;
         }
         if (c < ' ')
             break;
         /* was: c >= 0x7f, which also rejected every non-ASCII byte.
          * DEL is a control character and must still be converted. */
         if (c == 0x7f)
             break;
         s++;
     }

 #if ENABLE_UNICODE_SUPPORT
     dst = unicode_conv_to_printable(stats, str);
 #else
     {
         char *d = dst = xstrdup(str);
         while (1) {
             unsigned char c = *d;
             if (c == '\0')
                 break;
             /* was: c < ' ' || c >= 0x7f */
             if (c < ' ' || c == 0x7f)
                 *d = '?';
             d++;
         }
         if (stats) {
             stats->byte_count = (d - dst);
             stats->unicode_count = (d - dst);
             stats->unicode_width = (d - dst);
         }
     }
 #endif

     /* Rotate through saved[]: free the copy stored in this slot by an
      * earlier call, then keep the new one there. */
     free(saved[cur_saved]);
     saved[cur_saved] = dst;
     cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);

     return dst;
 }

经过以上修改之后，如果配置 busybox 1.17.0 时没有选中 [ ] Support Unicode，那么用 ls 命令就可以看到中文了，这一点我已经亲自测试过。可是还有一种情况：配置 busybox 1.17.0 时选中了 [*] Support Unicode，见下：

[html]view plaincopy 
   
 在配置里，有Support Unicode选上的：   
 Busybox Settings->General Configuration->  
    │ │[ ] Enable locale support (system needs locale for this to work)     │ │    
    │ │[*] Support Unicode                                                  │ │    
    │ │[*] Support for --long-options                                       │ │    
     

这种情况下还需要修改一个文件：unicode.c。如果不修改这个文件，ls 命令仍然无法显示中文。未修改的代码如下：

[cpp]view plaincopy 
   
 static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)  
 {  /* Builds an allocated printable copy of src limited to `width`; space-padded when UNI_FLAG_PAD is set; fills *stats when non-NULL. */
     char *dst;  
     unsigned dst_len;  
     unsigned uni_count;  
     unsigned uni_width;  
   
     if (unicode_status != UNICODE_ON) {  /* byte-wise path: each byte is handled as one character */
         char *d;  
         if (flags & UNI_FLAG_PAD) {  
             d = dst = xmalloc(width + 1);  /* exactly width bytes plus the terminator */
             while ((int)--width >= 0) {  
                 unsigned char c = *src;  
                 if (c == '\0') {  /* input exhausted: fill the remainder with spaces */
                     do  
                         *d++ = ' ';  
                     while ((int)--width >= 0);  
                     break;  
                 }  
                 *d++ = (c >= ' ' && c < 0x7f) ? c : '?';  /* only printable ASCII is kept; non-ASCII bytes become '?' */
                 src++;  
             }  
             *d = '\0';  
         } else {  
             d = dst = xstrndup(src, width);  
             while (*d) {  
                 unsigned char c = *d;  
                 if (c < ' ' || c >= 0x7f)  /* same filter as above, applied in place */
                     *d = '?';  
                 d++;  
             }  
         }  
         if (stats) {  /* bytes, characters and columns are all equal in this path */
             stats->byte_count = (d - dst);  
             stats->unicode_count = (d - dst);  
             stats->unicode_width = (d - dst);  
         }  
         return dst;  
     }  
   
     dst = NULL;  
     uni_count = uni_width = 0;  
     dst_len = 0;  
     while (1) {  /* decode one wide character per iteration */
         int w;  
         wchar_t wc;  
   
 #if ENABLE_UNICODE_USING_LOCALE  
         {  
             mbstate_t mbst = { 0 };  
             ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);  
             /* If invalid sequence is seen: -1 is returned, 
              * src points to the invalid sequence, errno = EILSEQ. 
              * Else number of wchars (excluding terminating L'\0') 
              * written to dest is returned. 
              * If len (here: 1) non-L'\0' wchars stored at dest, 
              * src points to the next char to be converted. 
              * If string is completely converted: src = NULL. 
              */  
             if (rc == 0) /* end-of-string */  
                 break;  
             if (rc < 0) { /* error */  
                 src++;  
                 goto subst;  
             }  
             if (!iswprint(wc))  
                 goto subst;  
         }  
 #else  
         src = mbstowc_internal(&wc, src);  
         /* src is advanced to next mb char 
          * wc == ERROR_WCHAR: invalid sequence is seen 
          * else: wc is set 
          */  
         if (wc == ERROR_WCHAR) /* error */  
             goto subst;  
         if (wc == 0) /* end-of-string */  
             break;  
 #endif  
         if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)  /* a limit of 0 disables this check */
             goto subst;  
         w = wcwidth(wc);  
         if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */  
          || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)  
          || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)  
         ) {  
  subst:  
             wc = CONFIG_SUBST_WCHAR;  /* the substitute character counts as one column */
             w = 1;  
         }  
         width -= w;  
         /* Note: if width == 0, we still may add more chars, 
          * they may be zero-width or combining ones */  
         if ((int)width < 0) {  
             /* can't add this wc, string would become longer than width */  
             width += w;  
             break;  
         }  
   
         uni_count++;  
         uni_width += w;  
         dst = xrealloc(dst, dst_len + MB_CUR_MAX);  /* room for one more encoded character */
 #if ENABLE_UNICODE_USING_LOCALE  
         {  
             mbstate_t mbst = { 0 };  
             dst_len += wcrtomb(&dst[dst_len], wc, &mbst);  
         }  
 #else  
         dst_len += wcrtomb_internal(&dst[dst_len], wc);  
 #endif  
     }  
   
     /* Pad to remaining width */  
     if (flags & UNI_FLAG_PAD) {  
         dst = xrealloc(dst, dst_len + width + 1);  
         uni_count += width;  
         uni_width += width;  
         while ((int)--width >= 0) {  
             dst[dst_len++] = ' ';  
         }  
     }  
     dst[dst_len] = '\0';  /* NOTE(review): dst is still NULL here if the loop added nothing and UNI_FLAG_PAD is not set */
     if (stats) {  
         stats->byte_count = dst_len;  
         stats->unicode_count = uni_count;  
         stats->unicode_width = uni_width;  
     }  
   
     return dst;  
 }  

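见上面代码的第20行（`*d++ = (c >= ' ' && c < 0x7f) ? c : '?';`）和第28行（`if (c < ' ' || c >= 0x7f)`）：这两处同样会把大于等于 0x7F 的字节替换成“?”，需要修改。修改后的代码见下：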

[cpp]view plaincopy 
   
 /*
  * Build a newly allocated, printable copy of src that is at most `width`
  * wide.
  *
  * stats: if non-NULL, receives the byte count, character count and width
  *        of the result.
  * src:   NUL-terminated input string.
  * width: maximum width of the result; with UNI_FLAG_PAD the result is
  *        space-padded up to this width.
  * flags: only UNI_FLAG_PAD is examined here.
  *
  * Returns the allocated string (never NULL).
  *
  * Modified for Chinese support: when unicode_status is not UNICODE_ON,
  * bytes >= 0x80 are copied through unchanged instead of being replaced
  * by '?'. ASCII control characters and DEL (0x7f) are still replaced.
  *
  * NOTE(review): in that byte-wise path `width` counts bytes, so a
  * multibyte character can be cut in the middle when the string is
  * truncated - confirm this is acceptable for the callers.
  */
 static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
 {
     char *dst;
     unsigned dst_len;
     unsigned uni_count;
     unsigned uni_width;

     if (unicode_status != UNICODE_ON) {
         char *d;
         if (flags & UNI_FLAG_PAD) {
             d = dst = xmalloc(width + 1);
             while ((int)--width >= 0) {
                 unsigned char c = *src;
                 if (c == '\0') {
                     /* input exhausted: fill the remainder with spaces */
                     do
                         *d++ = ' ';
                     while ((int)--width >= 0);
                     break;
                 }
                 /* was: c >= ' ' && c < 0x7f, which turned every
                  * non-ASCII byte into '?'. DEL is still replaced. */
                 *d++ = (c >= ' ' && c != 0x7f) ? c : '?';
                 src++;
             }
             *d = '\0';
         } else {
             d = dst = xstrndup(src, width);
             while (*d) {
                 unsigned char c = *d;
                 /* was: c < ' ' || c >= 0x7f */
                 if (c < ' ' || c == 0x7f)
                     *d = '?';
                 d++;
             }
         }
         if (stats) {
             stats->byte_count = (d - dst);
             stats->unicode_count = (d - dst);
             stats->unicode_width = (d - dst);
         }
         return dst;
     }

     dst = NULL;
     uni_count = uni_width = 0;
     dst_len = 0;
     while (1) {
         int w;
         wchar_t wc;

 #if ENABLE_UNICODE_USING_LOCALE
         {
             mbstate_t mbst = { 0 };
             ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
             /* If invalid sequence is seen: -1 is returned,
              * src points to the invalid sequence, errno = EILSEQ.
              * Else number of wchars (excluding terminating L'\0')
              * written to dest is returned.
              * If len (here: 1) non-L'\0' wchars stored at dest,
              * src points to the next char to be converted.
              * If string is completely converted: src = NULL.
              */
             if (rc == 0) /* end-of-string */
                 break;
             if (rc < 0) { /* error */
                 src++;
                 goto subst;
             }
             if (!iswprint(wc))
                 goto subst;
         }
 #else
         src = mbstowc_internal(&wc, src);
         /* src is advanced to next mb char
          * wc == ERROR_WCHAR: invalid sequence is seen
          * else: wc is set
          */
         if (wc == ERROR_WCHAR) /* error */
             goto subst;
         if (wc == 0) /* end-of-string */
             break;
 #endif
         if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
             goto subst;
         w = wcwidth(wc);
         if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
          || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
          || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
         ) {
  subst:
             wc = CONFIG_SUBST_WCHAR;
             w = 1;
         }
         width -= w;
         /* Note: if width == 0, we still may add more chars,
          * they may be zero-width or combining ones */
         if ((int)width < 0) {
             /* can't add this wc, string would become longer than width */
             width += w;
             break;
         }

         uni_count++;
         uni_width += w;
         dst = xrealloc(dst, dst_len + MB_CUR_MAX);
 #if ENABLE_UNICODE_USING_LOCALE
         {
             mbstate_t mbst = { 0 };
             dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
         }
 #else
         dst_len += wcrtomb_internal(&dst[dst_len], wc);
 #endif
     }

     /* Pad to remaining width */
     if (flags & UNI_FLAG_PAD) {
         dst = xrealloc(dst, dst_len + width + 1);
         uni_count += width;
         uni_width += width;
         while ((int)--width >= 0) {
             dst[dst_len++] = ' ';
         }
     }
     /* If the loop added nothing (src is "" or width is 0) and no padding
      * was requested, dst is still NULL: allocate room for the terminator
      * instead of writing through a NULL pointer. */
     if (!dst)
         dst = xmalloc(1);
     dst[dst_len] = '\0';
     if (stats) {
         stats->byte_count = dst_len;
         stats->unicode_count = uni_count;
         stats->unicode_width = uni_width;
     }

     return dst;
 }