在嵌入式linux系统中,busybox是构建文件系统时最常用的工具。可是从busybox 1.17.0版本开始,如果不修改源码,ls命令就无法显示中文。即使内核已经配置了中文支持,在shell下用ls命令也仍然无法显示中文,这是因为busybox 1.17.0及以后的版本对中文的支持进行了限制。下面讲讲如何修改才能让busybox 1.17.0及以上版本支持中文。需要修改两个文件:printable_string.c以及unicode.c。先来分析ls命令为什么无法显示中文。请看printable_string.c中的printable_string函数,下面先列出未修改的代码,紧接着是修改后的代码:
- /*
- * Unmodified upstream version.
- * Returns a printable form of str.
- * Fast path: if every byte of str lies in ' ' .. 0x7e, str itself is
- * returned and, when stats is non-NULL, all three counters are set to
- * the byte length of str.
- * Otherwise a converted heap copy is returned: with ENABLE_UNICODE_SUPPORT
- * it comes from unicode_conv_to_printable(); without it, every byte below
- * ' ' or >= 0x7f is overwritten with '?'.
- * Converted copies are remembered in a static 4-slot ring and freed when
- * their slot is reused, so the caller must not free the result and the
- * pointer stays valid only until that slot comes round again.
- */
- const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
- {
- static char *saved[4];
- static unsigned cur_saved; /* = 0 */
- char *dst;
- const char *s;
- s = str;
- /* Scan for the first byte that needs conversion */
- while (1) {
- unsigned char c = *s;
- if (c == '\0') {
- /* 99+% of inputs do not need conversion */
- if (stats) {
- stats->byte_count = (s - str);
- stats->unicode_count = (s - str);
- stats->unicode_width = (s - str);
- }
- return str;
- }
- if (c < ' ')
- break;
- /* This test is what rejects the bytes of multi-byte (e.g. Chinese) names */
- if (c >= 0x7f)
- break;
- s++;
- }
- #if ENABLE_UNICODE_SUPPORT
- dst = unicode_conv_to_printable(stats, str);
- #else
- {
- /* Byte-wise fallback: work on a private copy of the whole string */
- char *d = dst = xstrdup(str);
- while (1) {
- unsigned char c = *d;
- if (c == '\0')
- break;
- if (c < ' ' || c >= 0x7f)
- *d = '?';
- d++;
- }
- if (stats) {
- stats->byte_count = (d - dst);
- stats->unicode_count = (d - dst);
- stats->unicode_width = (d - dst);
- }
- }
- #endif
- /* Release the copy that occupied this slot earlier, then store the new one */
- free(saved[cur_saved]);
- saved[cur_saved] = dst;
- /* Advance with wrap-around; the mask assumes ARRAY_SIZE(saved) is a power of two */
- cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
- return dst;
- }
- /*
- * Patched version: bytes 0x80..0xff (e.g. the bytes of multi-byte Chinese
- * file names) are no longer treated as unprintable.
- * Returns a printable form of str.
- * Fast path: if str contains no byte below ' ' and no DEL (0x7f), str
- * itself is returned and, when stats is non-NULL, all three counters are
- * set to the byte length of str (bytes, not characters or columns).
- * Otherwise a converted heap copy is returned: with ENABLE_UNICODE_SUPPORT
- * it comes from unicode_conv_to_printable(); without it, every byte below
- * ' ' and every DEL is overwritten with '?'.
- * Converted copies are remembered in a static 4-slot ring and freed when
- * their slot is reused, so the caller must not free the result and the
- * pointer stays valid only until that slot comes round again.
- */
- const char* FAST_FUNC printable_string(uni_stat_t *stats, const char *str)
- {
- static char *saved[4];
- static unsigned cur_saved; /* = 0 */
- char *dst;
- const char *s;
- s = str;
- /* Scan for the first byte that needs conversion */
- while (1) {
- unsigned char c = *s;
- if (c == '\0') {
- /* 99+% of inputs do not need conversion */
- if (stats) {
- stats->byte_count = (s - str);
- stats->unicode_count = (s - str);
- stats->unicode_width = (s - str);
- }
- return str;
- }
- if (c < ' ')
- break;
- /* Upstream tests "c >= 0x7f" here. Only DEL is rejected now, so
- * bytes 0x80..0xff pass through while the DEL control character
- * is still replaced. */
- if (c == 0x7f)
- break;
- s++;
- }
- #if ENABLE_UNICODE_SUPPORT
- dst = unicode_conv_to_printable(stats, str);
- #else
- {
- /* Byte-wise fallback: work on a private copy of the whole string */
- char *d = dst = xstrdup(str);
- while (1) {
- unsigned char c = *d;
- if (c == '\0')
- break;
- /* Upstream: c < ' ' || c >= 0x7f */
- if (c < ' ' || c == 0x7f)
- *d = '?';
- d++;
- }
- if (stats) {
- stats->byte_count = (d - dst);
- stats->unicode_count = (d - dst);
- stats->unicode_width = (d - dst);
- }
- }
- #endif
- /* Release the copy that occupied this slot earlier, then store the new one */
- free(saved[cur_saved]);
- saved[cur_saved] = dst;
- /* Advance with wrap-around; the mask assumes ARRAY_SIZE(saved) is a power of two */
- cur_saved = (cur_saved + 1) & (ARRAY_SIZE(saved)-1);
- return dst;
- }
- 在配置里,要把Support Unicode选上(菜单如下)。菜单之后列出的是unicode.c中的unicode_conv_to_printable2函数,同样先是未修改的代码,再是修改后的代码:
- Busybox Settings->General Configuration->
- │ │[ ] Enable locale support (system needs locale for this to work) │ │
- │ │[*] Support Unicode │ │
- │ │[*] Support for --long-options │ │
- /*
- * Unmodified upstream version.
- * Builds a heap-allocated printable copy of src limited to width columns.
- * When unicode_status is not UNICODE_ON the input is handled byte by byte:
- * bytes below ' ' or >= 0x7f become '?'; with UNI_FLAG_PAD the result is
- * space-padded to exactly width bytes, otherwise at most width bytes of
- * src are copied.
- * Otherwise src is decoded one wide character at a time; invalid
- * sequences, characters above CONFIG_LAST_SUPPORTED_WCHAR (when that is
- * non-zero) and characters whose wcwidth() is not acceptable for the
- * build configuration are replaced by CONFIG_SUBST_WCHAR (counted as one
- * column). Decoding stops at the first character that would not fit.
- * With UNI_FLAG_PAD the remaining columns are filled with spaces.
- * If stats is non-NULL it receives the byte count, character count and
- * column width of the result.
- */
- static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
- {
- char *dst;
- unsigned dst_len;
- unsigned uni_count;
- unsigned uni_width;
- /* Unicode handling is not active: treat the input as single bytes */
- if (unicode_status != UNICODE_ON) {
- char *d;
- if (flags & UNI_FLAG_PAD) {
- d = dst = xmalloc(width + 1);
- /* width is unsigned; the cast lets the countdown stop below zero */
- while ((int)--width >= 0) {
- unsigned char c = *src;
- if (c == '\0') {
- /* Input exhausted: fill the rest of the field with spaces */
- do
- *d++ = ' ';
- while ((int)--width >= 0);
- break;
- }
- /* This test is what turns bytes >= 0x7f into '?' */
- *d++ = (c >= ' ' && c < 0x7f) ? c : '?';
- src++;
- }
- *d = '\0';
- } else {
- d = dst = xstrndup(src, width);
- while (*d) {
- unsigned char c = *d;
- if (c < ' ' || c >= 0x7f)
- *d = '?';
- d++;
- }
- }
- if (stats) {
- stats->byte_count = (d - dst);
- stats->unicode_count = (d - dst);
- stats->unicode_width = (d - dst);
- }
- return dst;
- }
- dst = NULL;
- uni_count = uni_width = 0;
- dst_len = 0;
- /* Decode, validate and re-encode one wide character per iteration */
- while (1) {
- int w;
- wchar_t wc;
- #if ENABLE_UNICODE_USING_LOCALE
- {
- mbstate_t mbst = { 0 };
- ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
- /* If invalid sequence is seen: -1 is returned,
- * src points to the invalid sequence, errno = EILSEQ.
- * Else number of wchars (excluding terminating L'\0')
- * written to dest is returned.
- * If len (here: 1) non-L'\0' wchars stored at dest,
- * src points to the next char to be converted.
- * If string is completely converted: src = NULL.
- */
- if (rc == 0) /* end-of-string */
- break;
- if (rc < 0) { /* error */
- src++;
- goto subst;
- }
- if (!iswprint(wc))
- goto subst;
- }
- #else
- src = mbstowc_internal(&wc, src);
- /* src is advanced to next mb char
- * wc == ERROR_WCHAR: invalid sequence is seen
- * else: wc is set
- */
- if (wc == ERROR_WCHAR) /* error */
- goto subst;
- if (wc == 0) /* end-of-string */
- break;
- #endif
- if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
- goto subst;
- w = wcwidth(wc);
- if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
- || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
- || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
- ) {
- /* The gotos above jump straight into this body */
- subst:
- wc = CONFIG_SUBST_WCHAR;
- w = 1;
- }
- width -= w;
- /* Note: if width == 0, we still may add more chars,
- * they may be zero-width or combining ones */
- if ((int)width < 0) {
- /* can't add this wc, string would become longer than width */
- width += w;
- break;
- }
- uni_count++;
- uni_width += w;
- /* Grow the buffer by MB_CUR_MAX bytes before encoding the next character */
- dst = xrealloc(dst, dst_len + MB_CUR_MAX);
- #if ENABLE_UNICODE_USING_LOCALE
- {
- mbstate_t mbst = { 0 };
- dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
- }
- #else
- dst_len += wcrtomb_internal(&dst[dst_len], wc);
- #endif
- }
- /* Pad to remaining width */
- if (flags & UNI_FLAG_PAD) {
- dst = xrealloc(dst, dst_len + width + 1);
- uni_count += width;
- uni_width += width;
- while ((int)--width >= 0) {
- dst[dst_len++] = ' ';
- }
- }
- /* NOTE(review): if the loop appended nothing and UNI_FLAG_PAD is unset,
- * dst is still NULL here - confirm callers never reach this state */
- dst[dst_len] = '\0';
- if (stats) {
- stats->byte_count = dst_len;
- stats->unicode_count = uni_count;
- stats->unicode_width = uni_width;
- }
- return dst;
- }
- /*
- * Patched version: in the byte-wise path, bytes 0x80..0xff (e.g. the
- * bytes of multi-byte Chinese file names) are copied unchanged.
- * Builds a heap-allocated printable copy of src limited to width columns.
- * When unicode_status is not UNICODE_ON the input is handled byte by byte:
- * bytes below ' ' and DEL (0x7f) become '?'; with UNI_FLAG_PAD the result
- * is space-padded to exactly width bytes, otherwise at most width bytes
- * of src are copied. Counters in this path are byte counts.
- * Otherwise src is decoded one wide character at a time; invalid
- * sequences, characters above CONFIG_LAST_SUPPORTED_WCHAR (when that is
- * non-zero) and characters whose wcwidth() is not acceptable for the
- * build configuration are replaced by CONFIG_SUBST_WCHAR (counted as one
- * column). Decoding stops at the first character that would not fit.
- * With UNI_FLAG_PAD the remaining columns are filled with spaces.
- * If stats is non-NULL it receives the byte count, character count and
- * column width of the result. The result is never NULL.
- */
- static char* FAST_FUNC unicode_conv_to_printable2(uni_stat_t *stats, const char *src, unsigned width, int flags)
- {
- char *dst;
- unsigned dst_len;
- unsigned uni_count;
- unsigned uni_width;
- /* Unicode handling is not active: treat the input as single bytes */
- if (unicode_status != UNICODE_ON) {
- char *d;
- if (flags & UNI_FLAG_PAD) {
- d = dst = xmalloc(width + 1);
- /* width is unsigned; the cast lets the countdown stop below zero */
- while ((int)--width >= 0) {
- unsigned char c = *src;
- if (c == '\0') {
- /* Input exhausted: fill the rest of the field with spaces */
- do
- *d++ = ' ';
- while ((int)--width >= 0);
- break;
- }
- /* Upstream: c >= ' ' && c < 0x7f. High bytes are kept now,
- * but the DEL control character is still replaced. */
- *d++ = (c >= ' ' && c != 0x7f) ? c : '?';
- src++;
- }
- *d = '\0';
- } else {
- d = dst = xstrndup(src, width);
- while (*d) {
- unsigned char c = *d;
- /* Upstream: c < ' ' || c >= 0x7f */
- if (c < ' ' || c == 0x7f)
- *d = '?';
- d++;
- }
- }
- if (stats) {
- stats->byte_count = (d - dst);
- stats->unicode_count = (d - dst);
- stats->unicode_width = (d - dst);
- }
- return dst;
- }
- dst = NULL;
- uni_count = uni_width = 0;
- dst_len = 0;
- /* Decode, validate and re-encode one wide character per iteration */
- while (1) {
- int w;
- wchar_t wc;
- #if ENABLE_UNICODE_USING_LOCALE
- {
- mbstate_t mbst = { 0 };
- ssize_t rc = mbsrtowcs(&wc, &src, 1, &mbst);
- /* If invalid sequence is seen: -1 is returned,
- * src points to the invalid sequence, errno = EILSEQ.
- * Else number of wchars (excluding terminating L'\0')
- * written to dest is returned.
- * If len (here: 1) non-L'\0' wchars stored at dest,
- * src points to the next char to be converted.
- * If string is completely converted: src = NULL.
- */
- if (rc == 0) /* end-of-string */
- break;
- if (rc < 0) { /* error */
- src++;
- goto subst;
- }
- if (!iswprint(wc))
- goto subst;
- }
- #else
- src = mbstowc_internal(&wc, src);
- /* src is advanced to next mb char
- * wc == ERROR_WCHAR: invalid sequence is seen
- * else: wc is set
- */
- if (wc == ERROR_WCHAR) /* error */
- goto subst;
- if (wc == 0) /* end-of-string */
- break;
- #endif
- if (CONFIG_LAST_SUPPORTED_WCHAR && wc > CONFIG_LAST_SUPPORTED_WCHAR)
- goto subst;
- w = wcwidth(wc);
- if ((ENABLE_UNICODE_COMBINING_WCHARS && w < 0) /* non-printable wchar */
- || (!ENABLE_UNICODE_COMBINING_WCHARS && w <= 0)
- || (!ENABLE_UNICODE_WIDE_WCHARS && w > 1)
- ) {
- /* The gotos above jump straight into this body */
- subst:
- wc = CONFIG_SUBST_WCHAR;
- w = 1;
- }
- width -= w;
- /* Note: if width == 0, we still may add more chars,
- * they may be zero-width or combining ones */
- if ((int)width < 0) {
- /* can't add this wc, string would become longer than width */
- width += w;
- break;
- }
- uni_count++;
- uni_width += w;
- /* Grow the buffer by MB_CUR_MAX bytes before encoding the next character */
- dst = xrealloc(dst, dst_len + MB_CUR_MAX);
- #if ENABLE_UNICODE_USING_LOCALE
- {
- mbstate_t mbst = { 0 };
- dst_len += wcrtomb(&dst[dst_len], wc, &mbst);
- }
- #else
- dst_len += wcrtomb_internal(&dst[dst_len], wc);
- #endif
- }
- /* Pad to remaining width */
- if (flags & UNI_FLAG_PAD) {
- dst = xrealloc(dst, dst_len + width + 1);
- uni_count += width;
- uni_width += width;
- while ((int)--width >= 0) {
- dst[dst_len++] = ' ';
- }
- }
- /* Nothing was appended and no padding was requested (for example, the
- * input was ""): allocate room for the terminator instead of writing
- * through a NULL pointer */
- if (!dst)
- dst = xrealloc(dst, 1);
- dst[dst_len] = '\0';
- if (stats) {
- stats->byte_count = dst_len;
- stats->unicode_count = uni_count;
- stats->unicode_width = uni_width;
- }
- return dst;
- }
转载自:wavemcu的CSDN博客