python 判断unicode字符串是汉字/数字/字母,全角/半角转换

最新推荐文章于 2023-04-14 19:27:27 发布

xuh5156

最新推荐文章于 2023-04-14 19:27:27 发布

阅读量6.5k

点赞数

分类专栏： Python

本文链接：https://blog.csdn.net/xuh5156/article/details/9111735

版权

Python 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

文本处理经常会判断字符串是否是字母，数字或者汉字的问题，python str模块提供了些方法，但是不支持unicode，所以模仿str的相关方法，实现了unicode 字符串的汉字/数字/字母判断方法，以及全角半角转换。

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import sys

#set default encoding as UTF-8
reload(sys)
sys.setdefaultencoding('utf-8')

#judge a character is a Chinese Character
def is_Chinese(uchar):
	if len(uchar) != 1:
		raise TypeError,'expected a character, but a string found!'

	if uchar >= u'\u4e00' and uchar <= u'\u9fa5':
		return True
	else:
		return False

#Judge a ustr is all Chinese
def is_all_Chinese(ustr): 
	for uchar in ustr:
		if not is_Chinese(uchar):
			return False

	return True

#Judge a char is a number
def is_digit(uchar):
	if len(uchar) != 1:
		raise TypeError,'expected a character, but a string found!'

	if uchar >= u'\u0030' and uchar<=u'\u0039':
		return True
	else:
		return False

#Judge a str is all num
def is_all_digit(ustr):
	for uchar in ustr:
		if not is_digit(uchar):
			return False
	
	return True

#Judge a char is a alphabet
def is_alpha(uchar):
	if len(uchar) != 1:
		raise TypeError,'expected a character, but a string found!'

	if (uchar >= u'\u0041' and uchar<=u'\u005a') or \
	   (uchar >= u'\u0061' and uchar<=u'\u007a'):
		return True
	else:
		return False


#Judge a str is all alphabet
def is_all_alpha(ustr):
	for uchar in ustr:
		if not is_alpha(uchar):
			return False
	
	return True

 
#半角转全角
def B2Q(uchar):
	if len(uchar) != 1:
		raise TypeError,'expected a character, but a string found!'

	inner_code = ord(uchar)
	if inner_code < 0x0020 or inner_code > 0x7e:      #不是半角字符就返回原来的字符
		return uchar
	if inner_code == 0x0020: #除了空格其他的全角半角的公式为:半角=全角-0xfee0
		inner_code = 0x3000
	else:
		inner_code += 0xfee0
	
	return unichr(inner_code)
 
#全角转半角
def Q2B(uchar):
	if len(uchar) != 1:
		raise TypeError,'expected a character, but a string found!'

	inner_code = ord(uchar)
	if inner_code == 0x3000:
		inner_code = 0x0020
	else:
		inner_code -= 0xfee0
	if inner_code < 0x0020 or inner_code > 0x7e:      #转完之后不是半角字符返回原来的字符
		return uchar
	
	return unichr(inner_code)


 
#把字符串全角转半角
def stringQ2B(ustring):
	return ''.join([Q2B(uchar) for uchar in ustring])

#main function
if __name__ == '__main__':
	pass