注:以下内容为学习笔记,多数是从书本、资料中得来,只为加深印象及日后参考。然而本人表达能力较差,写得不好。因非翻译、非转载,只好选原创,但多数乃摘抄,实为惭愧。但若能帮助一二访客,幸甚!
2012-12-10 星期一
1.控制大小写和访问子字符串
1)控制大小写
# Sample text with deliberately irregular letter case.
org_str = 'heLLo PyTHon, Hello worLd'

# Upper-case the whole string.
big_str = str.upper(org_str)
print(big_str)

# Lower-case the whole string.
little_str = str.lower(org_str)
print(little_str)

# Capitalize by hand: first character upper-cased, the rest lower-cased.
capitalize_str = ''.join((org_str[:1].upper(), org_str[1:].lower()))
print(capitalize_str)

# The built-in method that does the same job in one call.
capitalize_str = str.capitalize(org_str)
print(capitalize_str)

# Title case: the first letter of every word is upper-cased.
title_str = str.title(org_str)
print(title_str)
输出:
HELLO PYTHON, HELLO WORLD
hello python, hello world
Hello python, hello world
Hello python, hello world
Hello Python, Hello World
2)访问子字符串
# Accessing substrings of a bytes object.
the_line = b'hello keyan python hi hello world!'

# Plain slicing.
print(the_line[12:18])

# Fixed-width fields through struct.
import struct

# Take 6 bytes, skip 6, take 7, skip 3, take 6; anything after that is the
# remainder handled below.
base_format = '6s 6x 7s 3x 6s'

# How many bytes of the line base_format does not account for.
num_remain = len(the_line) - struct.calcsize(base_format)
print(num_remain)

# Extend the format with one more "s" field sized to the remainder, then unpack.
the_format = f'{base_format} {num_remain:d}s'
print(the_format)
l, s1, s2, t = struct.unpack(the_format, the_line)
print(l, s1, s2, t)
print(l + s1 + s2 + t)

# Ignoring the remainder instead: unpack only the leading bytes that
# base_format describes.
l, s1, s2 = struct.unpack_from(base_format, the_line)
print(l, s1, s2)
print(l + s1 + s2)

# Chunks of five bytes each (the last chunk may be shorter).
print('**************************************************************')
fivers = [the_line[start:start + 5] for start in range(0, len(the_line), 5)]
print(fivers)

# Break the first five bytes into individual items; iterating bytes yields
# integers.
chars = [*the_line[:5]]
print(chars)

# Cut the data at the given column offsets.
print('**************************************************************')
cuts = [6, 12, 19, 22, 28]
# Pair every start offset with its end offset: (0, cuts[0]),
# (cuts[0], cuts[1]), ..., (cuts[-1], None).  zip() produces these pairs
# lazily; an end of None makes the last slice run to the end of the line.
pieces = [the_line[begin:end] for begin, end in zip([0, *cuts], [*cuts, None])]
print(pieces)
输出:
b'python'
6
6s 6x 7s 3x 6s 6s
b'hello ' b'python ' b'hello ' b'world!'
b'hello python hello world!'
b'hello ' b'python ' b'hello '
b'hello python hello '
**************************************************************
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
[104, 101, 108, 108, 111]
**************************************************************
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
封装成函数:
# Package the struct-based field extraction as a reusable function.
print('**************************************************************')


def fields(base_format, the_line, last_field=False):
    """Unpack ``the_line`` with ``base_format`` plus one item for the rest.

    The bytes of ``the_line`` that ``base_format`` does not account for are
    covered by one extra format item appended here: an ``s`` field (returned
    as the last tuple element) when ``last_field`` is true, otherwise ``x``
    pad bytes, which are skipped.

    Args:
        base_format: struct format string describing the leading fields.
        the_line: bytes-like buffer to unpack.
        last_field: whether the leftover bytes are returned as a final field.

    Returns:
        The tuple produced by ``struct.unpack``.
    """
    num_remain = len(the_line) - struct.calcsize(base_format)
    # Conditional expression instead of the fragile ``cond and a or b`` idiom.
    tail_code = 's' if last_field else 'x'
    the_format = '%s %d%s' % (base_format, num_remain, tail_code)
    return struct.unpack(the_format, the_line)
# Demo: unpack once discarding the leftover bytes and once keeping them.
print(
    fields(base_format, the_line, False),
    fields(base_format, the_line, True),
    sep='\n',
)
# Next: a memoizing variant of fields(), meant for calls inside a loop.
print('**************************************************************')
def fields_mem(base_format, the_line, last_field=False, _cache={}):
    """Unpack like ``fields`` but remember the computed format string.

    The full format depends only on ``base_format``, the length of
    ``the_line`` and ``last_field``, so it is stored under that key and
    reused on later calls with the same combination.

    Args:
        base_format: struct format string describing the leading fields.
        the_line: bytes-like buffer to unpack.
        last_field: whether the leftover bytes are returned as a final field
            (``s``) or skipped (``x``).
        _cache: mapping used as the memo table.  The mutable default is
            deliberate: it is the store shared by all calls that do not pass
            their own mapping.

    Returns:
        The tuple produced by ``struct.unpack``.
    """
    key = base_format, len(the_line), last_field
    the_format = _cache.get(key)
    if the_format is None:
        num_remain = len(the_line) - struct.calcsize(base_format)
        tail_code = 's' if last_field else 'x'
        the_format = '%s %d%s' % (base_format, num_remain, tail_code)
        _cache[key] = the_format
    return struct.unpack(the_format, the_line)
# Exercise the memoizing variant (the original called plain fields() here,
# so fields_mem was never actually run).
print(fields_mem(base_format, the_line, False))
print(fields_mem(base_format, the_line, True))
# Fixed-width chunking wrapped in a function.
def split_by(the_line, n, last_field=False):
    """Split ``the_line`` into consecutive pieces of ``n`` items each.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to cut up.
        n: width of every piece.
        last_field: when false, a final piece shorter than ``n`` is dropped;
            when true it is kept.

    Returns:
        A list of slices of ``the_line``; empty for an empty input.
    """
    pieces = [the_line[k:k + n] for k in range(0, len(the_line), n)]
    # An empty input yields no pieces at all, so check before looking at the
    # last one (indexing [-1] on an empty list raises IndexError).
    if pieces and not last_field and len(pieces[-1]) < n:
        pieces.pop()
    return pieces
# Demo: five-wide chunks, first dropping and then keeping the short tail.
print(
    split_by(the_line, 5, False),
    split_by(the_line, 5, True),
    sep='\n',
)
# Cutting at given column offsets, wrapped in a function.
def split_at(the_line, cuts, last_field=False):
    """Split ``the_line`` at the offsets listed in ``cuts``.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to cut up.
        cuts: iterable of cut offsets.  Any iterable is accepted (list, tuple,
            range, ...); it is copied into a list before use.
        last_field: when true, the piece after the last cut is included;
            when false it is discarded.

    Returns:
        A list of slices of ``the_line``.
    """
    cuts = list(cuts)
    # Pair each start with its end: (0, c0), (c0, c1), ..., (c_last, None).
    pieces = [the_line[i:j] for i, j in zip([0] + cuts, cuts + [None])]
    if not last_field:
        pieces.pop()
    return pieces
# Demo: cut at the column offsets, without and then with the trailing piece.
print(
    split_at(the_line, cuts, False),
    split_at(the_line, cuts, True),
    sep='\n',
)
# Next: the same splitters written as generators.
print('**************************************************************')
def split_at_yield(the_line, cuts, last_field=False):
    """Lazily yield the pieces of ``the_line`` delimited by ``cuts``.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to cut up.
        cuts: iterable of cut offsets, consumed in order.
        last_field: when true, the piece after the last cut is yielded too.

    Yields:
        Slices of ``the_line``, one per cut, plus the remainder if requested.
    """
    last = 0
    for cut in cuts:
        yield the_line[last:cut]
        last = cut
    if last_field:
        yield the_line[last:]
# A generator has to be consumed to show its items; printing it directly
# would only display the generator object itself.
print(list(split_at_yield(the_line, cuts, False)))
print(list(split_at_yield(the_line, cuts, True)))
def split_by_yield(the_line, n, last_field=False):
    """Lazily yield consecutive pieces of ``the_line``, ``n`` items each.

    A final piece shorter than ``n`` is yielded only when ``last_field`` is
    true.  A final piece that is exactly ``n`` wide is always yielded, which
    is the same rule ``split_by`` applies; an empty input yields nothing.

    Args:
        the_line: sliceable sequence (e.g. bytes or str) to cut up.
        n: width of every piece.
        last_field: whether a short trailing piece is kept.

    Yields:
        Slices of ``the_line``.
    """
    for start in range(0, len(the_line), n):
        piece = the_line[start:start + n]
        # Only the last piece can be short; keep it just when asked to.
        if last_field or len(piece) == n:
            yield piece
# Demo: materialize the generator-based chunker, without and with the tail.
print(
    [*split_by_yield(the_line, 5, False)],
    [*split_by_yield(the_line, 5, True)],
    sep='\n',
)
输出:
**************************************************************
(b'hello ', b'python ', b'hello ')
(b'hello ', b'python ', b'hello ', b'world!')
**************************************************************
(b'hello ', b'python ', b'hello ')
(b'hello ', b'python ', b'hello ', b'world!')
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
**************************************************************
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ']
[b'hello ', b'keyan ', b'python ', b'hi ', b'hello ', b'world!']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo']
[b'hello', b' keya', b'n pyt', b'hon h', b'i hel', b'lo wo', b'rld!']
2012-12-11 星期二
2.改变多行文本字符串的缩进
'''
Created on Dec 11, 2012
改变多行文本字符串的缩进
@author: liury_lab
'''
# Give every line of a multi-line string the same amount of indentation.
def reindent(s, num_space):
    """Re-indent each line of ``s`` to exactly ``num_space`` spaces.

    Each line is stripped of leading and trailing whitespace and then
    prefixed with ``num_space`` spaces.

    Args:
        s: text to re-indent; it is split with ``str.splitlines``.
        num_space: number of spaces to put in front of every line.

    Returns:
        The re-indented lines joined with ``'\\n'`` (no trailing newline).
    """
    leading_space = num_space * ' '
    return '\n'.join(leading_space + line.strip() for line in s.splitlines())
# Sample text whose lines are indented by 8, 2, 4 and 6 spaces.  These widths
# are the ones the recorded output reports ([8, 2, 4, 6]); the leading spaces
# had been collapsed in this copy of the notes.
org_str = """        hello python
  hello python
    hello python
      hello python"""
print(org_str)
# Separator, then the sample text re-indented to four spaces per line.
print(
    '**************************************************',
    reindent(org_str, 4),
    sep='\n',
)
# Add leading spaces.
def add_spaces(s, num_add):
    """Prefix every line of ``s`` with ``num_add`` spaces.

    Lines are split with their line endings kept, so the original line
    breaks are preserved in the result.

    Args:
        s: text to indent.
        num_add: number of spaces to add in front of each line.

    Returns:
        The indented text.
    """
    white = ' ' * num_add
    # Joining with ``white`` indents lines 2..n; the explicit prefix covers
    # the first line.
    return white + white.join(s.splitlines(True))
print('**************************************************')


# Measure the indentation of each line.
def num_space(s):
    """Return the count of leading whitespace characters for each line.

    Args:
        s: text to inspect; it is split with ``str.splitlines``.

    Returns:
        A list with one integer per line: the line's length minus its length
        after ``str.lstrip()``.
    """
    return [len(line) - len(line.lstrip()) for line in s.splitlines()]
# Remove leading spaces.
def del_space(s, num_del):
    """Drop the first ``num_del`` characters from every line of ``s``.

    Args:
        s: text to dedent.
        num_del: number of leading characters to remove from each line.

    Returns:
        The shortened lines joined with ``'\\n'``.

    Raises:
        ValueError: if ``num_del`` is larger than the smallest per-line
            indentation reported by ``num_space(s)``.
    """
    # default=0 keeps an empty ``s`` from failing inside min() with an
    # unrelated "empty sequence" error.
    if num_del > min(num_space(s), default=0):
        raise ValueError("removing more spaces than there are!")
    return '\n'.join(line[num_del:] for line in s.splitlines())
# Indentation width of every line in the sample text.
nums = num_space(org_str)
print(nums)

# Remove one leading space from each line.
print(del_space(org_str, 1))

print('**************************************************')
# Remove the largest indentation shared by all lines.
print(del_space(org_str, min(nums)))
输出:
        hello python
  hello python
    hello python
      hello python
**************************************************
    hello python
    hello python
    hello python
    hello python
**************************************************
[8, 2, 4, 6]
       hello python
 hello python
   hello python
     hello python
**************************************************
      hello python
hello python
  hello python
    hello python
2012-12-12 星期三
3.扩展和压缩制表符
'''
Created on Dec 12, 2012
扩展和压缩制表符
@author: liury_lab
'''
# A string with a tab on each side of the word.
org_str = '\thello\t'

# Show the raw string next to its individual characters.
org_list = [*org_str]
print(org_str, org_list)

# Replace each tab with spaces (str.expandtabs with its default tab size).
exp_str = str.expandtabs(org_str)
exp_list = [*exp_str]
print(exp_str, exp_list)
# 将空格转成制表符
def un_expand(astring, tab_len = 4):
import re
# 当正则表达式包含了一个括弧组时,re.split 返回了一个list列表,列表中的
# 每一个pieces列表,所有的连续空白字符串和非空白字符串都成为了它的子项
pieces = re.split(r'( +)', astring.expandtabs(tab_len))
print('pieces = %s' % pieces)
# 记录目前的字符串总长度
len_sofar = 0
for i, piece in enumerate(pieces):
this_len = len(piece)
len_sofar += this_len
if piece.isspace():
# 将各个空格序列改成tabs + spaces
num_blanks = len_sofar % tab_len
num_tabs = (this_len - num_blanks + tab_len - 1) // tab_len
print(len_sofar, num_blanks, num_tabs)
pieces[i] = '\t' * num_tabs + ' ' * num_blanks
return ''.join(pieces)
# Sample input: two leading spaces, the word, then seven trailing spaces
# (14 characters).  The widths follow the recorded trace and character list;
# the runs of spaces had been collapsed in this copy of the notes.
exp_str = '  hello       '
exp_list = list(exp_str)
print(exp_str, exp_list)
# Compress the sample back into tabs (tab stops every 4 columns) and show
# the result next to its individual characters.
org_str = un_expand(exp_str, 4)
org_list = [*org_str]
print(org_str, org_list)
输出:
hello ['\t', 'h', 'e', 'l', 'l', 'o', '\t']
hello [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'h', 'e', 'l', 'l', 'o', ' ', ' ', ' ']
hello [' ', ' ', 'h', 'e', 'l', 'l', 'o', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
pieces = ['', '  ', 'hello', '       ', '']
2 2 0
14 2 2
hello [' ', ' ', 'h', 'e', 'l', 'l', 'o', '\t', '\t', ' ', ' ']
2012-12-13 星期四
4.替换字符串中的子串
'''
Created on Dec 13, 2012
替换字符串中的子串
@author: liury_lab
'''
from string import Template
# Build a template from a string; placeholders are marked with "$".
temp = Template('this is $thing')

# substitute() accepts the values as a mapping ...
print(
    temp.substitute({'thing' : 5}),
    temp.substitute({'thing' : 'test'}),
    sep='\n',
)
# ... or as keyword arguments.
print(
    temp.substitute(thing = 5),
    temp.substitute(thing = 'test'),
    sep='\n',
)
输出:
this is 5
this is test
this is 5
this is test
# A convenient way to build the mapping for substitute() is to assign the
# values to variables and pass locals(): it returns a dictionary whose keys
# are the variable names and whose values are the variables' values.  (At
# module level, as here, that dictionary is the module namespace.)
temp = Template('the square of $num is $square')

# Explicit keyword arguments.
for i in range(3):
    print(temp.substitute(num = i, square = i*i))

# Every placeholder taken from the namespace returned by locals().
for num in range(3):
    square = num * num
    print(temp.substitute(locals()))

# Mapping and keyword arguments combined in one call.
for num in range(3):
    print(temp.substitute(locals(), square = num * num))
输出:
the square of 0 is 0
the square of 1 is 1
the square of 2 is 4
the square of 0 is 0
the square of 1 is 1
the square of 2 is 4
the square of 0 is 0
the square of 1 is 1
the square of 2 is 4
2012-12-14 星期五 雪
5.使用正则表达式一次完成多个替换
'''
Created on Dec 14, 2012
一次完成多个替换
@author: liury_lab
'''
# Replacing several substrings of a string in one pass.
# The sub() method of compiled patterns makes this kind of match-and-replace
# straightforward.
adict = dict(hello='hi', perl='python')
text = (
    'hello world, hello perl, hello python, hello ppperlppp'
)
import re
# Return a copy of the input in which every substring that is a key of the
# given dictionary has been replaced by the corresponding value.
def multiple_replace(text, adict):
    """Replace every occurrence of each key of ``adict`` in ``text``.

    All keys are combined into one regular expression ``k1|k2|...|kn`` and
    the text is rewritten in a single pass.  Longer keys are tried first, so
    a key that is a prefix of another one cannot pre-empt the longer match.

    Args:
        text: string to process.
        adict: mapping from substrings to their replacements.

    Returns:
        The rewritten string; ``text`` unchanged when ``adict`` is empty.
    """
    # An empty alternation would match the empty string at every position
    # and then fail the dictionary lookup.
    if not adict:
        return text

    # re.escape neutralizes regex metacharacters so each key is matched
    # literally.
    keys = sorted(adict, key=len, reverse=True)
    rx = re.compile('|'.join(map(re.escape, keys)))

    def one_xlat(match):
        # group(0) is the whole matched text, i.e. one of the keys.
        return adict[match.group(0)]

    # sub() calls the callback once per match and inserts its return value.
    return rx.sub(one_xlat, text)
# Run the one-shot helper on the sample text and show the result.
re_text = multiple_replace(text, adict)
print(re_text)
# When one fixed translation table is applied to many texts, the preparation
# only needs to be done once.
def make_xlat(*args, **kwds):
    """Build a reusable function that performs the multi-replacement.

    Args:
        *args, **kwds: passed to ``dict()`` to build the translation table
            (substring -> replacement).

    Returns:
        A function ``xlat(text)`` returning ``text`` with every key of the
        table replaced by its value.  Longer keys are tried first; with an
        empty table the text is returned unchanged.
    """
    adict = dict(*args, **kwds)
    # Longest keys first so a key that is a prefix of another cannot win.
    keys = sorted(adict, key=len, reverse=True)
    rx = re.compile('|'.join(map(re.escape, keys)))

    def one_xlat(match):
        return adict[match.group(0)]

    def xlat(text):
        # An empty table compiles to a pattern matching the empty string
        # everywhere; skip the substitution in that case.
        if not adict:
            return text
        return rx.sub(one_xlat, text)

    return xlat
# Build the translator once from the table, then apply it to the sample text.
translate = make_xlat(adict)
re_text = translate(text)
print(re_text)
# A class offering the same functionality.
class make_xlat_class:
    """Callable translator built from a substring -> replacement table.

    Subclasses can override ``make_rx`` to change how keys are matched.
    """

    def __init__(self, *args, **kwds):
        """Build the table from ``dict(*args, **kwds)`` and compile the regex."""
        self.adict = dict(*args, **kwds)
        self.rx = self.make_rx()

    def make_rx(self):
        """Return the compiled alternation of all (escaped) keys.

        Longer keys come first so a key that is a prefix of another cannot
        pre-empt the longer match.
        """
        keys = sorted(self.adict, key=len, reverse=True)
        return re.compile('|'.join(map(re.escape, keys)))

    def one_xlat(self, match):
        """Return the replacement for the text matched by ``match``."""
        return self.adict[match.group(0)]

    def __call__(self, text):
        """Return ``text`` with every key replaced by its value."""
        # With an empty table the pattern would match the empty string and
        # the lookup in one_xlat would fail.
        if not self.adict:
            return text
        return self.rx.sub(self.one_xlat, text)
# Same demo with the class-based translator.
translate = make_xlat_class(adict)
re_text = translate(text)
print(re_text)
# A subclass that only matches keys at word boundaries.
class make_xlat_by_whole_words_class(make_xlat_class):
    """Variant of ``make_xlat_class`` that replaces whole words only."""

    def make_rx(self):
        """Return the alternation of keys, each wrapped in ``\\b`` anchors.

        The resulting pattern has the form ``\\bk1\\b|\\bk2\\b|...``, so a key
        embedded inside a longer word is not replaced.
        NOTE(review): ``\\b`` needs a word character on one side, so keys
        that begin or end with a non-word character may not match as
        expected -- confirm if such keys are needed.
        """
        return re.compile(r'\b%s\b' % r'\b|\b'.join(map(re.escape, self.adict)))
# Same demo with the whole-word translator.
translate = make_xlat_by_whole_words_class(adict)
re_text = translate(text)
print(re_text)
输出:
hi world, hi python, hi python, hi pppythonppp
hi world, hi python, hi python, hi pppythonppp
hi world, hi python, hi python, hi pppythonppp
hi world, hi python, hi python, hi ppperlppp
2012-12-15 星期六
6.检查字符串中的结束标记
def any_true(predicate, sequence):
    """Return True if ``predicate`` is truthy for at least one item.

    Args:
        predicate: one-argument callable applied to each item.
        sequence: iterable of items to test.

    Returns:
        ``True`` as soon as one call returns a truthy value, else ``False``
        (also for an empty iterable).
    """
    # any() accepts every truthy result; ``True in ...`` only recognised
    # results equal to True and missed values such as a non-empty string.
    return any(map(predicate, sequence))
def ends_with(s, *endings):
    """Return True if ``s`` ends with any of the given ``endings``.

    Args:
        s: string to test.
        *endings: candidate suffixes.

    Returns:
        ``True`` if at least one suffix matches; ``False`` otherwise,
        including when no suffix is given.
    """
    # str.endswith accepts a tuple of suffixes, and *endings already is one.
    return s.endswith(endings)
import os
# Print the names in the directory that end with one of the image
# extensions.  The directory path is hard-coded.
for filename in os.listdir('d:\\下载\\埙'):
    if ends_with(filename, '.jpg', '.jpeg', '.gif', '.bmp'):
        print(filename)
输出:
22f6362ac65c1038d469f62db2119313b17e8974.jpg
28fb1f4fbfad45dd841b55b79dbb0d77.jpg
8ad4b31c8701a18bd90491f29e2f07082938fea4.jpg
埙 追梦.jpg
2012-12-16 星期天
7.在Unicode和普通字符串之间转换
'''
Created on Dec 14, 2012
在Unicode和普通字符串之间转换
@author: liury_lab
'''
# A text string to convert back and forth.
unicode_str = 'hello python'

# Text -> bytes: encode with a named codec.
utf8_str = unicode_str.encode(encoding="utf-8")
print(utf8_str)
ascii_str = unicode_str.encode(encoding="ascii")
print(ascii_str)
iso_str = unicode_str.encode(encoding="ISO-8859-1")
print(iso_str)
utf16_str = unicode_str.encode(encoding="utf-16")
print(utf16_str)

# Bytes -> text: decode with the codec that produced the bytes.
print(utf8_str.decode("utf-8"))
print(ascii_str.decode("ascii"))
print(iso_str.decode("ISO-8859-1"))
print(utf16_str.decode("utf-16"))
输出:
b'hello python'
b'hello python'
b'hello python'
b'\xff\xfeh\x00e\x00l\x00l\x00o\x00 \x00p\x00y\x00t\x00h\x00o\x00n\x00'
hello python
hello python
hello python
hello python
注:Python 3 中的字符串(str)默认都是 Unicode;输出中前面的 b 表示该值是 bytes(字节串)。