python cookbook 字符串和文本

最新推荐文章于 2024-11-10 13:43:20 发布

weixin_30617737

最新推荐文章于 2024-11-10 13:43:20 发布

阅读量70

点赞数

文章标签： python shell

原文链接：http://www.cnblogs.com/badboyf/p/6607853.html

版权

使用多个界定符分隔字符串

  import re
  # Sample text whose fields are separated by semicolons, commas or whitespace.
  line = 'asdf fjdk; afed, fjek,asdf, foo'
  # Split on any single delimiter followed by optional whitespace.
  fields = re.split(r'[;,\s]\s*', line)
  print(fields)
  # With a capture group, re.split() also returns the matched delimiters
  # as items of the result list.
  fields_and_delimiters = re.split(r'(;|,|\s)\s*', line)
  print(fields_and_delimiters)

匹配开头或结尾

  url = 'http://www.python.org'
  # To test several prefixes at once they must be passed as a tuple;
  # a list or set has to be converted with tuple() first.
  print(url.startswith(('http', 'https', 'ftp')))
  import re
  # A regular-expression alternation can perform the same prefix test.
  print(re.match('http:|https:|ftp:', url))

使用Shell中的通配符匹配

  from fnmatch import fnmatch, fnmatchcase
  # fnmatch() tests a name against a shell-style wildcard pattern and
  # returns True or False; print the result of each test.
  print(fnmatch('foo.txt', '*.txt'))
  print(fnmatch('foo.txt', '?oo.txt'))
  print(fnmatch('Dat45', 'Dat[0-9]*'))
  names = ['Dat1.csv', 'Dat2.csv', 'config.ini', 'foo.py']
  # Keep only the names that match the wildcard pattern.
  print([name for name in names if fnmatch(name, 'Dat*.csv')])

忽略大小写匹配和替换

  import re
  text = 'UPPER PYTHON, lower python, Mixed Python'
  # Search with re.IGNORECASE so letter case is ignored.
  hits_ignoring_case = re.findall('python', text, flags=re.IGNORECASE)
  print(hits_ignoring_case)
  # The same search without the flag, for comparison.
  hits_exact_case = re.findall('python', text)
  print(hits_exact_case)
  # Substitute ignoring case, replacing at most 100 occurrences.
  substituted = re.sub('python', 'java', text, count=100, flags=re.IGNORECASE)
  print(substituted)

贪婪和非贪婪匹配

`(.*)` 是贪婪匹配，会尽可能多地匹配任意字符；`(.*?)` 是非贪婪匹配，会尽可能少地匹配。

  import re
  text = 'Computer says "no." Phone says "yes."'
  # Greedy form: (.*) between the quotes.
  str_pat = re.compile(r'\"(.*)\"')
  greedy_matches = str_pat.findall(text)
  print(greedy_matches)
  # Non-greedy form: (.*?) between the quotes.
  str_pat = re.compile(r'\"(.*?)\"')
  lazy_matches = str_pat.findall(text)
  print(lazy_matches)

多行匹配

  import re
  # Pattern for a C-style comment; the group captures the comment body.
  comment = re.compile(r'/\*(.*?)\*/')
  text1 = '/* this is a comment */'
  text2 = '''/* this is a
  multiline comment */
  '''
  print(comment.findall(text1))
  # text2 spans two lines, and '.' does not match a newline by default.
  print(comment.findall(text2))

  # In this pattern (?:.|\n) is a non-capturing group: it is used only for
  # matching and cannot be captured separately or referenced by number.
  comment = re.compile(r'/\*((?:.|\n)*?)\*/')
  print(comment.findall(text2))

  # re.DOTALL makes the dot (.) match any character, including a newline.
  comment = re.compile(r'/\*(.*?)\*/', re.DOTALL)
  print(comment.findall(text2))

删除字符串中不需要的字符

  import re
  s = ' hello     world    \n   '
  # strip() with no argument removes leading and trailing whitespace.
  print(s.strip())
  # strip(chars) removes only the listed characters from both ends.
  print(s.strip(' \n'))
  # replace() removes every space, including the ones inside the text.
  print(s.replace(" ", ""))
  # Collapse each run of whitespace into a single space. The pattern is a
  # raw string so that \s reaches the regex engine unchanged instead of
  # being an invalid string escape sequence.
  print(re.sub(r'\s+', ' ', s))
输出：
hello     world
hello     world
helloworld

 hello world

字符串对齐

  text = 'Hello World'
  # str methods: pad to width 20 with '*'.
  print(text.rjust(20, "*"))
  print(text.center(20, '*'))
  # format() with alignment specs (labelled "python3" in the original notes):
  # '>' right, '<' left, '^' center, optionally preceded by a fill character.
  for spec in ('>20', '<20', '^20', '*>20', '=<20', '*^20'):
      print(format(text, spec))
  print('{:>10s} {:>10s}'.format('hello', 'world'))
  # format() is not limited to strings; here it aligns a float.
  x = 1.2345
  for spec in ('>10', '^10.2f'):
      print(format(x, spec))
  # %-operator formatting (labelled "python2" in the original notes).
  print('%-20s' % text)
  print('%20s' % text)

字符串拼接

  parts = ['Is', 'Chicago', 'Not', 'Chicago?']
  print(' '.join(parts))  # the fastest way
  print('hello' + ' ' + 'world')  # fine when simply joining a few strings
  print('hello' ' world')  # adjacent string literals work as well
  s = ''
  for p in parts:  # never do this
      s += p
  parts = ['now', 'is', 10, ':', '45']
  print(' '.join(str(d) for d in parts))  # generator expression to join non-str items
  a, b, c = ['f', 'z', 'k']
  print(a + ':' + b + ':' + c)  # Ugly
  print(':'.join([a, b, c]))  # Still ugly
  print(a, b, c, sep=':')  # Better
  def sample():
      """Yield the text fragments one at a time.

      When building output from many small strings, consider producing
      them with a generator like this one.
      """
      yield 'Is'
      yield 'Chicago'
      yield 'Not'
      yield 'Chicago?'
  print(' '.join(sample()))

字符串插入变量

  class Info:
      """Object whose instance attributes supply the template values."""

      def __init__(self, name, n):
          self.name = name
          self.n = n

  class safesub(dict):
      """dict that guards against a key not being found."""

      def __missing__(self, key):
          # Hand the placeholder back unchanged, braces included.
          return ''.join(('{', key, '}'))

  name = 'fzk'
  n = 10
  s = '{name} has {n} messages.'
  # Pass the values explicitly as keyword arguments.
  print(s.format(name=name, n=n))
  # Look the values up in the current namespace.
  print(s.format_map(vars()))
  # Look the values up in an instance's attributes.
  print(s.format_map(vars(Info(name, n))))
  # A missing variable raises an error; wrapping the mapping in safesub
  # avoids that.
  del n
  print(s.format_map(safesub(vars())))