原文链接:http://blog.chinaunix.net/uid-25992400-id-3283846.html
任何语言都离不开字符,那就会涉及对字符的操作,尤其是脚本语言更是频繁,不管是生产环境还是面试考验都要面对字符串的操作。
python的字符串操作通过2部分的方法函数基本上就可以解决所有的字符串操作需求:
- python的字符串属性函数
- python的string模块
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- 字符串属性函数
系统版本:CentOS release 6.2 (Final),内核 2.6.32-220.el6.x86_64
python版本:Python 2.6.6
字符串属性方法
字符串格式输出对齐
- 1.>>> str='stRINg lEArn'
- 2.>>>
- 3.>>> str.center(20) #生成20个字符长度,str排中间
- 4.'    stRINg lEArn    '
- 5.>>>
- 6.>>> str.ljust(20) #str左对齐
- 7.'stRINg lEArn        '
- 8.>>>
- 9.>>> str.rjust(20) #str右对齐
- 10.'        stRINg lEArn'
- 11.>>>
- 12.>>> str.zfill(20) #str右对齐,左边填充0
- 13.'00000000stRINg lEArn'
大小写转换
- 1.>>> str='stRINg lEArn'
- 2.>>>
- 3.>>> str.upper() #转大写
- 4.'STRING LEARN'
- 5.>>>
- 6.>>> str.lower() #转小写
- 7.'string learn'
- 8.>>>
- 9.>>> str.capitalize() #字符串首为大写,其余小写
- 10.'String learn'
- 11.>>>
- 12.>>> str.swapcase() #大小写对换
- 13.'STrinG LeaRN'
- 14.>>>
- 15.>>> str.title() #以分隔符为标记,首字符为大写,其余为小写
- 16.'String Learn'
字符串条件判断
- 1.>>> str='0123'
- 2.>>> str.isalnum() #是否全是字母和数字,并至少有一个字符
- 3.True
- 4.>>> str.isdigit() #是否全是数字,并至少有一个字符
- 5.True
- 6.
- 7.>>> str='abcd'
- 8.>>> str.isalnum()
- 9.True
- 10.>>> str.isalpha() #是否全是字母,并至少有一个字符
- 11.True
- 12.>>> str.islower() #是否全是小写,当全是小写和数字一起时候,也判断为True
- 13.True
- 14.
- 15.>>> str='abcd0123'
- 16.>>> str.islower() #同上
- 17.True
- 18.>>> str.isalnum()
- 19.True
- 20.
- 21.>>> str=' '
- 22.>>> str.isspace() #是否全是空白字符,并至少有一个字符
- 23.True
- 24.>>> str='ABC'
- 25.>>> str.isupper() #是否全是大写,当全是大写和数字一起时候,也判断为True
- 26.True
- 27.>>> str='Abb Acc'
- 28.>>> str.istitle() #所有单词字首都是大写,标题
- 29.True
- 30.
- 31.>>> str='string learn'
- 32.>>> str.startswith('str') #判断字符串以'str'开头
- 33.True
- 34.>>> str.endswith('arn') #判断字符串以'arn'结尾
- 35.True
字符串搜索定位与替换
- 1.>>> str='string lEARn'
- 2.>>>
- 3.>>> str.find('a') #查找字符串,没有则返回-1,有则返回查找到的第一个匹配的索引
- 4.-1
- 5.>>> str.find('n')
- 6.4
- 7.>>> str.rfind('n') #同上,只是返回的索引是最后一次匹配的
- 8.11
- 9.>>>
- 10.>>> str.index('a') #如果没有匹配则报错
- 11.Traceback (most recent call last):
- 12. File "<stdin>", line 1, in <module>
- 13.ValueError: substring not found
- 14.>>> str.index('n') #同find类似,返回第一次匹配的索引值
- 15.4
- 16.>>> str.rindex('n') #返回最后一次匹配的索引值
- 17.11
- 18.>>>
- 19.>>> str.count('a') #字符串中匹配的次数
- 20.0
- 21.>>> str.count('n') #同上
- 22.2
- 23.>>>
- 24.>>> str.replace('EAR','ear') #匹配替换
- 25.'string learn'
- 26.>>> str.replace('n','N')
- 27.'striNg lEARN'
- 28.>>> str.replace('n','N',1)
- 29.'striNg lEARn'
- 30.>>>
- 31.>>>
- 32.>>> str.strip('n') #删除字符串首尾匹配的字符;不带参数时默认删除首尾的空白字符(包括换行符)
- 33.'string lEAR'
- 34.>>> str.lstrip('n') #左匹配
- 35.'string lEARn'
- 36.>>> str.rstrip('n') #右匹配
- 37.'string lEAR'
- 38.>>>
- 39.>>> str='\ttab'
- 40.>>> str.expandtabs() #把制表符转为空格
- 41.'        tab'
- 42.>>> str.expandtabs(2) #指定空格数
- 43.'  tab'
字符串编码与解码
- 1.>>> str='字符串学习'
- 2.>>> str
- 3.'\xe5\xad\x97\xe7\xac\xa6\xe4\xb8\xb2\xe5\xad\xa6\xe4\xb9\xa0'
- 4.>>>
- 5.>>> str.decode('utf-8') #解码过程,将utf-8解码为unicode
- 6.u'\u5b57\u7b26\u4e32\u5b66\u4e60'
- 7.
- 8.>>> str.decode('utf-8').encode('gbk') #编码过程,将unicode编码为gbk
- 9.'\xd7\xd6\xb7\xfb\xb4\xae\xd1\xa7\xcf\xb0'
- 10.>>> str.decode('utf-8').encode('utf-8') #将unicode编码为utf-8
- 11.'\xe5\xad\x97\xe7\xac\xa6\xe4\xb8\xb2\xe5\xad\xa6\xe4\xb9\xa0'
字符串分割变换
- 1.>>> str='Learn string'
- 2.>>> '-'.join(str)
- 3.'L-e-a-r-n- -s-t-r-i-n-g'
- 4.>>> l1=['Learn','string']
- 5.>>> '-'.join(l1)
- 6.'Learn-string'
- 7.>>>
- 8.>>> str.split('n')
- 9.['Lear', ' stri', 'g']
- 10.>>> str.split('n',1)
- 11.['Lear', ' string']
- 12.>>> str.rsplit('n',1)
- 13.['Learn stri', 'g']
- 14.>>>
- 15.>>> str.splitlines()
- 16.['Learn string']
- 17.>>>
- 18.>>> str.partition('n')
- 19.('Lear', 'n', ' string')
- 20.>>> str.rpartition('n')
- 21.('Learn stri', 'n', 'g')
string模块源代码
- 1."""A collection of string operations (most are no longer used).
- 2.
- 3.Warning: most of the code you see here isn't normally used nowadays.
- 4.Beginning with Python 1.6, many of these functions are implemented as
- 5.methods on the standard string object. They used to be implemented by
- 6.a built-in module called strop, but strop is now obsolete itself.
- 7.
- 8.Public module variables:
- 9.
- 10.whitespace -- a string containing all characters considered whitespace
- 11.lowercase -- a string containing all characters considered lowercase letters
- 12.uppercase -- a string containing all characters considered uppercase letters
- 13.letters -- a string containing all characters considered letters
- 14.digits -- a string containing all characters considered decimal digits
- 15.hexdigits -- a string containing all characters considered hexadecimal digits
- 16.octdigits -- a string containing all characters considered octal digits
- 17.punctuation -- a string containing all characters considered punctuation
- 18.printable -- a string containing all characters considered printable
- 19.
- 20."""
- 21.
- 22.# Some strings for ctype-style character classification
- 23.whitespace = ' \t\n\r\v\f'
- 24.lowercase = 'abcdefghijklmnopqrstuvwxyz'
- 25.uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- 26.letters = lowercase + uppercase
- 27.ascii_lowercase = lowercase
- 28.ascii_uppercase = uppercase
- 29.ascii_letters = ascii_lowercase + ascii_uppercase
- 30.digits = '0123456789'
- 31.hexdigits = digits + 'abcdef' + 'ABCDEF'
- 32.octdigits = '01234567'
- 33.punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
- 34.printable = digits + letters + punctuation + whitespace
- 35.
- 36.# Case conversion helpers
- 37.# Use str to convert Unicode literal in case of -U
- 38.l = map(chr, xrange(256))
- 39._idmap = str('').join(l)
- 40.del l
- 41.
- 42.# Functions which aren't available as string methods.
- 43.
- 44.# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
- 45.def capwords(s, sep=None):
- 46. """capwords(s [,sep]) -> string
- 47.
- 48. Split the argument into words using split, capitalize each
- 49. word using capitalize, and join the capitalized words using
- 50. join. If the optional second argument sep is absent or None,
- 51. runs of whitespace characters are replaced by a single space
- 52. and leading and trailing whitespace are removed, otherwise
- 53. sep is used to split and join the words.
- 54.
- 55. """
- 56. return (sep or ' ').join(x.capitalize() for x in s.split(sep))
- 57.
- 58.
- 59.# Construct a translation string
- 60._idmapL = None
- 61.def maketrans(fromstr, tostr):
- 62. """maketrans(frm, to) -> string
- 63.
- 64. Return a translation table (a string of 256 bytes long)
- 65. suitable for use in string.translate. The strings frm and to
- 66. must be of the same length.
- 67.
- 68. """
- 69. if len(fromstr) != len(tostr):
- 70. raise ValueError, "maketrans arguments must have same length"
- 71. global _idmapL
- 72. if not _idmapL:
- 73. _idmapL = list(_idmap)
- 74. L = _idmapL[:]
- 75. fromstr = map(ord, fromstr)
- 76. for i in range(len(fromstr)):
- 77. L[fromstr[i]] = tostr[i]
- 78. return ''.join(L)
- 79.
- 80.
- 81.
- 82.####################################################################
- 83.import re as _re
- 84.
- 85.class _multimap:
- 86. """Helper class for combining multiple mappings.
- 87.
- 88. Used by .{safe_,}substitute() to combine the mapping and keyword
- 89. arguments.
- 90. """
- 91. def __init__(self, primary, secondary):
- 92. self._primary = primary
- 93. self._secondary = secondary
- 94.
- 95. def __getitem__(self, key):
- 96. try:
- 97. return self._primary[key]
- 98. except KeyError:
- 99. return self._secondary[key]
- 100.
- 101.
- 102.class _TemplateMetaclass(type):
- 103. pattern = r"""
- 104. %(delim)s(?:
- 105. (?P<escaped>%(delim)s) | # Escape sequence of two delimiters
- 106. (?P<named>%(id)s) | # delimiter and a Python identifier
- 107. \{(?P<braced>%(id)s)\} | # delimiter and a braced identifier
- 108. (?P<invalid>) # Other ill-formed delimiter exprs
- 109. )
- 110. """
- 111.
- 112. def __init__(cls, name, bases, dct):
- 113. super(_TemplateMetaclass, cls).__init__(name, bases, dct)
- 114. if 'pattern' in dct:
- 115. pattern = cls.pattern
- 116. else:
- 117. pattern = _TemplateMetaclass.pattern % {
- 118. 'delim' : _re.escape(cls.delimiter),
- 119. 'id' : cls.idpattern,
- 120. }
- 121. cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
- 122.
- 123.
- 124.class Template:
- 125. """A string class for supporting $-substitutions."""
- 126. __metaclass__ = _TemplateMetaclass
- 127.
- 128. delimiter = '$'
- 129. idpattern = r'[_a-z][_a-z0-9]*'
- 130.
- 131. def __init__(self, template):
- 132. self.template = template
- 133.
- 134. # Search for $$, $identifier, ${identifier}, and any bare $'s
- 135.
- 136. def _invalid(self, mo):
- 137. i = mo.start('invalid')
- 138. lines = self.template[:i].splitlines(True)
- 139. if not lines:
- 140. colno = 1
- 141. lineno = 1
- 142. else:
- 143. colno = i - len(''.join(lines[:-1]))
- 144. lineno = len(lines)
- 145. raise ValueError('Invalid placeholder in string: line %d, col %d' %
- 146. (lineno, colno))
- 147.
- 148. def substitute(self, *args, **kws):
- 149. if len(args) > 1:
- 150. raise TypeError('Too many positional arguments')
- 151. if not args:
- 152. mapping = kws
- 153. elif kws:
- 154. mapping = _multimap(kws, args[0])
- 155. else:
- 156. mapping = args[0]
- 157. # Helper function for .sub()
- 158. def convert(mo):
- 159. # Check the most common path first.
- 160. named = mo.group('named') or mo.group('braced')
- 161. if named is not None:
- 162. val = mapping[named]
- 163. # We use this idiom instead of str() because the latter will
- 164. # fail if val is a Unicode containing non-ASCII characters.
- 165. return '%s' % (val,)
- 166. if mo.group('escaped') is not None:
- 167. return self.delimiter
- 168. if mo.group('invalid') is not None:
- 169. self._invalid(mo)
- 170. raise ValueError('Unrecognized named group in pattern',
- 171. self.pattern)
- 172. return self.pattern.sub(convert, self.template)
- 173.
- 174. def safe_substitute(self, *args, **kws):
- 175. if len(args) > 1:
- 176. raise TypeError('Too many positional arguments')
- 177. if not args:
- 178. mapping = kws
- 179. elif kws:
- 180. mapping = _multimap(kws, args[0])
- 181. else:
- 182. mapping = args[0]
- 183. # Helper function for .sub()
- 184. def convert(mo):
- 185. named = mo.group('named')
- 186. if named is not None:
- 187. try:
- 188. # We use this idiom instead of str() because the latter
- 189. # will fail if val is a Unicode containing non-ASCII
- 190. return '%s' % (mapping[named],)
- 191. except KeyError:
- 192. return self.delimiter + named
- 193. braced = mo.group('braced')
- 194. if braced is not None:
- 195. try:
- 196. return '%s' % (mapping[braced],)
- 197. except KeyError:
- 198. return self.delimiter + '{' + braced + '}'
- 199. if mo.group('escaped') is not None:
- 200. return self.delimiter
- 201. if mo.group('invalid') is not None:
- 202. return self.delimiter
- 203. raise ValueError('Unrecognized named group in pattern',
- 204. self.pattern)
- 205. return self.pattern.sub(convert, self.template)
- 206.
- 207.
- 208.
- 209.####################################################################
- 210.# NOTE: Everything below here is deprecated. Use string methods instead.
- 211.# This stuff will go away in Python 3.0.
- 212.
- 213.# Backward compatible names for exceptions
- 214.index_error = ValueError
- 215.atoi_error = ValueError
- 216.atof_error = ValueError
- 217.atol_error = ValueError
- 218.
- 219.# convert UPPER CASE letters to lower case
- 220.def lower(s):
- 221. """lower(s) -> string
- 222.
- 223. Return a copy of the string s converted to lowercase.
- 224.
- 225. """
- 226. return s.lower()
- 227.
- 228.# Convert lower case letters to UPPER CASE
- 229.def upper(s):
- 230. """upper(s) -> string
- 231.
- 232. Return a copy of the string s converted to uppercase.
- 233.
- 234. """
- 235. return s.upper()
- 236.
- 237.# Swap lower case letters and UPPER CASE
- 238.def swapcase(s):
- 239. """swapcase(s) -> string
- 240.
- 241. Return a copy of the string s with upper case characters
- 242. converted to lowercase and vice versa.
- 243.
- 244. """
- 245. return s.swapcase()
- 246.
- 247.# Strip leading and trailing tabs and spaces
- 248.def strip(s, chars=None):
- 249. """strip(s [,chars]) -> string
- 250.
- 251. Return a copy of the string s with leading and trailing
- 252. whitespace removed.
- 253. If chars is given and not None, remove characters in chars instead.
- 254. If chars is unicode, S will be converted to unicode before stripping.
- 255.
- 256. """
- 257. return s.strip(chars)
- 258.
- 259.# Strip leading tabs and spaces
- 260.def lstrip(s, chars=None):
- 261. """lstrip(s [,chars]) -> string
- 262.
- 263. Return a copy of the string s with leading whitespace removed.
- 264. If chars is given and not None, remove characters in chars instead.
- 265.
- 266. """
- 267. return s.lstrip(chars)
- 268.
- 269.# Strip trailing tabs and spaces
- 270.def rstrip(s, chars=None):
- 271. """rstrip(s [,chars]) -> string
- 272.
- 273. Return a copy of the string s with trailing whitespace removed.
- 274. If chars is given and not None, remove characters in chars instead.
- 275.
- 276. """
- 277. return s.rstrip(chars)
- 278.
- 279.
- 280.# Split a string into a list of space/tab-separated words
- 281.def split(s, sep=None, maxsplit=-1):
- 282. """split(s [,sep [,maxsplit]]) -> list of strings
- 283.
- 284. Return a list of the words in the string s, using sep as the
- 285. delimiter string. If maxsplit is given, splits at no more than
- 286. maxsplit places (resulting in at most maxsplit+1 words). If sep
- 287. is not specified or is None, any whitespace string is a separator.
- 288.
- 289. (split and splitfields are synonymous)
- 290.
- 291. """
- 292. return s.split(sep, maxsplit)
- 293.splitfields = split
- 294.
- 295.# Split a string into a list of space/tab-separated words
- 296.def rsplit(s, sep=None, maxsplit=-1):
- 297. """rsplit(s [,sep [,maxsplit]]) -> list of strings
- 298.
- 299. Return a list of the words in the string s, using sep as the
- 300. delimiter string, starting at the end of the string and working
- 301. to the front. If maxsplit is given, at most maxsplit splits are
- 302. done. If sep is not specified or is None, any whitespace string
- 303. is a separator.
- 304. """
- 305. return s.rsplit(sep, maxsplit)
- 306.
- 307.# Join fields with optional separator
- 308.def join(words, sep = ' '):
- 309. """join(list [,sep]) -> string
- 310.
- 311. Return a string composed of the words in list, with
- 312. intervening occurrences of sep. The default separator is a
- 313. single space.
- 314.
- 315. (joinfields and join are synonymous)
- 316.
- 317. """
- 318. return sep.join(words)
- 319.joinfields = join
- 320.
- 321.# Find substring, raise exception if not found
- 322.def index(s, *args):
- 323. """index(s, sub [,start [,end]]) -> int
- 324.
- 325. Like find but raises ValueError when the substring is not found.
- 326.
- 327. """
- 328. return s.index(*args)
- 329.
- 330.# Find last substring, raise exception if not found
- 331.def rindex(s, *args):
- 332. """rindex(s, sub [,start [,end]]) -> int
- 333.
- 334. Like rfind but raises ValueError when the substring is not found.
- 335.
- 336. """
- 337. return s.rindex(*args)
- 338.
- 339.# Count non-overlapping occurrences of substring
- 340.def count(s, *args):
- 341. """count(s, sub[, start[,end]]) -> int
- 342.
- 343. Return the number of occurrences of substring sub in string
- 344. s[start:end]. Optional arguments start and end are
- 345. interpreted as in slice notation.
- 346.
- 347. """
- 348. return s.count(*args)
- 349.
- 350.# Find substring, return -1 if not found
- 351.def find(s, *args):
- 352. """find(s, sub [,start [,end]]) -> in
- 353.
- 354. Return the lowest index in s where substring sub is found,
- 355. such that sub is contained within s[start,end]. Optional
- 356. arguments start and end are interpreted as in slice notation.
- 357.
- 358. Return -1 on failure.
- 359.
- 360. """
- 361. return s.find(*args)
- 362.
- 363.# Find last substring, return -1 if not found
- 364.def rfind(s, *args):
- 365. """rfind(s, sub [,start [,end]]) -> int
- 366.
- 367. Return the highest index in s where substring sub is found,
- 368. such that sub is contained within s[start,end]. Optional
- 369. arguments start and end are interpreted as in slice notation.
- 370.
- 371. Return -1 on failure.
- 372.
- 373. """
- 374. return s.rfind(*args)
- 375.
- 376.# for a bit of speed
- 377._float = float
- 378._int = int
- 379._long = long
- 380.
- 381.# Convert string to float
- 382.def atof(s):
- 383. """atof(s) -> float
- 384.
- 385. Return the floating point number represented by the string s.
- 386.
- 387. """
- 388. return _float(s)
- 389.
- 390.
- 391.# Convert string to integer
- 392.def atoi(s , base=10):
- 393. """atoi(s [,base]) -> int
- 394.
- 395. Return the integer represented by the string s in the given
- 396. base, which defaults to 10. The string s must consist of one
- 397. or more digits, possibly preceded by a sign. If base is 0, it
- 398. is chosen from the leading characters of s, 0 for octal, 0x or
- 399. 0X for hexadecimal. If base is 16, a preceding 0x or 0X is
- 400. accepted.
- 401.
- 402. """
- 403. return _int(s, base)
- 404.
- 405.
- 406.# Convert string to long integer
- 407.def atol(s, base=10):
- 408. """atol(s [,base]) -> long
- 409.
- 410. Return the long integer represented by the string s in the
- 411. given base, which defaults to 10. The string s must consist
- 412. of one or more digits, possibly preceded by a sign. If base
- 413. is 0, it is chosen from the leading characters of s, 0 for
- 414. octal, 0x or 0X for hexadecimal. If base is 16, a preceding
- 415. 0x or 0X is accepted. A trailing L or l is not accepted,
- 416. unless base is 0.
- 417.
- 418. """
- 419. return _long(s, base)
- 420.
- 421.
- 422.# Left-justify a string
- 423.def ljust(s, width, *args):
- 424. """ljust(s, width[, fillchar]) -> string
- 425.
- 426. Return a left-justified version of s, in a field of the
- 427. specified width, padded with spaces as needed. The string is
- 428. never truncated. If specified the fillchar is used instead of spaces.
- 429.
- 430. """
- 431. return s.ljust(width, *args)
- 432.
- 433.# Right-justify a string
- 434.def rjust(s, width, *args):
- 435. """rjust(s, width[, fillchar]) -> string
- 436.
- 437. Return a right-justified version of s, in a field of the
- 438. specified width, padded with spaces as needed. The string is
- 439. never truncated. If specified the fillchar is used instead of spaces.
- 440.
- 441. """
- 442. return s.rjust(width, *args)
- 443.
- 444.# Center a string
- 445.def center(s, width, *args):
- 446. """center(s, width[, fillchar]) -> string
- 447.
- 448. Return a center version of s, in a field of the specified
- 449. width. padded with spaces as needed. The string is never
- 450. truncated. If specified the fillchar is used instead of spaces.
- 451.
- 452. """
- 453. return s.center(width, *args)
- 454.
- 455.# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
- 456.# Decadent feature: the argument may be a string or a number
- 457.# (Use of this is deprecated; it should be a string as with ljust c.s.)
- 458.def zfill(x, width):
- 459. """zfill(x, width) -> string
- 460.
- 461. Pad a numeric string x with zeros on the left, to fill a field
- 462. of the specified width. The string x is never truncated.
- 463.
- 464. """
- 465. if not isinstance(x, basestring):
- 466. x = repr(x)
- 467. return x.zfill(width)
- 468.
- 469.# Expand tabs in a string.
- 470.# Doesn't take non-printing chars into account, but does understand \n.
- 471.def expandtabs(s, tabsize=8):
- 472. """expandtabs(s [,tabsize]) -> string
- 473.
- 474. Return a copy of the string s with all tab characters replaced
- 475. by the appropriate number of spaces, depending on the current
- 476. column, and the tabsize (default 8).
- 477.
- 478. """
- 479. return s.expandtabs(tabsize)
- 480.
- 481.# Character translation through look-up table.
- 482.def translate(s, table, deletions=""):
- 483. """translate(s,table [,deletions]) -> string
- 484.
- 485. Return a copy of the string s, where all characters occurring
- 486. in the optional argument deletions are removed, and the
- 487. remaining characters have been mapped through the given
- 488. translation table, which must be a string of length 256. The
- 489. deletions argument is not allowed for Unicode strings.
- 490.
- 491. """
- 492. if deletions or table is None:
- 493. return s.translate(table, deletions)
- 494. else:
- 495. # Add s[:0] so that if s is Unicode and table is an 8-bit string,
- 496. # table is converted to Unicode. This means that table *cannot*
- 497. # be a dictionary -- for that feature, use u.translate() directly.
- 498. return s.translate(table + s[:0])
- 499.
- 500.# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
- 501.def capitalize(s):
- 502. """capitalize(s) -> string
- 503.
- 504. Return a copy of the string s with only its first character
- 505. capitalized.
- 506.
- 507. """
- 508. return s.capitalize()
- 509.
- 510.# Substring replacement (global)
- 511.def replace(s, old, new, maxsplit=-1):
- 512. """replace (str, old, new[, maxsplit]) -> string
- 513.
- 514. Return a copy of string str with all occurrences of substring
- 515. old replaced by new. If the optional argument maxsplit is
- 516. given, only the first maxsplit occurrences are replaced.
- 517.
- 518. """
- 519. return s.replace(old, new, maxsplit)
- 520.
- 521.
- 522.# Try importing optional built-in module "strop" -- if it exists,
- 523.# it redefines some string operations that are 100-1000 times faster.
- 524.# It also defines values for whitespace, lowercase and uppercase
- 525.# that match <ctype.h>'s definitions.
- 526.
- 527.try:
- 528. from strop import maketrans, lowercase, uppercase, whitespace
- 529. letters = lowercase + uppercase
- 530.except ImportError:
- 531. pass # Use the original versions
- 532.
- 533.########################################################################
- 534.# the Formatter class
- 535.# see PEP 3101 for details and purpose of this class
- 536.
- 537.# The hard parts are reused from the C implementation. They're exposed as "_"
- 538.# prefixed methods of str and unicode.
- 539.
- 540.# The overall parser is implemented in str._formatter_parser.
- 541.# The field name parser is implemented in str._formatter_field_name_split
- 542.
- 543.class Formatter(object):
- 544. def format(self, format_string, *args, **kwargs):
- 545. return self.vformat(format_string, args, kwargs)
- 546.
- 547. def vformat(self, format_string, args, kwargs):
- 548. used_args = set()
- 549. result = self._vformat(format_string, args, kwargs, used_args, 2)
- 550. self.check_unused_args(used_args, args, kwargs)
- 551. return result
- 552.
- 553. def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
- 554. if recursion_depth < 0:
- 555. raise ValueError('Max string recursion exceeded')
- 556. result = []
- 557. for literal_text, field_name, format_spec, conversion in
- 558. self.parse(format_string):
- 559.
- 560. # output the literal text
- 561. if literal_text:
- 562. result.append(literal_text)
- 563.
- 564. # if there's a field, output it
- 565. if field_name is not None:
- 566. # this is some markup, find the object and do
- 567. # the formatting
- 568.
- 569. # given the field_name, find the object it references
- 570. # and the argument it came from
- 571. obj, arg_used = self.get_field(field_name, args, kwargs)
- 572. used_args.add(arg_used)
- 573.
- 574. # do any conversion on the resulting object
- 575. obj = self.convert_field(obj, conversion)
- 576.
- 577. # expand the format spec, if needed
- 578. format_spec = self._vformat(format_spec, args, kwargs,
- 579. used_args, recursion_depth-1)
- 580.
- 581. # format the object and append to the result
- 582. result.append(self.format_field(obj, format_spec))
- 583.
- 584. return ''.join(result)
- 585.
- 586.
- 587. def get_value(self, key, args, kwargs):
- 588. if isinstance(key, (int, long)):
- 589. return args[key]
- 590. else:
- 591. return kwargs[key]
- 592.
- 593.
- 594. def check_unused_args(self, used_args, args, kwargs):
- 595. pass
- 596.
- 597.
- 598. def format_field(self, value, format_spec):
- 599. return format(value, format_spec)
- 600.
- 601.
- 602. def convert_field(self, value, conversion):
- 603. # do any conversion on the resulting object
- 604. if conversion == 'r':
- 605. return repr(value)
- 606. elif conversion == 's':
- 607. return str(value)
- 608. elif conversion is None:
- 609. return value
- 610. raise ValueError("Unknown converion specifier {0!s}".format(conversion))
- 611.
- 612.
- 613. # returns an iterable that contains tuples of the form:
- 614. # (literal_text, field_name, format_spec, conversion)
- 615. # literal_text can be zero length
- 616. # field_name can be None, in which case there's no
- 617. # object to format and output
- 618. # if field_name is not None, it is looked up, formatted
- 619. # with format_spec and conversion and then used
- 620. def parse(self, format_string):
- 621. return format_string._formatter_parser()
- 622.
- 623.
- 624. # given a field_name, find the object it references.
- 625. # field_name: the field being looked up, e.g. "0.name"
- 626. # or "lookup[3]"
- 627. # used_args: a set of which args have been used
- 628. # args, kwargs: as passed in to vformat
- 629. def get_field(self, field_name, args, kwargs):
- 630. first, rest = field_name._formatter_field_name_split()
- 631.
- 632. obj = self.get_value(first, args, kwargs)
- 633.
- 634. # loop through the rest of the field_name, doing
- 635. # getattr or getitem as needed
- 636. for is_attr, i in rest:
- 637. if is_attr:
- 638. obj = getattr(obj, i)
- 639. else:
- 640. obj = obj[i]
- 641.
- 642. return obj, first