re – Regular Expressions 理解完正则学会一半

前端之家收集整理的这篇文章主要介绍了“re – Regular Expressions 理解完正则学会一半”。前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。

理解完正则学会一半:

  1. import re
  2.  
  def test_patterns(text, patterns=()):
      """Print every match of each pattern found in ``text``.

      A two-row index ruler (tens digit, then ones digit) and the text
      itself are printed first so match positions can be read off by eye.
      Then, for each pattern, every match yielded by ``re.finditer`` is
      printed as ``start : last = "matched text"``, where ``last`` is the
      index of the final matched character (``match.end() - 1``).

      Args:
          text: The string to search.
          patterns: Iterable of regular expression patterns to try, in
              order.  Defaults to an empty tuple, in which case only the
              ruler and the text are printed.

      Returns:
          None.  All results are written to stdout.
      """
      positions = range(len(text))

      # Single-argument print(...) behaves the same on Python 2 and 3.
      print('')
      # Tens row: columns 0-9 show a blank instead of "0".  Floor division
      # keeps the digit an integer on Python 3 as well as Python 2.
      print(''.join(str(i // 10 or ' ') for i in positions))
      # Ones row.
      print(''.join(str(i % 10) for i in positions))
      print(text)

      # Look for each pattern in the text and print the results.
      for pattern in patterns:
          print('')
          print('Matching "%s"' % pattern)
          for match in re.finditer(pattern, text):
              s = match.start()
              e = match.end()
              # Report the inclusive index of the last matched character.
              print(' %2d : %2d = "%s"' % (s, e - 1, text[s:e]))


  # The name starts with "test", but this is a demo helper rather than a
  # unit test; keep pytest/nose-style collectors from trying to run it.
  test_patterns.__test__ = False
  23.  
  if __name__ == '__main__':
      # Demo driver: each section prints a separator line and then runs
      # test_patterns() on a sample text with a group of related patterns.
      # Single-argument print(...) behaves the same on Python 2 and 3.

      print("*" * 50)
      # Pattern Syntax
      test_patterns('abbaaabbbbaaaaa', ['ab'])

      print("*" * 50)
      # Repetition
      test_patterns('abbaaabbbbaaaaa', [
          'ab*',      # a followed by zero or more b
          'ab+',      # a followed by one or more b
          'ab?',      # a followed by zero or one b
          'ab{3}',    # a followed by three b
          'ab{2,3}',  # a followed by two to three b
      ])

      print("*" * 50)
      # Character Sets
      test_patterns('abbaaabbbbaaaaa', [
          '[ab]',     # either a or b
          'a[ab]+',   # a followed by one or more a or b
          'a[ab]+?',  # a followed by one or more a or b, not greedy
      ])

      print("*" * 50)
      test_patterns('This is some text -- with punctuation.', [
          '[^-. ]+',  # sequences without -, ., or space
      ])

      print("*" * 50)
      test_patterns('This is some text -- with punctuation.', [
          '[a-z]+',       # sequences of lower case letters
          '[A-Z]+',       # sequences of upper case letters
          '[a-zA-Z]+',    # sequences of lower or upper case letters
          '[A-Z][a-z]+',  # one upper case letter followed by lower case letters
      ])

      print("*" * 50)
      test_patterns('abbaaabbbbaaaaa', [
          'a.',     # a followed by any one character
          'b.',     # b followed by any one character
          'a.*b',   # a followed by anything, ending in b
          'a.*?b',  # a followed by anything, ending in b (non-greedy)
      ])

      print("*" * 50)
      # Escape Codes
      # Code  Meaning
      # \d    a digit
      # \D    a non-digit
      # \s    whitespace (tab, space, newline, etc.)
      # \S    non-whitespace
      # \w    alphanumeric
      # \W    non-alphanumeric
      test_patterns('This is a prime #1 example!', [
          r'\d+',  # sequence of digits
          r'\D+',  # sequence of non-digits
          r'\s+',  # sequence of whitespace
          r'\S+',  # sequence of non-whitespace
          r'\w+',  # alphanumeric characters
          r'\W+',  # non-alphanumeric
      ])

      print("*" * 50)
      # The text contains literal escape codes, so both the backslash and
      # the plus sign are escaped in each pattern to match them verbatim.
      test_patterns(r'\d+ \D+ \s+ \S+ \w+ \W+', [
          r'\\d\+',
          r'\\D\+',
          r'\\s\+',
          r'\\S\+',
          r'\\w\+',
          r'\\W\+',
      ])

      # Anchoring
      # Code  Meaning
      # ^     start of string, or line
      # $     end of string, or line
      # \A    start of string
      # \Z    end of string
      # \b    empty string at the beginning or end of a word
      # \B    empty string not at the beginning or end of a word
      print("*" * 50)
      test_patterns('This is some text -- with punctuation.', [
          r'^\w+',      # word at start of string
          r'\A\w+',     # word at start of string
          r'\w+\S*$',   # word at end of string, with optional punctuation
          r'\w+\S*\Z',  # word at end of string, with optional punctuation
          r'\w*t\w*',   # word containing 't'
          r'\bt\w+',    # 't' at start of word
          r'\w+t\b',    # 't' at end of word
          r'\Bt\B',     # 't', not start or end of word
      ])
输出结果:

  1. **************************************************
  2.  
  3.           11111
  4. 012345678901234
  5. abbaaabbbbaaaaa
  6.  
  7. Matching "ab"
  8. 0 : 1 = "ab"
  9. 5 : 6 = "ab"
  10. **************************************************
  11.  
  12.           11111
  13. 012345678901234
  14. abbaaabbbbaaaaa
  15.  
  16. Matching "ab*"
  17. 0 : 2 = "abb"
  18. 3 : 3 = "a"
  19. 4 : 4 = "a"
  20. 5 : 9 = "abbbb"
  21. 10 : 10 = "a"
  22. 11 : 11 = "a"
  23. 12 : 12 = "a"
  24. 13 : 13 = "a"
  25. 14 : 14 = "a"
  26.  
  27. Matching "ab+"
  28. 0 : 2 = "abb"
  29. 5 : 9 = "abbbb"
  30.  
  31. Matching "ab?"
  32. 0 : 1 = "ab"
  33. 3 : 3 = "a"
  34. 4 : 4 = "a"
  35. 5 : 6 = "ab"
  36. 10 : 10 = "a"
  37. 11 : 11 = "a"
  38. 12 : 12 = "a"
  39. 13 : 13 = "a"
  40. 14 : 14 = "a"
  41.  
  42. Matching "ab{3}"
  43. 5 : 8 = "abbb"
  44.  
  45. Matching "ab{2,3}"
  46. 0 : 2 = "abb"
  47. 5 : 8 = "abbb"
  48. **************************************************
  49.  
  50.           11111
  51. 012345678901234
  52. abbaaabbbbaaaaa
  53.  
  54. Matching "[ab]"
  55. 0 : 0 = "a"
  56. 1 : 1 = "b"
  57. 2 : 2 = "b"
  58. 3 : 3 = "a"
  59. 4 : 4 = "a"
  60. 5 : 5 = "a"
  61. 6 : 6 = "b"
  62. 7 : 7 = "b"
  63. 8 : 8 = "b"
  64. 9 : 9 = "b"
  65. 10 : 10 = "a"
  66. 11 : 11 = "a"
  67. 12 : 12 = "a"
  68. 13 : 13 = "a"
  69. 14 : 14 = "a"
  70.  
  71. Matching "a[ab]+"
  72. 0 : 14 = "abbaaabbbbaaaaa"
  73.  
  74. Matching "a[ab]+?"
  75. 0 : 1 = "ab"
  76. 3 : 4 = "aa"
  77. 5 : 6 = "ab"
  78. 10 : 11 = "aa"
  79. 12 : 13 = "aa"
  80. **************************************************
  81.  
  82.           1111111111222222222233333333
  83. 01234567890123456789012345678901234567
  84. This is some text -- with punctuation.
  85.  
  86. Matching "[^-. ]+"
  87. 0 : 3 = "This"
  88. 5 : 6 = "is"
  89. 8 : 11 = "some"
  90. 13 : 16 = "text"
  91. 21 : 24 = "with"
  92. 26 : 36 = "punctuation"
  93. **************************************************
  94.  
  95.           1111111111222222222233333333
  96. 01234567890123456789012345678901234567
  97. This is some text -- with punctuation.
  98.  
  99. Matching "[a-z]+"
  100. 1 : 3 = "his"
  101. 5 : 6 = "is"
  102. 8 : 11 = "some"
  103. 13 : 16 = "text"
  104. 21 : 24 = "with"
  105. 26 : 36 = "punctuation"
  106.  
  107. Matching "[A-Z]+"
  108. 0 : 0 = "T"
  109.  
  110. Matching "[a-zA-Z]+"
  111. 0 : 3 = "This"
  112. 5 : 6 = "is"
  113. 8 : 11 = "some"
  114. 13 : 16 = "text"
  115. 21 : 24 = "with"
  116. 26 : 36 = "punctuation"
  117.  
  118. Matching "[A-Z][a-z]+"
  119. 0 : 3 = "This"
  120. **************************************************
  121.  
  122.           11111
  123. 012345678901234
  124. abbaaabbbbaaaaa
  125.  
  126. Matching "a."
  127. 0 : 1 = "ab"
  128. 3 : 4 = "aa"
  129. 5 : 6 = "ab"
  130. 10 : 11 = "aa"
  131. 12 : 13 = "aa"
  132.  
  133. Matching "b."
  134. 1 : 2 = "bb"
  135. 6 : 7 = "bb"
  136. 8 : 9 = "bb"
  137.  
  138. Matching "a.*b"
  139. 0 : 9 = "abbaaabbbb"
  140.  
  141. Matching "a.*?b"
  142. 0 : 1 = "ab"
  143. 3 : 6 = "aaab"
  144. **************************************************
  145.  
  146.           11111111112222222
  147. 012345678901234567890123456
  148. This is a prime #1 example!
  149.  
  150. Matching "\d+"
  151. 17 : 17 = "1"
  152.  
  153. Matching "\D+"
  154. 0 : 16 = "This is a prime #"
  155. 18 : 26 = " example!"
  156.  
  157. Matching "\s+"
  158. 4 : 4 = " "
  159. 7 : 7 = " "
  160. 9 : 9 = " "
  161. 15 : 15 = " "
  162. 18 : 18 = " "
  163.  
  164. Matching "\S+"
  165. 0 : 3 = "This"
  166. 5 : 6 = "is"
  167. 8 : 8 = "a"
  168. 10 : 14 = "prime"
  169. 16 : 17 = "#1"
  170. 19 : 26 = "example!"
  171.  
  172. Matching "\w+"
  173. 0 : 3 = "This"
  174. 5 : 6 = "is"
  175. 8 : 8 = "a"
  176. 10 : 14 = "prime"
  177. 17 : 17 = "1"
  178. 19 : 25 = "example"
  179.  
  180. Matching "\W+"
  181. 4 : 4 = " "
  182. 7 : 7 = " "
  183. 9 : 9 = " "
  184. 15 : 16 = " #"
  185. 18 : 18 = " "
  186. 26 : 26 = "!"
  187. **************************************************
  188.  
  189.           1111111111222
  190. 01234567890123456789012
  191. \d+ \D+ \s+ \S+ \w+ \W+
  192.  
  193. Matching "\\d\+"
  194. 0 : 2 = "\d+"
  195.  
  196. Matching "\\D\+"
  197. 4 : 6 = "\D+"
  198.  
  199. Matching "\\s\+"
  200. 8 : 10 = "\s+"
  201.  
  202. Matching "\\S\+"
  203. 12 : 14 = "\S+"
  204.  
  205. Matching "\\w\+"
  206. 16 : 18 = "\w+"
  207.  
  208. Matching "\\W\+"
  209. 20 : 22 = "\W+"
  210. **************************************************
  211.  
  212.           1111111111222222222233333333
  213. 01234567890123456789012345678901234567
  214. This is some text -- with punctuation.
  215.  
  216. Matching "^\w+"
  217. 0 : 3 = "This"
  218.  
  219. Matching "\A\w+"
  220. 0 : 3 = "This"
  221.  
  222. Matching "\w+\S*$"
  223. 26 : 37 = "punctuation."
  224.  
  225. Matching "\w+\S*\Z"
  226. 26 : 37 = "punctuation."
  227.  
  228. Matching "\w*t\w*"
  229. 13 : 16 = "text"
  230. 21 : 24 = "with"
  231. 26 : 36 = "punctuation"
  232.  
  233. Matching "\bt\w+"
  234. 13 : 16 = "text"
  235.  
  236. Matching "\w+t\b"
  237. 13 : 16 = "text"
  238.  
  239. Matching "\Bt\B"
  240. 23 : 23 = "t"
  241. 30 : 30 = "t"
  242. 33 : 33 = "t"
待续...

猜你在找的正则表达式相关文章