python - 正则表达式Regex

[python 中使用正则表达式的步骤](#python 中使用正则表达式的步骤)

正则表达式匹配模式

search()和findall()

常用字符分类

[组合使用re.IGNORECASE、re.DOTALL 和re.VERBOSE](#组合使用re.IGNORECASE、re.DOTALL 和re.VERBOSE)

[项目：电话号码和E-mail 地址提取程序](#项目：电话号码和E-mail 地址提取程序)

python 中使用正则表达式的步骤

1．用import re 导入正则表达式模块。

2．用re.compile()函数创建一个Regex 对象（记得使用原始字符串）。

3．向Regex 对象的search()方法传入想查找的字符串。它返回一个Match 对象。

4．调用Match 对象的group()方法，返回实际匹配文本的字符串。

python 复制代码

# Regex

# 导入模块
import re

# Phone-number pattern: three digits, three digits, four digits, separated by hyphens.
phone_pattern = r'\d{3}-\d{3}-\d{4}'

# Compile the pattern into a reusable Regex object.
phoneNumRegex = re.compile(phone_pattern)

# search() scans the given text and returns a Match object for the first hit,
# or None when the pattern does not occur at all.
mo = phoneNumRegex.search("The number is 415-444-7777")

# group() on the Match object returns the text that was actually matched.
if mo is not None:
    print("phone number found: " + mo.group())

正则表达式匹配模式

符号	模式说明
()	分组例如：正则表达式 r'(\d\d\d)-(\d\d\d-\d\d\d\d)'，第一对括号是第1 组。第二对括号是第2 组。向group()匹配对象方法传入整数1 或2，就可以取得匹配文本的不同部分。向group()方法传入0 或不传入参数，将返回整个匹配的文本。
\|	管道字符，逻辑"或" 匹配许多表达式中的一个例如：正则表达式 r'Batman\|Tina Fey' 将匹配'Batman'或'Tina Fey' 正则表达式 r'(B\|b)at(man\|bat)' 将匹配第一组(B\|b)中的'B'或'b'，以及第二组(man\|bat) 中的'man'或'bat'
?	匹配零次或一次，表明?前面的文本在不在，正则表达式都会认为匹配。例如：正则表达式 r'car(pen)?ter' 表达式中(pen)?表明(pen)为可选分组，匹配文本中，pen 将出现零次或一次。
*	匹配零次或多次即星号之前的分组，可以在文本中出现任意次例如正则表达式 r's(o)*' 文本'soooooo'，'so', 's'都可以匹配
+	匹配一次或多次，加号前面的分组必须"至少出现一次" 例如正则表达式 r's(o)+' 文本'soooooo' 和 'so' 可匹配， 's'不可以匹配
{}	匹配特定次数。使分组重复特定次数 {n}，表示{}前的分组重复n次 {n,m}，表示{}前的分组重复出现n~m次，注意：","两侧不要有空格 {n,}，不限最大值，至少重复n次 {,m}，重复0 ~ m次，至多重复m次例如：正则表达式 r's(o){3}' 表示 'sooo'会匹配，'soo'不会匹配， 'sooooooo'会匹配，匹配的结果为 'sooo' 正则表达式 r's(o){3,5}' 表示 'sooo'， 'soooo'，'sooooo'会匹配，'soo'不会匹配， 'sooooooo'会匹配，匹配的结果为 'sooooo'

贪婪模式和非贪婪模式

贪婪模式：默认模式，尽可能多地匹配所搜索的字符串。

非贪婪模式：尽可能少地匹配所搜索的字符串。

如何实现非贪婪模式？

在任何一个限制符（*,+,?，{n}，{n,}，{n,m}，{,m}）后面紧跟一个 ?，匹配模式即是非贪婪的。

例如，对于字符串"soooo"，

正则表达式 r's(o){3,5}' 贪婪模式下匹配结果为 'soooo'，

正则表达式 r's(o){3,5}?' 非贪婪模式下匹配结果为'sooo'

search()和findall()

作用：用于获取匹配正则表达式的字符串

区别:

search()返回一个Match对象，包含被查找字符串中的"第一次"匹配的文本

findall()返回一组字符串，即list，包含被查找字符串中的所有匹配。

findall()方法的返回结果的总结

1．如果调用在一个没有分组的正则表达式上，例如\d\d\d-\d\d\d-\d\d\d\d，方法

findall()将返回一个匹配字符串的列表，例如['415-555-9999', '212-555-0000']。

2．如果调用在一个有分组的正则表达式上，例如(\d\d\d)-(\d\d\d)-(\d\d\d\d)，方

法findall()将返回一个字符串的元组的列表（每个分组对应一个字符串），例如[('415',

'555', '1122'), ('212', '555', '0000')]。

python 复制代码

# Regex
# 正则表达式

# 导入re模块
import re
'''
1．用import re 导入正则表达式模块。
2．用re.compile()函数创建一个Regex 对象（记得使用原始字符串）。
3．向Regex 对象的search()方法传入想查找的字符串。它返回一个Match 对象。
4．调用Match 对象的group()方法，返回实际匹配文本的字符串。
'''
def base():
    """Show the basic regex workflow: compile, search(), then findall().

    Compiles a hyphenated phone-number pattern, prints the first match found
    by search() and then the list of every match returned by findall().
    """
    sample = "Cell: 415-555-9999 Work: 212-555-0000"

    # re.compile() turns the pattern string into a Regex object.
    phone_regex = re.compile(r'\d{3}-\d{3}-\d{4}')

    # search() returns a Match object for the first hit, or None if there is none.
    first_hit = phone_regex.search(sample)
    if first_hit is not None:
        print("phone number found: ", first_hit.group())  # phone number found:  415-555-9999

    # findall() returns a list holding every match in the text.
    every_hit = phone_regex.findall(sample)
    print("phone number list found: ", every_hit)  # phone number list found:  ['415-555-9999', '212-555-0000']


base()

# 利用括号分组
def use_parenthesis():
    """Show how parentheses create groups that can be read back individually.

    Group 1 is the area code and group 2 the rest of the number. With groups
    in the pattern, findall() returns a list of tuples (one string per group).
    """
    sample = "Cell: 415-555-9999 Work: 212-555-0000"

    # Same as (\d\d\d)-(\d\d\d-\d\d\d\d)
    grouped_regex = re.compile(r'(\d{3})-(\d{3}-\d{4})')

    hit = grouped_regex.search(sample)
    if hit is not None:
        area_code, local_number = hit.group(1), hit.group(2)
        print("parenthesis-group(1): ", area_code)       # parenthesis-group(1):  415
        print("parenthesis-group(2): ", local_number)    # parenthesis-group(2):  555-9999
        print("parenthesis-group(): " + hit.group())     # parenthesis-group(): 415-555-9999

    # findall() on a grouped pattern yields one tuple of group strings per match.
    every_hit = grouped_regex.findall(sample)
    print("phone number list found: ", every_hit)  # phone number list found:  [('415', '555-9999'), ('212', '555-0000')]


use_parenthesis()

# 利用管道"|"匹配多个分组
def use_channel():
    """Show alternation with the pipe character ``|``.

    First matches any one of several plain alternatives, then a pattern in
    which each group offers its own alternatives, printing the whole match
    and the individual groups.
    """
    # a|b matches either a or b; search() reports the first occurrence found.
    any_word_regex = re.compile(r'man|mobile|copter|bat')
    obj_mo = any_word_regex.search("OMG, there is a bat!")
    if obj_mo is not None:
        print("obj-group(): ", obj_mo.group())          # obj-group():  bat

    # Each parenthesised group picks one of its own alternatives.
    bat_regex = re.compile(r'(B|b)at(man|mobile|copter|bat)')
    bat_mo = bat_regex.search("Batman lost a wheel.")
    # Guard on bat_mo itself: it is the object dereferenced below, so checking
    # the unrelated obj_mo would not protect against a failed search here.
    if bat_mo is not None:
        print("channel-group(): ", bat_mo.group())      # channel-group():  Batman
        print("channel-groups(): ", bat_mo.groups())    # channel-groups():  ('B', 'man')
        print("channel-group(1): ", bat_mo.group(1))    # channel-group(1):  B
        print("channel-group(2): ", bat_mo.group(2))    # channel-group(2):  man


use_channel()

# 利用问号实现可选模式
def use_question_mark():
    """Show the ``?`` quantifier, which makes the preceding group optional.

    With car(pen)?ter, both 'carpenter' and 'carter' match; when the optional
    group takes no part in the match, its value is None.
    """
    optional_pen = re.compile(r'car(pen)?ter')

    # The optional group is present in the text.
    with_pen = optional_pen.search("Bob's father is a carpenter")
    if with_pen is not None:
        print("pen-group(): ", with_pen.group())       # pen-group():  carpenter
        print("pen-group(1): ", with_pen.group(1))     # pen-group(1):  pen
        print("pen-groups(): ", with_pen.groups())     # pen-groups():  ('pen',)

    # The optional group is absent from the text.
    without_pen = optional_pen.search("and Alice's father is a carter.")
    if without_pen is not None:
        print("non_pen-group(): ", without_pen.group())      # non_pen-group():  carter
        print("non_pen-group(1): ", without_pen.group(1))    # non_pen-group(1):  None
        print("non_pen-groups(): ", without_pen.groups())    # non_pen-groups():  (None,)


use_question_mark()

# 用星号匹配零次或多次
def use_start_mark():
    """Show the ``*`` quantifier: the preceding group may occur zero or more times.

    With s(o)*, the text after 's' may contain any number of 'o', including none.
    """
    star_regex = re.compile(r's(o)*')

    # (label printed, text searched); expected matches: soooooo, so, s
    cases = (
        ("many o: ", "you are soooooo beautiful."),
        ("one o: ", "you are so beautiful."),
        ("non o: ", "the t-shirt's size is 's'."),
    )
    for label, sentence in cases:
        found = star_regex.search(sentence)
        if found is not None:
            print(label, found.group())


use_start_mark()

# 用加号匹配一次或多次
def use_add_mark():
    """Show the ``+`` quantifier: the preceding group must occur at least once.

    With s(o)+, 'soooooo' and 'so' match, while a text with no 'so' in it
    produces no match and therefore prints nothing.
    """
    plus_regex = re.compile(r's(o)+')

    # (label for group(), label for groups(), text searched)
    cases = (
        ("o_mo-group(): ", "o_mo-groups(): ", "you are soooooo beautiful."),   # soooooo / ('o',)
        ("one_o-group(): ", "one_o-groups(): ", "you are so beautiful."),      # so / ('o',)
        ("non_o-group(): ", "non_o-groups(): ", "the t-shirt's size is 's'."), # no match: + needs one 'o'
    )
    for group_label, groups_label, sentence in cases:
        found = plus_regex.search(sentence)
        if found is not None:
            print(group_label, found.group())
            print(groups_label, found.groups())


use_add_mark()

# 用花括号匹配特定次数
def use_bracket():
    """Show ``{n,m}`` repetition in greedy and non-greedy form.

    Quantifier forms:
        s(o){3}    matches sooo
        s(o){3,5}  matches sooo, soooo, sooooo
        s(o){3,}   matches three or more 'o'
        s(o){,5}   matches zero to five 'o'

    Greedy (default) takes as many repetitions as allowed. Appending ``?``
    makes the quantifier non-greedy, so it takes as few as allowed; the lower
    bound still applies, so s(o){3,5}? matches exactly three 'o'.
    """
    three_o = "you are sooo beautiful."
    two_o = "you are soo beautiful."
    four_o = "you are soooo beautiful."

    # Greedy: match as many 'o' as possible, up to five.
    greedy_regex = re.compile(r's(o){3,5}')
    greedy_cases = (
        ("three_o-group(): ", "three_o-groups(): ", three_o),  # sooo / ('o',)
        ("two_o-group(): ", "two_o-groups(): ", two_o),        # no match: only two 'o', three required
        ("four_o-group(): ", "four_o-groups(): ", four_o),     # soooo / ('o',): greedy takes all four
    )
    for group_label, groups_label, sentence in greedy_cases:
        found = greedy_regex.search(sentence)
        if found is not None:
            print(group_label, found.group())
            print(groups_label, found.groups())

    # Non-greedy: a '?' after the braces asks for the shortest allowed match,
    # which is the lower bound of three 'o'.
    non_greedy_regex = re.compile(r's(o){3,5}?')
    non_greedy_cases = (
        ("non_greed_three_o-group(): ", "non_greed_three_o-groups(): ", three_o),  # sooo / ('o',)
        ("non_greed_two_o-group(): ", "non_greed_two_o-groups(): ", two_o),        # no match
        ("non_greed_four_o-group(): ", "non_greed_four_o-groups(): ", four_o),     # sooo / ('o',): stops at three
    )
    for group_label, groups_label, sentence in non_greedy_cases:
        found = non_greedy_regex.search(sentence)
        if found is not None:
            print(group_label, found.group())
            print(groups_label, found.groups())


use_bracket()

常用字符分类

字符	表示
\d	0 到9 的任何数字，等价于[0-9]
\w	任何字母、数字或下划线字符（可以认为是匹配"单词"字符），等价于[A-Za-z_0-9]
\s	空格、制表符或换行符（可以认为是匹配"空白"字符），等价于[ \f\n\r\t\v]
\D	除0 到9 的数字以外的任何字符，等价与[^0-9]
\W	除字母、数字和下划线以外的任何字符，等价于[^A-Za-z_0-9]
\S	除空格、制表符和换行符以外的任何字符，等价于[^ \f\n\r\t\v]。
\f	换页符。等价于\x0c和\cL。
\n	换行符。等价于\x0a和\cJ。
\r	回车符。等价于\x0d和\cM。
\t	制表符。等价于\x09和\cI。
\v	垂直制表符。等价于\x0b和\cK。
\	将下一个字符标记为或特殊字符、或原义字符、或向后引用、或八进制转义符。例如，"\\n"匹配\n。"\n"匹配换行符。序列"\\"匹配"\"，"\("则匹配"("。
^	匹配必须发生在被查找文本开始处。例如，正则表达式 r'^Hello' 匹配以'Hello'开始的字符串。
$	表示该字符串必须以这个正则表达式的模式结束。例如，正则表达式 r'\d$' 匹配以数字0 到9 结束的字符串
^$	同时使用^和 $，表明整个字符串必须匹配该模式，例如，正则表达式 r'^\d+$' 匹配从开始到结束都是数字的字符串
[]	包含,默认是一个字符长度。注意：在方括号内，普通的正则表达式符号不会被解释。所以不需要前面加上倒斜杠转义.、*、?或()字符。
[^]	不包含,默认是一个字符长度
[A-Z]	26个大写字母
[^A-Z]	非26个大写字母
[a-z]	26个小写字母
[^a-z]	非26个小写字母
[0-9]	0至9数字
[^0-9]	非0至9数字
[A-Za-z0-9]	26个大写字母、26个小写字母和0至9数字
[^A-Za-z0-9]	非26个大写字母、26个小写字母和0至9数字
[AHTW]	包含A或H或T或W字母。方括号内直接罗列字符即可，无需用','分隔（写成[A,H,T,W]时逗号会被当作普通字符一并匹配）
.	通配符，匹配除换行外所有单个字符
.*	任意文本，匹配除换行外的所有字符。回忆：句点字符表示"除换行外所有单个字符"，星号字符表示"前面字符出现零次或多次"

python 复制代码

# 自定义字符

import re
# Custom pattern \d+\s\w+ :
# one or more digits (\d+), then one whitespace character (\s), then one or
# more letter/digit/underscore characters (\w+).
xmasRegex = re.compile(r'\d+\s\w+')
# The result is named xmas_list rather than `list`, so the builtin list type
# stays usable in the rest of the script.
xmas_list = xmasRegex.findall(
    '12 drummers, 11 pipers, 10 lords, 9 ladies, 8 maids, '
    '\t7 swans, and 6 geese, 5 rings_, 4 birds , 3 hens, 2doves, 1 partridge1'
)
# '2doves' is absent from the result: there is no whitespace after the digit.
print(xmas_list)  # ['12 drummers', '11 pipers', '10 lords', '9 ladies', '8 maids',
                  #  '7 swans', '6 geese', '5 rings_', '4 birds', '3 hens', '1 partridge1']

# [aeiouAEIOU] matches any single vowel, upper or lower case.
vowelRegex = re.compile(r'[aeiouAEIOU]')
vowel_list = vowelRegex.findall('RoboCop eats baby food. BABY FOOD.')
print(vowel_list)  # ['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o', 'A', 'O', 'O']

# [^aeiouAEIOU] is the negated class: any single character that is not a
# vowel, which includes spaces and punctuation.
consonantRegex = re.compile(r'[^aeiouAEIOU]')
consonant_list = consonantRegex.findall('RoboCop eats baby food. BABY FOOD .')
# The space before the final '.' in the text shows up as its own ' ' entry.
print(consonant_list)  # ['R', 'b', 'C', 'p', ' ', 't', 's', ' ', 'b', 'b', 'y', ' ', 'f', 'd', '.', ' ', 'B', 'B', 'Y', ' ', 'F', 'D', ' ', '.']

# A caret (^) at the start of the pattern requires the match to begin at the
# start of the searched text.
beginsWithHello = re.compile(r'^Hello')
begin_with_mo = beginsWithHello.search('Hello world!')
if begin_with_mo is not None:
    print(begin_with_mo.group())  # Hello
begin_without_mo = beginsWithHello.search('He said hello.')
print(begin_without_mo is None)  # True

# A dollar sign ($) at the end of the pattern requires the text to end with
# that pattern.
endsWithNumber = re.compile(r'\d$')
end_with_mo = endsWithNumber.search('Your number is 42')
if end_with_mo is not None:
    print(end_with_mo.group())  # 2
end_without_mo = endsWithNumber.search('Your number is forty two.')
print(end_without_mo is None)  # True

# Using ^ and $ together requires the entire text to match the pattern;
# matching only part of it is not enough.
wholeStringIsNum = re.compile(r'^\d+$')
whole_num_mo = wholeStringIsNum.search('1234567890')
if whole_num_mo is not None:
    print(whole_num_mo.group())  # 1234567890
with_alphabet_mo = wholeStringIsNum.search('12345xyz67890')
print(with_alphabet_mo is None)  # True
with_blank_mo = wholeStringIsNum.search('12 34567890')
print(with_blank_mo is None)  # True

# The dot (.) is a wildcard for any single character except a newline.
atRegex = re.compile(r'.at')
at_list = atRegex.findall('The cat in the hat sat on the flat mat.')
print(at_list)  # ['cat', 'hat', 'sat', 'lat', 'mat']

# .* matches any run of such characters, so each group below captures
# whatever text follows its label.
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
name_mo = nameRegex.search('First Name: Al Last Name: Sweigart')
if name_mo is not None:
    first_name, last_name = name_mo.groups()
    print(first_name)  # Al
    print(last_name)   # Sweigart

其他

匹配换行符

通过传入re.DOTALL 作为re.compile()的第二个参数，可以让句点字符匹配所有字符，包括换行字符。

python 复制代码

# 匹配换行符

import re

# Text with embedded newlines, searched by both patterns below.
directives = 'Serve the public trust.\nProtect the innocent.\nUphold the law.'

# Without flags the dot stops at a newline, so only the first line matches.
noNewlineRegex = re.compile('.*')
first_line_mo = noNewlineRegex.search(directives)
print(first_line_mo.group())  # Serve the public trust.

# With re.DOTALL as the second argument of compile(), the dot also matches
# newlines, so the match runs through all three lines.
newlineRegex = re.compile('.*', re.DOTALL)
all_lines_mo = newlineRegex.search(directives)
print(all_lines_mo.group())  # all three lines, newlines included

不区分大小写的匹配

若只关心匹配字母，不关心它们是大写或小写，可以向re.compile()传入re.IGNORECASE 或re.I，作为第二个参数。

python 复制代码

# 不区分大小写的匹配

import re

# Case-insensitive matching: pass re.IGNORECASE (or its alias re.I) as the
# second argument of re.compile().
robocop = re.compile(r'robocop', re.I)
rob_mo = robocop.search('RoboCop is part man, part machine, all cop.')
print(rob_mo.group())  # RoboCop
print(robocop.search('ROBOCOP protects the innocent.').group())  # ROBOCOP
print(robocop.search('Al, why does your programming book talk about robocop so much?').group())  # robocop

替换字符串

可以使用Regex对象的sub()方法替换字符串。sub()方法需要两个参数，第一个参数为要替换为的字符串，第二个参数为原字符串。sub()最后返回的内容为替换后的字符串

python 复制代码

# 用sub()方法替换字符串

import re

# Regex.sub(replacement, text) returns a copy of text in which every match of
# the pattern has been replaced by the replacement string.
# \w+ stands for one or more letters, digits or underscores.
namesRegex = re.compile(r'Agent \w+')
secret_report = 'Agent Alice gave the secret documents to Agent Bob.'
# Every 'Agent <name>' occurrence becomes CENSORED.
sub_str = namesRegex.sub('CENSORED', secret_report)
print(sub_str)  # CENSORED gave the secret documents to CENSORED.

管理复杂的正则表达式

可以向re.compile()传入变量re.VERBOSE，作为第二个参数。【verbose 冗长的】告诉re.compile()，忽略正则表达式字符串中的空白符和注释。同时使用三重引号(''')，创建多行字符串。

python 复制代码

# Managing complex regular expressions

import re

# The same pattern written on one line is hard to read:
# phoneRegex = re.compile(r'((\d{3}|\(\d{3}\))?(\s|-|\.)?\d{3}(\s|-|\.)\d{4}(\s*(ext|x|ext\.)\s*\d{2,5})?)')

# With re.VERBOSE, whitespace and #-comments inside the pattern are ignored,
# and a triple-quoted raw string lets the pattern span several lines.
phoneRegex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?              # area code
    (\s|-|\.)?                      # separator
    \d{3}                           # first 3 digits
    (\s|-|\.)                       # separator
    \d{4}                           # last 4 digits
    (\s*(ext|x|ext\.)\s*\d{2,5})?   # extension; the dot is escaped so it is literal
    )''', re.VERBOSE)

组合使用re.IGNORECASE、re.DOTALL 和re.VERBOSE

re.compile()函数只接受一个值作为它的第二参数。可以使用管道字符（|）将变量组合起来，从而绕过这个限制。

# Flags are combined with the bitwise OR operator so that several of them can
# be passed as the single second argument of re.compile().
combined_flags = re.IGNORECASE | re.DOTALL
someRegexValue = re.compile('foo', combined_flags)

项目：电话号码和E-mail 地址提取程序

简单功能：在一篇长的网页或文章中，找出所有电话号码和邮箱地址

项目框架：

从剪切板取得文本

找出文本中所有的电话号码和E-mail地址

将它们粘贴到剪切板

实现：

用pyperclip模块复制和粘贴字符串

创建两个正则表达式，一个匹配电话，一个匹配邮箱

对两个正则表达式，找到所有匹配，而不只是第一次匹配

将匹配好的字符串整理好格式，放在一个字符串中，用于粘贴

如果文本中没有找到匹配，显示某种消息

python 复制代码

# found phone numbers and emails on the clipboard
# 电话号码和e-mail 地址的提取

'''
在一篇长的网页或文章中，找出所有电话号码和邮箱地址
框架：
从剪切板取得文本
找出文本中所有的电话号码和E-mail地址
将它们粘贴到剪切板

实现：
用pyperclip模块复制和粘贴字符串
创建两个正则表达式，一个匹配电话，一个匹配邮箱
对两个正则表达式，找到所有匹配，而不只是第一次匹配
将匹配好的字符串整理好格式，放在一个字符串中，用于粘贴
如果文本中没有找到匹配，显示某种消息

'''
# 导入模块
import re, pyperclip

# Phone-number pattern.
# findall() returns one tuple per match, indexed by group:
#   [0] whole number, [1] area code, [2] separator, [3] first 3 digits,
#   [4] separator, [5] last 4 digits, [6] whole extension,
#   [7] extension keyword, [8] extension digits
phone_regex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?                  # area code, e.g. 021 or (021)
    (\s|-|\.)?                          # separator: whitespace, hyphen or dot
    (\d{3})                             # first 3 digits
    (\s|-|\.)?                          # separator
    (\d{4})                             # last 4 digits
    (\s*(ext|x|ext\.)\s*(\d{2,5}))?     # extension; the dot is escaped so it is literal
    )''', re.VERBOSE)

# E-mail pattern.
# findall() returns (whole address, top-level-domain part) per match.
email_regex = re.compile(r'''(
    [a-zA-Z0-9._%+-]+       # username
    @                       # @ symbol
    [a-zA-Z0-9.-]+          # domain name (A-Z, not A-z, which would also admit [ \ ] ^ _ `)
    (\.[a-zA-Z]{2,4})       # dot-something
    )''', re.VERBOSE)

# Find matches in the clipboard text.
text = str(pyperclip.paste())

matches = []
for groups in phone_regex.findall(text):
    # Normalise to area-first3-last4 using the area code (index 1), the first
    # three digits (index 3) and the last four digits (index 5).
    phone_num = "-".join([groups[1], groups[3], groups[5]])
    # Index 8 holds the extension digits; it is '' when there is no extension.
    if groups[8] != "":
        phone_num += ' x' + groups[8]
    matches.append(phone_num)

# Index 0 of each e-mail tuple is the whole address.
matches.extend(groups[0] for groups in email_regex.findall(text))

# Copy the results to the clipboard, or report that nothing was found.
if matches:
    joined = "\n".join(matches)
    pyperclip.copy(joined)
    print("copied to clipboard: ")
    print(joined)
else:
    print("No phone number or email address found.")

本章内容来自于《Python编程快速上手-让繁琐工作自动化》