1.使用正则完成下列内容的匹配
-
匹配陕西省区号 029-12345
-
匹配邮政编码 745100
-
匹配身份证号 62282519960504337X
python
import re
# 1. 匹配陕西省区号(格式:029-数字,区号固定029,后面跟任意位数数字)
def match_shaanxi_area_code(text):
    """Return True if ``text`` is exactly ``029-`` followed by one or more digits.

    Args:
        text: Candidate string to validate.

    Returns:
        bool: True when the whole string has the form ``029-<digits>``,
        False otherwise.
    """
    # fullmatch replaces match + "$": "$" also matches just before a trailing
    # newline, so "029-12345\n" used to be accepted. re.ASCII limits \d to 0-9
    # so full-width and other Unicode digits are rejected.
    return re.fullmatch(r"029-\d+", text, flags=re.ASCII) is not None
# 2. 匹配邮政编码(6位纯数字)
def match_postal_code(text):
    """Return True if ``text`` is exactly six ASCII digits.

    Args:
        text: Candidate string to validate.

    Returns:
        bool: True when the whole string is six digits, False otherwise.
    """
    # fullmatch rejects a trailing newline that "^...$" with re.match lets
    # through; re.ASCII keeps \d from matching non-ASCII (e.g. full-width) digits.
    return re.fullmatch(r"\d{6}", text, flags=re.ASCII) is not None
# 3. 匹配邮箱(用户名@域名.后缀,支持常见格式)
def match_email(text):
    """Return True if ``text`` looks like a ``user@domain.tld`` e-mail address.

    The local part may contain letters, digits and ``. _ % + -``; the domain is
    one or more dot-separated labels (letters, digits, ``_``, ``-``) followed by
    an alphabetic top-level domain of at least two letters. This is a pragmatic
    format check, not a full RFC 5322 validator.

    Args:
        text: Candidate string to validate.

    Returns:
        bool: True when the whole string matches the format, False otherwise.
    """
    # Multi-label domains (e.g. "mail.school.edu.cn") and dotted / plus-tagged
    # local parts are common and were rejected by the previous single-label
    # pattern. fullmatch also rejects a trailing newline that "$" would allow.
    pattern = r"[A-Za-z0-9._%+-]+@(?:[A-Za-z0-9_-]+\.)+[A-Za-z]{2,}"
    return re.fullmatch(pattern, text) is not None
# 4. 匹配身份证号(18位,最后一位可以是数字或X/x)
def match_id_card(text):
    """Return True if ``text`` has the shape of an 18-character ID number.

    Only the format is checked: 17 ASCII digits followed by a digit or
    ``X``/``x``. The embedded date and the check digit are not verified.

    Args:
        text: Candidate string to validate.

    Returns:
        bool: True when the whole string matches the format, False otherwise.
    """
    # fullmatch rejects a trailing newline that "^...$" with re.match accepts;
    # re.ASCII restricts \d to 0-9 so Unicode digits do not pass.
    return re.fullmatch(r"\d{17}[\dXx]", text, flags=re.ASCII) is not None
# 测试代码
if __name__ == "__main__":
    # Demo run: each validator is exercised with one accepted and one rejected
    # sample, printed in the order area code, postal code, e-mail, ID number.
    demo_cases = (
        ("区号匹配:", match_shaanxi_area_code, "029-12345"),
        ("区号不匹配:", match_shaanxi_area_code, "010-12345"),
        ("邮编匹配:", match_postal_code, "745100"),
        ("邮编不匹配:", match_postal_code, "74510"),
        ("邮箱匹配:", match_email, "lijian@xianoupeng.com"),
        ("邮箱不匹配:", match_email, "lijianxianoupeng.com"),
        ("身份证匹配:", match_id_card, "62282519960504337X"),
        ("身份证不匹配:", match_id_card, "62282519960504337"),
    )
    for label, validator, sample in demo_cases:
        print(label, validator(sample))
2.爬取学校官网,获取所有图片路径并将路径存储在本地文件中,使用装饰器完成
python
# 定义装饰器:将爬取的结果保存到本地文件
def save_to_file(file_path):
    """Build a decorator that saves the wrapped function's result to a file.

    The wrapped function is expected to return a sized iterable of paths. After
    each call the file at ``file_path`` is overwritten with one numbered line
    per item (``"1. <path>"``, ``"2. <path>"``, ...), a summary is printed, and
    the original result is returned unchanged.

    Args:
        file_path: Destination text file; opened in write mode as UTF-8.

    Returns:
        A decorator to apply to the function that produces the paths.
    """
    import functools

    def decorator(func):
        # wraps keeps the decorated function's __name__ / __doc__ intact.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Run the wrapped function to obtain the list of paths.
            result = func(*args, **kwargs)
            # Persist the numbered entries in a single batched write.
            with open(file_path, "w", encoding="utf-8") as f:
                f.writelines(
                    f"{idx}. {path}\n" for idx, path in enumerate(result, 1)
                )
            print(f"图片路径已保存到 {file_path},共 {len(result)} 条")
            return result

        return wrapper

    return decorator
# 爬虫函数:爬取学校官网的所有图片路径(替换为你的学校官网地址)
@save_to_file("school_images.txt")  # the decorator argument is the output file path
def crawl_school_images(url):
    """Fetch ``url`` and return the unique image URLs found in its ``<img>`` tags.

    Every non-empty ``src`` attribute is resolved against the final response URL
    so that root-relative (``/a.png``), document-relative (``img/a.png``) and
    protocol-relative (``//host/a.png``) sources all become absolute URLs.
    Duplicates are dropped while first-seen order is preserved.

    NOTE(review): ``requests`` and ``BeautifulSoup`` are not imported anywhere
    in the visible snippet - confirm the module provides them
    (``import requests`` / ``from bs4 import BeautifulSoup``).

    Args:
        url: Address of the page to crawl.

    Returns:
        list[str]: Absolute image URLs, or an empty list if anything fails.
    """
    from urllib.parse import urljoin

    try:
        # Send a browser-like User-Agent with the request.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # turn HTTP error statuses into exceptions
        response.encoding = response.apparent_encoding  # use the detected encoding

        soup = BeautifulSoup(response.text, "html.parser")

        img_paths = []
        seen = set()  # O(1) membership test; the list keeps document order
        for img in soup.find_all("img"):
            img_src = (img.get("src") or "").strip()
            if not img_src:
                continue
            # Normalise BEFORE de-duplicating, so "/a.png" and its absolute
            # form count as the same image. urljoin replaces the previous
            # string concatenation, which broke for page URLs with a path and
            # for "//host/..." sources.
            absolute_src = urljoin(response.url, img_src)
            if absolute_src not in seen:
                seen.add(absolute_src)
                img_paths.append(absolute_src)
        return img_paths
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return an
        # empty result instead of raising to the caller.
        print(f"爬取失败:{e}")
        return []