一、需求描述
请把下面文本中的手机号码、邮箱、座机号码和热线电话都爬取出来:
来黑马程序员学习Java,电话:18512566758,18354678765;或者联系邮箱:boniu@itcast.cn;座机电话:010365178955,010-989512356;邮箱bozai@itcast.cn,邮箱2:diei0009@163.com;热线电话:400-618-9090,400-618-4000,4006184000,4006180909。
二、代码实现
java
package com.itheima.regex;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Demo that scrapes mobile numbers, e-mail addresses, landline numbers and
 * 400 hotline numbers out of free-form text using a single regular expression.
 */
public class TestCase2 {

    /**
     * E-mail address: at least two word characters, {@code @}, a 2-20 character
     * domain label and one or two dot-separated suffixes of 2-10 characters.
     */
    private static final String EMAIL = "\\w{2,}@\\w{2,20}(?:\\.\\w{2,10}){1,2}";

    /** Mobile number: 11 digits starting with 1 followed by 3-9, not embedded in a longer digit run. */
    private static final String MOBILE = "(?<!\\d)1[3-9]\\d{9}(?!\\d)";

    /**
     * Landline number: area code starting with 0, an optional hyphen, then a
     * subscriber number that does not start with 0; not embedded in a longer digit run.
     */
    private static final String LANDLINE = "(?<!\\d)0\\d{2,7}-?[1-9]\\d{4,19}(?!\\d)";

    /** Hotline number: 400 prefix followed by two digit groups, hyphens optional; not embedded in a longer digit run. */
    private static final String HOTLINE = "(?<!\\d)400-?\\d{3,7}-?\\d{3,7}(?!\\d)";

    /**
     * Combined scraping rule, compiled once.
     *
     * <p>The e-mail alternative is deliberately listed first: with the mobile
     * alternative first, an address whose local part is a mobile number
     * (for example {@code 13812345678@qq.com}) would be reported as the bare
     * number and the address itself would be lost.
     */
    private static final Pattern CONTACT_PATTERN =
            Pattern.compile(EMAIL + "|" + MOBILE + "|" + LANDLINE + "|" + HOTLINE);

    /**
     * Program entry point; runs the scraping demo.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        method1();
    }

    /**
     * Scrapes the built-in sample text and prints every contact that was found,
     * one per line, in order of appearance.
     */
    public static void method1() {
        // Sample text
        String data = "来黑马程序员学习Java,\n" +
                " 电话:18512566758,18354678765;\n" +
                " 或者联系邮箱:boniu@itcast.cn;\n" +
                " 座机电话:010365178955,010-989512356;\n" +
                " 邮箱bozai@itcast.cn,\n" +
                " 邮箱2:diei0009@163.com;\n" +
                " 热线电话:400-618-9090,400-618-4000,4006184000,4006180909。";
        for (String contact : extract(data)) {
            System.out.println(contact);
        }
    }

    /**
     * Extracts every mobile number, e-mail address, landline number and 400
     * hotline number from the given text.
     *
     * @param text the text to scan; {@code null} is treated like an empty string
     * @return the matches in order of appearance; empty (never {@code null})
     *         when nothing matches
     */
    public static List<String> extract(String text) {
        List<String> found = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return found;
        }
        Matcher matcher = CONTACT_PATTERN.matcher(text);
        // find() resumes after the previous match, so each contact is reported once.
        while (matcher.find()) {
            found.add(matcher.group());
        }
        return found;
    }
}