1、从网络上爬取姓氏和名字,组合成男生和女生的姓名,并写入到本地文件中
//获取百家姓的网址:
// https://hanyu.baidu.com/shici/detail?from=aladdin&pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&smp_names=termBrand2%2Cpoem1
//获取男孩姓名: http://www.haoming8.cn/baobao/10881.html
//获取女孩姓名: http://www.haoming8.cn/baobao/7641.html
package Day13_IO;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Generates fake person records of the form {@code name-gender-age} from surnames and given
 * names scraped from three web pages, and writes them to a local text file.
 *
 * <p>{@link #main} downloads the pages, extracts raw name fragments with regular expressions,
 * normalises them into one surname list and two given-name lists, combines them into unique
 * full names via {@link #getName}, and writes one record per line to {@link #OUTPUT_PATH}.
 */
public class IOTest01 {

    /** Page listing the "Hundred Family Surnames" text. */
    private static final String FAMILY_NAME_URL =
            "https://hanyu.baidu.com/shici/detail?from=aladdin&pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&smp_names=termBrand2%2Cpoem1";

    /** Page listing boys' given names. */
    private static final String BOY_NAME_URL = "http://www.haoming8.cn/baobao/10881.html";

    /** Page listing girls' given names. */
    private static final String GIRL_NAME_URL = "http://www.haoming8.cn/baobao/7641.html";

    /** File the generated records are written to. */
    private static final String OUTPUT_PATH = "E:\\java\\javase\\basic\\javadown\\test5.txt";

    /** Upper bound for establishing the HTTP connection, so an unreachable host cannot hang the program. */
    private static final int CONNECT_TIMEOUT_MS = 10_000;

    /** Upper bound for a single blocking read on the response stream. */
    private static final int READ_TIMEOUT_MS = 30_000;

    /** Content-Type parameter that carries the response charset. */
    private static final String CHARSET_PREFIX = "charset=";

    /**
     * Scrapes the three pages, builds 70 boy and 70 girl records and writes them to
     * {@link #OUTPUT_PATH}, one record per line, encoded as UTF-8.
     *
     * @param args unused
     * @throws IOException if a page cannot be downloaded or the output file cannot be written
     * @throws IllegalArgumentException if the scraped data is too small to build the requested
     *         number of unique names (see {@link #getName})
     */
    public static void main(String[] args) throws IOException {
        // 1. Download each page as one big string.
        String familyNameStr = webCrawler(FAMILY_NAME_URL);
        String boyNameStr = webCrawler(BOY_NAME_URL);
        String girlNameStr = webCrawler(GIRL_NAME_URL);

        // 2. The raw pages are not the target data; pull the interesting fragments out with regexes.
        // NOTE(review): inside a character class '|' is a literal pipe, not alternation, so the
        // delimiter classes below also accept '|' -- confirm that is intended.
        ArrayList<String> familyNameNetList = gatData(familyNameStr, "([\\u4e00-\\u9fa5]{4})([,|。])", 1);
        ArrayList<String> boyNameList = gatData(boyNameStr, "([\\u4e00-\\u9fa5]{2})([、|。])", 1);
        ArrayList<String> girlNameStrList = gatData(girlNameStr, "(.. ){4}..", 0);

        // 3. Normalise the fragments.
        // Surnames arrive in groups of four characters; every character becomes one surname.
        ArrayList<String> familyList = splitIntoCharacters(familyNameNetList);
        // Boy names may repeat on the page; keep the first occurrence of each.
        ArrayList<String> boyList = new ArrayList<>(new LinkedHashSet<>(boyNameList));
        // Girl names arrive as space-separated runs; split them into single names.
        ArrayList<String> girlList = splitOnSpaces(girlNameStrList);

        // 4. Combine into "name-gender-age" records.
        ArrayList<String> personName = getName(familyList, boyList, girlList, 70, 70);

        // 5. Write the records. try-with-resources closes the file even if a write fails.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(OUTPUT_PATH), StandardCharsets.UTF_8))) {
            for (String record : personName) {
                writer.write(record);
                writer.newLine();
            }
        }
    }

    /** Returns every character of every input string as its own one-character string, in order. */
    private static ArrayList<String> splitIntoCharacters(List<String> groups) {
        ArrayList<String> characters = new ArrayList<>();
        for (String group : groups) {
            for (int i = 0; i < group.length(); i++) {
                characters.add(String.valueOf(group.charAt(i)));
            }
        }
        return characters;
    }

    /** Splits every input string on single spaces and returns all pieces in order. */
    private static ArrayList<String> splitOnSpaces(List<String> runs) {
        ArrayList<String> pieces = new ArrayList<>();
        for (String run : runs) {
            Collections.addAll(pieces, run.split(" "));
        }
        return pieces;
    }

    /**
     * Builds {@code boyCount} unique boy names and {@code girlCount} unique girl names by pairing
     * random surnames with random given names, and formats each as {@code name-gender-age}.
     *
     * <p>Boy records come first in the returned list, followed by girl records. Boys get a random
     * age in 8..17 and girls in 8..15.
     * NOTE(review): the two age ranges differ -- confirm that is intended.
     *
     * <p>The input lists are only read; they are not reordered or modified.
     *
     * @param familyList surnames to draw from
     * @param boyList    boys' given names to draw from
     * @param girlList   girls' given names to draw from
     * @param boyCount   number of boy records to produce, must not be negative
     * @param girlCount  number of girl records to produce, must not be negative
     * @return the generated records, boys first
     * @throws IllegalArgumentException if a count is negative or exceeds the number of
     *         distinct surname/given-name pairs available for that gender
     */
    public static ArrayList<String> getName(ArrayList<String> familyList,
                                            ArrayList<String> boyList,
                                            ArrayList<String> girlList,
                                            int boyCount,
                                            int girlCount) {
        Random random = new Random();
        Set<String> boyNames = pickUniqueNames(familyList, boyList, boyCount, random, "boy");
        Set<String> girlNames = pickUniqueNames(familyList, girlList, girlCount, random, "girl");

        ArrayList<String> records = new ArrayList<>(boyNames.size() + girlNames.size());
        for (String boy : boyNames) {
            int boyAge = random.nextInt(10) + 8;
            records.add(boy + "-男-" + boyAge);
        }
        for (String girl : girlNames) {
            int girlAge = random.nextInt(8) + 8;
            records.add(girl + "-女-" + girlAge);
        }
        return records;
    }

    /**
     * Draws random surname + given-name concatenations until {@code count} distinct names exist.
     *
     * <p>The request is validated up front so the sampling loop cannot spin forever on an
     * unsatisfiable count. The bound assumes distinct (surname, given name) pairs concatenate to
     * distinct strings, which holds when all surnames have the same length, as the
     * single-character surnames produced by {@code main} do.
     */
    private static Set<String> pickUniqueNames(List<String> surnames,
                                               List<String> givenNames,
                                               int count,
                                               Random random,
                                               String label) {
        if (count < 0) {
            throw new IllegalArgumentException(label + " name count must not be negative: " + count);
        }
        Set<String> picked = new LinkedHashSet<>();
        if (count == 0) {
            return picked;
        }

        // Work on de-duplicated copies: this leaves the caller's lists untouched and makes the
        // size product below the true number of distinct pairs.
        List<String> distinctSurnames = new ArrayList<>(new LinkedHashSet<>(surnames));
        List<String> distinctGivenNames = new ArrayList<>(new LinkedHashSet<>(givenNames));
        long combinations = (long) distinctSurnames.size() * distinctGivenNames.size();
        if (count > combinations) {
            throw new IllegalArgumentException("Cannot build " + count + " unique " + label
                    + " names from " + distinctSurnames.size() + " surnames and "
                    + distinctGivenNames.size() + " given names");
        }

        while (picked.size() < count) {
            String surname = distinctSurnames.get(random.nextInt(distinctSurnames.size()));
            String givenName = distinctGivenNames.get(random.nextInt(distinctGivenNames.size()));
            picked.add(surname + givenName);
        }
        return picked;
    }

    /**
     * Collects one capture group from every match of {@code regex} in the given text.
     *
     * @param familyNamestr text to search (any text, not only the surname page)
     * @param regex         regular expression to apply
     * @param index         capture group to collect; {@code 0} is the whole match
     * @return the selected group of each match, in order of appearance; empty if nothing matches
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is invalid
     * @throws IndexOutOfBoundsException if a match is found and {@code regex} has no group {@code index}
     */
    public static ArrayList<String> gatData(String familyNamestr, String regex, int index) {
        ArrayList<String> matches = new ArrayList<>();
        Matcher matcher = Pattern.compile(regex).matcher(familyNamestr);
        while (matcher.find()) {
            matches.add(matcher.group(index));
        }
        return matches;
    }

    /**
     * Downloads the resource at the given URL and returns its body as text.
     *
     * <p>The body is decoded with the charset named in the response's Content-Type header, or
     * UTF-8 when none is given, instead of the platform default charset, so the Chinese text
     * decodes the same way on every machine. Connect and read timeouts are applied and the
     * stream is always closed.
     *
     * @param net URL to download
     * @return the decoded response body
     * @throws IOException if the URL is malformed, the connection fails or times out, or reading fails
     */
    public static String webCrawler(String net) throws IOException {
        URLConnection conn = new URL(net).openConnection();
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);

        StringBuilder body = new StringBuilder();
        try (InputStream in = conn.getInputStream();
             Reader reader = new BufferedReader(new InputStreamReader(in, charsetOf(conn)))) {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                body.append(buffer, 0, read);
            }
        }
        return body.toString();
    }

    /** Returns the charset named in the connection's Content-Type header, or UTF-8 if absent or unusable. */
    private static Charset charsetOf(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            for (String parameter : contentType.split(";")) {
                String trimmed = parameter.trim();
                if (trimmed.regionMatches(true, 0, CHARSET_PREFIX, 0, CHARSET_PREFIX.length())) {
                    String name = trimmed.substring(CHARSET_PREFIX.length()).replace("\"", "").trim();
                    try {
                        return Charset.forName(name);
                    } catch (IllegalArgumentException ignored) {
                        // Unknown or malformed charset label: fall back to UTF-8 below.
                        break;
                    }
                }
            }
        }
        return StandardCharsets.UTF_8;
    }
}
2、同样的操作,使用工具包hutool生成姓名假数据
package Day13_IO;
import cn.hutool.Hutool;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ReUtil;
import cn.hutool.http.HttpUtil;
import java.util.*;
/**
 * Hutool-based variant of the fake-name generator: downloads three web pages with
 * {@code HttpUtil}, extracts surnames and given names with {@code ReUtil}, combines them into
 * {@code name-gender-age} records and writes them to a local file with {@code FileUtil}.
 */
public class IOTest02 {

    /** Page listing the "Hundred Family Surnames" text. */
    private static final String FAMILY_NAME_URL =
            "https://hanyu.baidu.com/shici/detail?from=aladdin&pid=0b2f26d4c0ddb3ee693fdb1137ee1b0d&smp_names=termBrand2%2Cpoem1";

    /** Page listing boys' given names. */
    private static final String BOY_NAME_URL = "http://www.haoming8.cn/baobao/10881.html";

    /** Page listing girls' given names. */
    private static final String GIRL_NAME_URL = "http://www.haoming8.cn/baobao/7641.html";

    /** File the generated records are written to. */
    private static final String OUTPUT_PATH = "E:\\java\\javase\\basic\\javadown\\test5.txt";

    /**
     * Scrapes the three pages, prints the extracted fragments, builds 70 boy and 70 girl
     * records and writes them to {@link #OUTPUT_PATH} as UTF-8.
     *
     * @param args unused
     * @throws IllegalArgumentException if the scraped data is too small to build the requested
     *         number of unique names (see {@link #getName})
     */
    public static void main(String[] args) {
        // 1. Download each page as a string via Hutool's HTTP helper.
        String familyNameNetstr = HttpUtil.get(FAMILY_NAME_URL);
        String boyNamestr = HttpUtil.get(BOY_NAME_URL);
        String girlNamestr = HttpUtil.get(GIRL_NAME_URL);

        // 2. Extract the raw fragments with regular expressions.
        // NOTE(review): inside a character class '|' is a literal pipe, not alternation, so the
        // delimiter classes below also accept '|' -- confirm that is intended.
        List<String> familyNameList = ReUtil.findAll("([\\u4e00-\\u9fa5]{4})([,|。])", familyNameNetstr, 1);
        List<String> boyNameList = ReUtil.findAll("([\\u4e00-\\u9fa5]{2})([、|。])", boyNamestr, 1);
        List<String> girlNameList = ReUtil.findAll("(.. ){4}..", girlNamestr, 0);
        System.out.println(familyNameList);
        System.out.println(boyNameList);
        System.out.println(girlNameList);

        // 3. Normalise the fragments.
        // Surnames arrive in groups of four characters; every character becomes one surname.
        ArrayList<String> familyList = splitIntoCharacters(familyNameList);
        // Boy names may repeat on the page; keep the first occurrence of each.
        ArrayList<String> boyList = new ArrayList<>(new LinkedHashSet<>(boyNameList));
        // Girl names arrive as space-separated runs; split them into single names.
        ArrayList<String> girlList = splitOnSpaces(girlNameList);

        // 4. Combine into "name-gender-age" records.
        ArrayList<String> personName = getName(familyList, boyList, girlList, 70, 70);

        // 5. Write the records, one per line.
        FileUtil.writeLines(personName, OUTPUT_PATH, "UTF-8");
    }

    /** Returns every character of every input string as its own one-character string, in order. */
    private static ArrayList<String> splitIntoCharacters(List<String> groups) {
        ArrayList<String> characters = new ArrayList<>();
        for (String group : groups) {
            for (int i = 0; i < group.length(); i++) {
                characters.add(String.valueOf(group.charAt(i)));
            }
        }
        return characters;
    }

    /** Splits every input string on single spaces and returns all pieces in order. */
    private static ArrayList<String> splitOnSpaces(List<String> runs) {
        ArrayList<String> pieces = new ArrayList<>();
        for (String run : runs) {
            Collections.addAll(pieces, run.split(" "));
        }
        return pieces;
    }

    /**
     * Builds {@code boyCount} unique boy names and {@code girlCount} unique girl names by pairing
     * random surnames with random given names, and formats each as {@code name-gender-age}.
     *
     * <p>Boy records come first in the returned list, followed by girl records. Boys get a random
     * age in 8..17 and girls in 8..15.
     * NOTE(review): the two age ranges differ -- confirm that is intended.
     *
     * <p>The input lists are only read; they are not reordered or modified.
     *
     * @param familyList surnames to draw from
     * @param boyList    boys' given names to draw from
     * @param girlList   girls' given names to draw from
     * @param boyCount   number of boy records to produce, must not be negative
     * @param girlCount  number of girl records to produce, must not be negative
     * @return the generated records, boys first
     * @throws IllegalArgumentException if a count is negative or exceeds the number of
     *         distinct surname/given-name pairs available for that gender
     */
    public static ArrayList<String> getName(ArrayList<String> familyList,
                                            ArrayList<String> boyList,
                                            ArrayList<String> girlList,
                                            int boyCount,
                                            int girlCount) {
        Random random = new Random();
        Set<String> boyNames = pickUniqueNames(familyList, boyList, boyCount, random, "boy");
        Set<String> girlNames = pickUniqueNames(familyList, girlList, girlCount, random, "girl");

        ArrayList<String> records = new ArrayList<>(boyNames.size() + girlNames.size());
        for (String boy : boyNames) {
            int boyAge = random.nextInt(10) + 8;
            records.add(boy + "-男-" + boyAge);
        }
        for (String girl : girlNames) {
            int girlAge = random.nextInt(8) + 8;
            records.add(girl + "-女-" + girlAge);
        }
        return records;
    }

    /**
     * Draws random surname + given-name concatenations until {@code count} distinct names exist.
     *
     * <p>The request is validated up front so the sampling loop cannot spin forever on an
     * unsatisfiable count. The bound assumes distinct (surname, given name) pairs concatenate to
     * distinct strings, which holds when all surnames have the same length, as the
     * single-character surnames produced by {@code main} do.
     */
    private static Set<String> pickUniqueNames(List<String> surnames,
                                               List<String> givenNames,
                                               int count,
                                               Random random,
                                               String label) {
        if (count < 0) {
            throw new IllegalArgumentException(label + " name count must not be negative: " + count);
        }
        Set<String> picked = new LinkedHashSet<>();
        if (count == 0) {
            return picked;
        }

        // Work on de-duplicated copies: this leaves the caller's lists untouched and makes the
        // size product below the true number of distinct pairs.
        List<String> distinctSurnames = new ArrayList<>(new LinkedHashSet<>(surnames));
        List<String> distinctGivenNames = new ArrayList<>(new LinkedHashSet<>(givenNames));
        long combinations = (long) distinctSurnames.size() * distinctGivenNames.size();
        if (count > combinations) {
            throw new IllegalArgumentException("Cannot build " + count + " unique " + label
                    + " names from " + distinctSurnames.size() + " surnames and "
                    + distinctGivenNames.size() + " given names");
        }

        while (picked.size() < count) {
            String surname = distinctSurnames.get(random.nextInt(distinctSurnames.size()));
            String givenName = distinctGivenNames.get(random.nextInt(distinctGivenNames.size()));
            picked.add(surname + givenName);
        }
        return picked;
    }
}