1. Hadoop configuration files: core-site.xml, hdfs-site.xml, yarn-site.xml, workers
- Basic architecture: ResourceManager, NodeManager, ApplicationMaster (MRAppMaster), and the job submission/execution flow
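These files are picked up automatically by a Java client from the classpath. A minimal sketch of probing what a Configuration resolves to (the class name ConfigProbe and the NameNode address are assumptions, not from the original notes):

java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ConfigProbe {
    public static void main(String[] args) throws IOException {
        // core-site.xml / hdfs-site.xml are read automatically from the classpath.
        Configuration conf = new Configuration();
        // Explicit set() calls override the values from the config files.
        conf.set("fs.defaultFS", "hdfs://localhost:8020"); // assumed NameNode RPC address
        System.out.println("fs.defaultFS = " + conf.get("fs.defaultFS"));
        System.out.println("dfs.replication = " + conf.get("dfs.replication", "3"));
        FileSystem fs = FileSystem.get(conf);
        System.out.println("Filesystem URI: " + fs.getUri());
        fs.close();
    }
}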
2. Hadoop command-line operations
hdfs dfs -put [-f] [-p] <localsrc> ... <dst>
hdfs dfs -get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>
hdfs dfs -put [local path] [HDFS path]
hadoop fs -mkdir -p <hdfs dir>
3. Hadoop Java API operations
Mapper, Reducer, InputFormat, OutputFormat, Comparator, Partitioner, compression
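The classes below cover Mapper, Reducer, a Combiner, a grouping comparator, custom Input-/OutputFormat and output compression. A custom Partitioner is the one item in this list without an example, so here is a minimal hedged sketch (the class name and partitioning rule are illustrative, not from the original notes):

java
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Routes words to reduce tasks by their first character; purely illustrative.
public class FirstLetterPartitioner extends Partitioner<Text, LongWritable> {
    @Override
    public int getPartition(Text key, LongWritable value, int numPartitions) {
        if (key.getLength() == 0) {
            return 0;
        }
        char first = key.toString().charAt(0);
        // Spread keys over the configured number of reduce tasks.
        return (first & Integer.MAX_VALUE) % numPartitions;
    }
}
// Wired up in a driver (hypothetical): job.setPartitionerClass(FirstLetterPartitioner.class);
// job.setNumReduceTasks(3);

The main WordCount classes follow.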
java
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /**
     * Called once at the start of the task (initialization).
     *
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
    }

    /**
     * User business logic: called once per input record (byte offset, line of text).
     *
     * @param key
     * @param value
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String str = value.toString();
        // Split the line on whitespace and emit (word, 1) for each token.
        String[] words = str.split("\\s+");
        for (String word : words) {
            context.write(new Text(word), new LongWritable(1));
        }
    }

    /**
     * Called once at the end of the task (resource cleanup).
     *
     * @param context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        super.cleanup(context);
    }
}
java
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the counts emitted for this word by the mappers/combiners.
        long count = 0;
        for (LongWritable value : values) {
            count += value.get();
        }
        context.write(key, new LongWritable(count));
    }
}
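The driver below registers a WordCountCombiner that the original notes never define. As a hedged sketch, a combiner for this job can simply reuse the reducer logic, since addition is associative and the key/value types stay the same:

java
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Runs on the map side to pre-aggregate (word, 1) pairs before the shuffle.
public class WordCountCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long count = 0;
        for (LongWritable value : values) {
            count += value.get();
        }
        context.write(key, new LongWritable(count));
    }
}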
java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static void main(String[] args) {
        Configuration config = new Configuration();
        System.setProperty("HADOOP_USER_NAME", "xiemeng");
        // NameNode RPC address (8020 is the default RPC port in Hadoop 3.x; 9870 is only the web UI).
        config.set("fs.defaultFS", "hdfs://192.168.64.128:8020");
        config.set("mapreduce.framework.name", "yarn");
        config.set("yarn.resourcemanager.hostname", "192.168.64.128");
        // Required when submitting from a Windows client to a Linux cluster.
        config.set("mapreduce.app-submission.cross-platform", "true");
        config.set("mapreduce.job.jar", "file:/D:/code/hadoop-start-demo/target/hadoop-start-demo-1.0-SNAPSHOT.jar");
        try {
            Job job = Job.getInstance(config);
            job.setJarByClass(WordCountDriver.class);
            job.setMapperClass(WordCountMapper.class);
            job.setCombinerClass(WordCountCombiner.class);
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            FileInputFormat.setInputPaths(job, new Path("/wordcount/input"));
            FileOutputFormat.setOutputPath(job, new Path("/wordcount2/output"));
            // Only for the OrderBean secondary-sort job (its map output key is OrderBean, not Text):
            // job.setGroupingComparatorClass(OrderGroupingComparator.class);
            // Compress the final output with bzip2.
            FileOutputFormat.setCompressOutput(job, true);
            FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
            boolean complete = job.waitForCompletion(true);
            System.exit(complete ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
java
import org.apache.hadoop.io.WritableComparator;

// Groups records belonging to the same order id into one reduce() call
// (secondary-sort pattern: sort by OrderBean, group by orderId only).
public class OrderGroupingComparator extends WritableComparator {

    public OrderGroupingComparator() {
        // true: create OrderBean instances for comparison.
        super(OrderBean.class, true);
    }

    @Override
    public int compare(Object o1, Object o2) {
        OrderBean orderBean = (OrderBean) o1;
        OrderBean orderBean2 = (OrderBean) o2;
        if (orderBean.getOrderId() > orderBean2.getOrderId()) {
            return 1;
        } else if (orderBean.getOrderId() < orderBean2.getOrderId()) {
            return -1;
        } else {
            return 0;
        }
    }
}
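OrderBean itself is also not shown in the notes. A minimal hedged sketch, assuming an orderId plus a price field (the field names and the price-descending ordering are assumptions):

java
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

// Composite key for the secondary-sort example: sort by orderId, then by price descending.
public class OrderBean implements WritableComparable<OrderBean> {

    private long orderId;
    private double price; // assumed second field

    public long getOrderId() {
        return orderId;
    }

    public void set(long orderId, double price) {
        this.orderId = orderId;
        this.price = price;
    }

    @Override
    public int compareTo(OrderBean o) {
        if (this.orderId != o.orderId) {
            return Long.compare(this.orderId, o.orderId);
        }
        return -Double.compare(this.price, o.price); // highest price first within an order
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(orderId);
        out.writeDouble(price);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        orderId = in.readLong();
        price = in.readDouble();
    }
}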
java
import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FilterOutputFormat extends FileOutputFormat<Text, NullWritable> {

    @Override
    public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        return new CustomWriter(taskAttemptContext);
    }

    protected static class CustomWriter extends RecordWriter<Text, NullWritable> {

        private final FSDataOutputStream fos137;
        private final FSDataOutputStream fosOther;

        public CustomWriter(TaskAttemptContext context) throws IOException {
            // Open both output files once; creating them inside write() would
            // overwrite the file for every record.
            FileSystem fs = FileSystem.get(context.getConfiguration());
            fos137 = fs.create(new Path("file:/D:/hadoop/output/format/out/137"), true);
            fosOther = fs.create(new Path("file:/D:/hadoop/output/format/out/138"), true);
        }

        @Override
        public void write(Text text, NullWritable nullWritable) throws IOException, InterruptedException {
            // Route each record into one of the two files by key prefix.
            String key = text.toString();
            FSDataOutputStream fos = key.startsWith("137") ? fos137 : fosOther;
            fos.write(key.getBytes());
            fos.write("\n".getBytes());
        }

        @Override
        public void close(TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            IOUtils.closeQuietly(fos137);
            IOUtils.closeQuietly(fosOther);
        }
    }
}
java
public class WholeFileInputFormat extends FileInputFormat<Text, BytesWritable> {

    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    @Override
    public RecordReader<Text, BytesWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
        WholeRecordReader reader = new WholeRecordReader();
        reader.initialize(inputSplit, taskAttemptContext);
        return reader;
    }
}
java
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import lombok.Data;
import org.apache.hadoop.io.WritableComparable;

@Data
public class FlowBeanObj implements WritableComparable<FlowBeanObj> {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // Field order must match between write() and readFields().
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    // Natural ordering: ascending by sumFlow.
    @Override
    public int compareTo(FlowBeanObj o) {
        return Long.compare(this.getSumFlow(), o.getSumFlow());
    }
}
java
public class WholeRecordReader extends RecordReader<Text, BytesWritable> {

    private Configuration config;
    private FileSplit fileSplit;
    private boolean isProgress = true;
    private BytesWritable value = new BytesWritable();
    private Text k = new Text();
    private FileSystem fs;
    private FSDataInputStream fis;

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
        fileSplit = (FileSplit) inputSplit;
        this.config = context.getConfiguration();
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        try {
            if (isProgress) {
                // Read the whole (non-splittable) file into a single value.
                byte[] contents = new byte[(int) fileSplit.getLength()];
                Path path = fileSplit.getPath();
                fs = path.getFileSystem(config);
                fis = fs.open(path);
                IOUtils.readFully(fis, contents, 0, contents.length);
                value.set(contents, 0, contents.length);
                // Use the file path as the key.
                k.set(fileSplit.getPath().toString());
                isProgress = false;
                return true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeQuietly(fis);
        }
        return false;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return k;
    }

    @Override
    public BytesWritable getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return 0;
    }

    @Override
    public void close() throws IOException {
        // The input stream is already closed in nextKeyValue();
        // the FileSystem instance is cached and shared, so it is not closed here.
        IOUtils.closeQuietly(fis);
    }
}
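Neither custom format above is registered anywhere in the notes. A hedged sketch of a driver that pairs WholeFileInputFormat with a SequenceFileOutputFormat to pack small files (the SequenceFile pairing and the paths are assumptions, not from the original):

java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class WholeFileDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(WholeFileDriver.class);
        // Each input file becomes a single (path, bytes) record.
        job.setInputFormatClass(WholeFileInputFormat.class);
        // Pack the records into one SequenceFile; for the FilterOutputFormat example
        // the analogous call is job.setOutputFormatClass(FilterOutputFormat.class).
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(BytesWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/smallfiles/input"));   // assumed path
        FileOutputFormat.setOutputPath(job, new Path("/smallfiles/output")); // assumed path
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}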
java
public class HdfsClient {

    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {
        Configuration config = new Configuration();
        config.set("fs.defaultFS", "hdfs://localhost:9000");
        config.set("dfs.replication", "2");
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), config, "xieme");

        // Directory and file operations.
        fs.mkdirs(new Path("/hive3"));
        fs.copyFromLocalFile(new Path("file:/d:/elasticsearch.txt"), new Path("/hive3"));
        fs.copyToLocalFile(false, new Path("/hive3/elasticsearch.txt"), new Path("file:/d:/hive3/elasticsearch2.txt"));
        fs.rename(new Path("/hive3/elasticsearch.txt"), new Path("/hive3/elasticsearch2.txt"));

        // Recursively list files together with their block locations.
        RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs.listFiles(new Path("/"), true);
        while (locatedFileStatusRemoteIterator.hasNext()) {
            LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
            System.out.print(next.getPath().getName() + "\t");
            System.out.print(next.getLen() + "\t");
            System.out.print(next.getGroup() + "\t");
            System.out.print(next.getOwner() + "\t");
            System.out.print(next.getPermission() + "\t");
            System.out.print(next.getPath() + "\t");
            BlockLocation[] blockLocations = next.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                for (String host : blockLocation.getHosts()) {
                    System.out.print(host + "\t");
                }
            }
            System.out.println("");
        }

        //fs.delete(new Path("/hive3"), true);
        /*FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isDirectory()) {
                System.out.println(fileStatus.getPath().getName());
            }
        }*/

        // Stream copy: local file -> HDFS.
        FileInputStream fis = new FileInputStream("d:/elasticsearch.txt");
        FSDataOutputStream fos = fs.create(new Path("/hive/elasticsearch.txt"));
        IOUtils.copyBytes(fis, fos, config);
        IOUtils.closeStream(fis);
        IOUtils.closeStream(fos);

        // Stream copy: HDFS -> local, skipping the first byte via seek().
        FSDataInputStream fis2 = fs.open(new Path("/hive/elasticsearch.txt"));
        FileOutputStream fos2 = new FileOutputStream("d:/elasticsearch.tar.gz.part1");
        fis2.seek(1);
        IOUtils.copyBytes(fis2, fos2, config);
        /*byte[] buf = new byte[1024];
        for (int i = 0; i < 128; i++) {
            while (fis2.read(buf) != -1) {
                fos2.write(buf);
            }
        }*/
        IOUtils.closeStream(fis2);
        IOUtils.closeStream(fos2);
        fs.close();
    }
}
- Hadoop tuning and optimization