eclipse远程连接hadoop进行开发测试
马克飞象 由于hadoop环境搭建在仿真系统上,在本地远程连接hdfs和提交mapreduce的job任务时经历了一些曲折,现整理如下:
-
hadoop环境 :hadoop2.5.2 jdk1.7 eclipse_luno hadoop_eclipse插件2.6;
wordcount代码如下:
package test;
import java.io.File;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context ) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context
) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.set("df.default.name", "hdfs://10.128.7.140:9000");
conf.set("hadoop.job.user","hadoop");
// conf.set("mapred.job.tracker", "10.128.7.140:9001");
Path in = new Path("hdfs://10.128.7.140:9000/test/test.txt");
Path out = new Path("hdfs://10.128.7.140:9000/usr/output");
out.getFileSystem(conf).delete(out, true);
Job job = new Job(conf, "word——count");
//
File jarFile = EJob.createTempJar("bin");
EJob.addClasspath("/home/hadoop/hadoop-1.2.1/conf");
ClassLoader classLoader = EJob.getClassLoader();
Thread.currentThread().setContextClassLoader(classLoader);
((JobConf) job.getConfiguration()).setJar(jarFile.toString());
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job,out);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
- 异常1 :找不到winutils.exe
2015-03-27 18:01:42,982 ERROR [main] util.Shell (Shell.java:getWinUtilsPath(373)) - Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:355)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:370)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:363)
at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:78)
at org.apache.hadoop.security.Groups.parseStaticMapping(Groups.java:93)
at org.apache.hadoop.security.Groups.<init>(Groups.java:77)
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:240)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:257)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:234)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:749)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:734)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:607)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2748)
at org.apache.hadoop.fs.FileSystem$Cache$Key.<init>(FileSystem.java:2740)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2606)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:368)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296)
at test.WordCount.main(WordCount.java:73)
2015-03-27 18:01:43,812 INFO [main] Configuration.deprecation (Configuration.java:warnOnceIfDeprecated(1019)) - session.id is deprecated. Instead, use dfs.metrics.session-id
2015-03-27 18:01:43,812 INFO [main]
解决:下载winutils.exe文件:
下载地址:https://github.com/srccodes/hadoop-common-2.2.0-bin
将bin目录下的文件最好都下载到本地,然后替换本地hadoop安装目录下的bin目录
然后设置HADOOP_HOME环境变量,或者在Java的main方法中通过代码设置:
System.setProperty("hadoop.home.dir", "D://hadoop");
- 异常2 :上述问题解决后,又出现如下错误:
2015-03-27 18:07:38,072 INFO [main] mapreduce.JobSubmitter (JobSubmitter.java:submitJobInternal(441)) - Cleaning up the staging area file:/tmp/hadoop-Administrator/mapred/staging/Administrator1538933894/.staging/job_local1538933894_0001
Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:570)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:977)
at org.apache.hadoop.util.DiskChecker.checkAccessByFileMethods(DiskChecker.java:173)
at org.apache.hadoop.util.DiskChecker.checkDirAccess(DiskChecker.java:160)
at org.apache.hadoop.util.DiskChecker.checkDir(DiskChecker.java:94)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.confChanged(LocalDirAllocator.java:285)
at org.apache.hadoop.fs.LocalDirAllocator$AllocatorPerContext.getLocalPathForWrite(LocalDirAllocator.java:344)
at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:150)
at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:131)
at org.apache.hadoop.fs.LocalDirAllocator.getLocalPathForWrite(LocalDirAllocator.java:115)
at org.apache.hadoop.mapred.LocalDistributedCacheManager.setup(LocalDistributedCacheManager.java:131)
at org.apache.hadoop.mapred.LocalJobRunner$Job.<init>(LocalJobRunner.java:163)
at org.apache.hadoop.mapred.LocalJobRunner.submitJob(LocalJobRunner.java:731)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:432)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1285)
at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1282)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1614)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1282)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:1303)
at test.WordCount.main(WordCount.java:92)
解决办法:将刚下载的bin目录中的hadoop.dll文件放到C:\Windows\System32目录下,问题解决。
- eclipse_hadoop2.6的插件可以在附件中下载,适用于eclipse luna(4.4)和hadoop2.5、2.6
相关推荐
windows下 eclipse操作hadoop集群 插件
Hadoop伪分布式部署文档(包括服务器伪分布式部署,本地hadoop开发环境部署,eclipse远程连接Hadoop服务器,实测无数遍,综合网上无数文档)
windows Eclipse远程调用Hadoop
Eclipse中远程调试Hadoop必备资料:hadoop-eclipse-plugin-1.1.1和hadoop-core-1.0.2-modified;已经在eclipse-jee-juno-SR1-win32-x86_64和hadoop1.1.1 下测试过。
eclipse连接远程hadoop集群开发时权限不足问题解决方案 (2).pdfeclipse连接远程hadoop集群开发时权限不足问题解决方案 (2).pdf
目的很简单,为进行研究与学习,部署一个hadoop运行环境,并搭建一个...2.在windows 上能够使用eclipse连接ubuntu系统上部署的hadoop进行开发与测试 3.只需要在eclipse中“Run on Hadoop",不需要自己打包上传运行。
eclipse连接hadoop所需要的hadoop.dll和eclipse插件和hadoop运行案例
eclipse远程调试出现Exception in thread “main” java.lang.UnsatisfiedLinkError,eclipse4.5远程调试hadoop2.7.4依赖包 ,方法 将匹配的hadoop.dll、winutils.exe、libwinutils.lib拷贝到C:\Windows\System32 详情...
hadoop-eclipse2.5.2、hadoop-eclipse2.6.0、hadoop-eclipse2.6.5
hadoop-eclipse2.7.1、hadoop-eclipse2.7.2、hadoop-eclipse2.7.3
eclipse配置hadoop,并且如何在eclipse中进行mapreduce的开发
Hadoop在eclipse上面安装插件,以及对Hadoop jar包和hadoop-common jar包的安装,环境变量的配置
Windows XP上运行着eclipse以及hadoop-eclipse插件,但是本文的重点不是说如何在eclipse上安装hadoop的开发插件。
基于Eclipse的Hadoop应用开发环境配置
NULL 博文链接:https://dacoolbaby.iteye.com/blog/1730321
用于eclipse连接hadoop使用到的插件包,这里有几个版本的,不同的eclipse版本可以使用的插件包不一样,需要注意。
Eclipse连接hadoop开发,主要是在windows环境下开发hadoop时的连接配置文档,有详细的配置过程,值得拥有!!
用Windows下的Eclipse远程连接Hadoop的插件,适用于64位操作系统,hadoop2.6.0版本
是windows下eclipse连接hadoop的操作文档,注意系统环境变量一定要配置
用于windows下eclipse连接hadoop2.2.0的插件以及eclipse版本,亲测可用