MAC下hadoop开发环境搭建系列（五） -

qingshou117

浏览: 46846 次
性别:
来自: 杭州

最近访客更多访客>>

xinbindanzi

tyzqqq

zhangcaiyanbeyond

FengChou

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

MAC下hadoop开发环境搭建系列（五）

博客分类：

Hadoop

JEDIS REDIS HADOOP MAPREDUCE CONFIG

分享一个mapreduce程序，作用：将hdfs文件数据批量加载进redis内存数据库：

1.源代码：

/**
 * Program:
 * A MapReduce job that batch-loads HDFS file data into a Redis cluster using Jedis.
 * History:
 * Created by Qingshou Chen on 15/11/13.
 */

package com.asiainfo.bdcenter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import redis.clients.jedis.HostAndPort;
import redis.clients.jedis.JedisCluster;

import java.io.IOException;
import java.util.*;

public class BatchToRedis extends Configured implements Tool{

    /**
     * 继承Configured,implements Tool 可以方便读取命令行-conf -D 配置信息和加载其他配置文件(通过addResource方法)
     */

    /**
     * 日志处理
     */
    private static Logger logger = LoggerFactory.getLogger(BatchToRedis.class);
    public  int  run(String[] args) throws Exception{

         /**
         * 检查调用参数是否正确
         */
        if (args.length != 0){
            System.err.println("Usage:hadoop jar BatchToRedis.jar");
            System.err.println("Attention:Please set parameters at config.xml in the same path of BatchToRedis.jar");
            System.exit(-1);
        }

        /**
         * 初始化配置 config.xml在src目录下,执行jar包时需要将该文件跟jar包放在同一个目录下
         */
        Configuration conf = getConf();
        conf.addResource(new Path("./config.xml"));

        /*Date date = new Date();
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        String sdf = df.format(date);*/
        String jobname = "BatchToRedis:"+conf.get("redis.table.name");
        logger.info("***************JOB:"+jobname+" START***************");
        logger.info("****This Program is used by batch load data to redis cluster!****");
        logger.info("****The result is set hashkeys like:hmset table_name:key fieldname1 fieldvalue1 [fieldnameN fieldvalueN]****");
        logger.info("***************"+"INPUT PATH:"+conf.get("input.path")+"***************");
        logger.info("***************"+"DATA FIELD NAME:"+conf.get("data.field.name")+"***************");
        logger.info("***************"+"DATA FIELD SPLIT:"+conf.get("data.field.split")+"***************");
        logger.info("***************"+"REDIS CLUSTER:"+conf.get("redis.cluster.node.list")+"***************");

        /**
         * SET JOB
         */
        Job job = Job.getInstance(conf,jobname);
        job.setJarByClass(BatchToRedis.class);
        FileInputFormat.addInputPath(job, new Path(conf.get("input.path")));
        FileInputFormat.setMinInputSplitSize(job,1);
        FileInputFormat.setMaxInputSplitSize(job,Long.parseLong(conf.get("map.split.size"))*1024*1024);
//        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(BatchToRedisMapper.class);
        job.setReducerClass(BatchToRedisReducer.class);
        job.setNumReduceTasks(0);
//        job.setOutputKeyClass(Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        /**
         * 显示job执行时间和结果
         */
        long currentTime = System.currentTimeMillis();
        boolean success = job.waitForCompletion(true);
        logger.info("***************Job Escape: " + StringUtils.formatTimeDiff(System.currentTimeMillis(), currentTime)+"***************");
        if (!success)

        {
            logger.info("***************JOB FAILED***************");
            return 1;
        }

        logger.info("***************JOB END SUCCESSFULL***************");
        return 0;
    }
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new BatchToRedis(), args);
        System.exit(exitCode);

    }

    /**
     * map
     */
    public static class BatchToRedisMapper extends Mapper<LongWritable, Text, Text, Text> {

        private  JedisCluster jc;

        protected void setup(Context context) throws IOException{

            /**
            * 连接redis集群
            */
            Set<HostAndPort> jedisClusterNodes = new HashSet<HostAndPort>();
            Configuration conf = context.getConfiguration();
            String[] redisnodes = conf.get("redis.cluster.node.list").split(",");
            for(String redisnode : redisnodes){
                jedisClusterNodes.add(new HostAndPort(redisnode.split(":")[0],Integer.parseInt(redisnode.split(":")[1])));
            }
            this.jc = new JedisCluster(jedisClusterNodes);
        }
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString();
            Configuration conf = context.getConfiguration();
            String[] fields = line.split(conf.get("data.field.split"));
            String[] fieldnames = conf.get("data.field.name").split(",");
            Map<String, String> map = new HashMap<String, String>();

            /**
             * 设置主键 table_name+主键属性(需放在数据文件第一列)
             */
            String hashkey = conf.get("redis.table.name") + ":" + fields[0];

            /**
             * 设置其他属性
             */
            for(int i = 1;i<fields.length;i++) {
                map.put(fieldnames[i], fields[i]);
            }

            jc.del(hashkey);
            jc.hmset(hashkey, map);
//            System.out.println(jc.hget(hashkey,"name"));

        }

        protected void cleanup(Context context){
            /**
             * 关闭redis集群
             */
            jc.close();
        }
    }

    /**
     * reduce
     */
    public static class BatchToRedisReducer extends Reducer<Text, Text, Text, IntWritable> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            System.out.println("reduce execute!!!!");
        }
    }

}

2.配置文件,config.xml:

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
   <!-- Maximum map input split size, in MB -->
   <property>
      <name>map.split.size</name>
      <value>32</value>
   </property>
   <!-- Path of the text data file(s) on HDFS -->
   <property>
      <name>input.path</name>
      <value>/bdcenter/tmp/test/in</value>
   </property>
   <!-- Field names of the data file, comma-separated. Note: the primary key must be the first column. -->
   <property>
      <name>data.field.name</name>
      <value>bill_id,name,age,sex</value>
   </property> 
   <!-- Field separator of the data file (the job passes it to String.split, so it is treated as a regular expression) -->
   <property>
      <name>data.field.split</name>
      <value>,</value>
   </property> 
   <!-- Table name used as the prefix of each Redis hash key (table_name:primary_key) -->
   <property>
       <name>redis.table.name</name>
      <value>dw_user</value>
   </property>    

   <!-- Redis cluster node list: comma-separated host:port entries -->
   <property>
      <name>redis.cluster.node.list</name>
      <value>10.192.168.74:6379,10.192.168.75:6379,10.192.168.76:6379,10.192.168.77:6379,10.192.168.78:6379,10.192.168.79:6379,10.192.168.80:6379,10.192.168.81:6379,10.192.168.82:6379,10.192.168.83:6379</value>
   </property>
</configuration>