Storm consuming Kafka

折月煮酒, submitted on 2020-01-07 13:22:42


Program entry point

When a topology (a Topology is made up of Spouts and Bolts) is submitted, it is distributed to supervisors in the Storm cluster. A supervisor runs multiple workers, and each worker contains multiple executors (thread pools); the Spout and Bolt tasks are assigned to these executors and computed in parallel.

public class StormMain {
    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        // Use TopologyBuilder to assemble the topology
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // Set the spout. The parallelism hint of 2 means two threads consume Kafka messages
        // concurrently, and those two threads run in two different executors. For example, with
        // messages 1..10 in Kafka, one thread might receive 1,3,4,5,6 and the other 2,7,8,9,10.
        topologyBuilder.setSpout("readfilespout", new ReadKafkaSpout(), 2);
        // The parallelism hint of 4 means four bolt threads receive the data emitted by the spout
        topologyBuilder.setBolt("PrintBolt", new PrintBolt(), 4).shuffleGrouping("readfilespout");
        // Prepare a configuration
        Config config = new Config();
        // Storm topologies can be submitted in two ways: local mode or cluster mode.
        // LocalCluster localCluster = new LocalCluster();
        // localCluster.submitTopology("wordcount", config, topologyBuilder.createTopology());
        // In a Storm cluster, workers are the unit of resource allocation. If a topology does not
        // specify a worker count, the default value is used.
        config.setNumWorkers(2);
        // Submit to the cluster
        StormSubmitter.submitTopology("kafka-topology", config, topologyBuilder.createTopology());
    }
}
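
As an aside, instead of hand-rolling a consumer inside a spout, the storm-kafka-client module ships a ready-made KafkaSpout. The following is only a sketch of wiring it in, assuming that dependency is on the classpath and reusing the broker address and topic from this article; note that its default output field for the message body is "value", not "ks".

// Sketch only: requires the storm-kafka-client dependency; field names follow its defaults.
KafkaSpoutConfig<String, String> spoutConfig = KafkaSpoutConfig
        .builder("192.168.5.207:9092", "kafka-storm")
        .setProp(ConsumerConfig.GROUP_ID_CONFIG, "kafka-storm-group")
        .build();
topologyBuilder.setSpout("readfilespout", new KafkaSpout<>(spoutConfig), 2);
// The downstream bolt would then read input.getStringByField("value") instead of "ks".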

The readfilespout (ReadKafkaSpout) code

// Spout that reads Kafka messages (executed in parallel)

public class ReadKafkaSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;
    private KafkaConsumer<String, String> kafkaConsumer = null;
    private SpoutOutputCollector collector;

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // Create the Kafka consumer and keep a reference to the collector
        init();
        this.collector = collector;
    }

    // Storm calls this method repeatedly in an endless loop.
    public void nextTuple() {
        ConsumerRecords<String, String> records = kafkaConsumer.poll(1000);
        for (ConsumerRecord<String, String> record : records) {
            // Emit each Kafka message value as a one-field tuple
            collector.emit(new Values(record.value()));
            System.out.println("Thread name = " + Thread.currentThread().getName()
                    + "   Storm received a Kafka message and sent it downstream == " + record.value());
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("ks"));
    }

    public void init() {
        Properties p = new Properties();
        p.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.5.207:9092");
        p.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        p.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        p.put(ConsumerConfig.GROUP_ID_CONFIG, "kafka-storm-group");

        kafkaConsumer = new KafkaConsumer<String, String>(p);
        kafkaConsumer.subscribe(Collections.singletonList("kafka-storm")); // subscribe to the topic
    }
}
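
One detail the spout above leaves out is resource cleanup: the KafkaConsumer is never closed when the topology is killed. A minimal sketch, using the close() hook that BaseRichSpout already provides, would be to add the following method to ReadKafkaSpout:

    // Sketch: release the Kafka consumer when the spout is shut down.
    @Override
    public void close() {
        if (kafkaConsumer != null) {
            kafkaConsumer.close();
        }
    }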

//PrintBolt 

public class PrintBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;
    private OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        // Keep a reference to the collector (unused here, but needed if the bolt emits downstream)
        this.collector = collector;
    }

    public void execute(Tuple input) {
        // Read the field emitted by the upstream spout
        String value = input.getStringByField("ks");
        System.out.println("Thread name = " + Thread.currentThread().getName()
                + "   downstream bolt received data == " + value);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // This bolt is the end of the topology and emits nothing, so no fields are declared
    }
}
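
Because the spout above emits tuples without a message ID (unanchored), PrintBolt does not have to ack them. If the bolt were extended to forward data to a further bolt with reliability enabled, execute() would look roughly like this sketch (the field name "word" is only an illustration, not part of the original code):

    // Sketch: forward the value downstream and ack the input tuple.
    public void execute(Tuple input) {
        String value = input.getStringByField("ks");
        collector.emit(input, new Values(value));   // anchor the new tuple to the input
        collector.ack(input);                       // mark the input tuple as processed
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word"));       // declare the forwarded field
    }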

(4) Package the program as storm-test.jar and submit it: ./bin/storm jar ./storm-test.jar glodon.gldon.test.storm.kafka.StormMain stormTopology
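
To see the topology do anything after submission, some messages must be produced to the kafka-storm topic. A minimal test producer sketch, assuming the same broker address as in the spout configuration above:

// Sketch: send a few test messages to the topic the spout subscribes to.
Properties p = new Properties();
p.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.5.207:9092");
p.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
p.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
KafkaProducer<String, String> producer = new KafkaProducer<String, String>(p);
for (int i = 1; i <= 10; i++) {
    producer.send(new ProducerRecord<String, String>("kafka-storm", String.valueOf(i)));
}
producer.close();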

(5) Go into logs/workers-artifacts/ under the Storm installation directory and check the logs: /logs/workers-artifacts/kafka-topology-2-1578363427/6701

Under one of the worker ports, the executor (thread pool) running one receiving thread is not the same thread pool as the one running the other receiving thread, which confirms the conclusion above.

Logs under the other worker port: /logs/workers-artifacts/kafka-topology-2-1578363427/6701
