Spark Structured Streaming with HBase integration

Asked by 忘了有多久 on 2020-12-14 05:20

We are streaming Kafka data that is collected from MySQL. Once all the analytics is done, I want to save my data directly to HBase. I have gone through the

3 Answers
  •  没有蜡笔的小新
    2020-12-14 05:32

    1- Add these libraries to your project:

          "org.apache.hbase" % "hbase-client" % "2.0.1"
          "org.apache.hbase" % "hbase-common" % "2.0.1"
    

    2- Add this trait to your code:

       import java.util.concurrent.ExecutorService

       import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put, Table}
       import org.apache.hadoop.hbase.security.User
       import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
       import org.apache.spark.sql.ForeachWriter

       trait HBaseForeachWriter[RECORD] extends ForeachWriter[RECORD] {

         val tableName: String
         val hbaseConfResources: Seq[String]

         // optional executor pool and user for the HBase connection
         def pool: Option[ExecutorService] = None

         def user: Option[User] = None

         private var hTable: Table = _
         private var connection: Connection = _

         // called once per partition per trigger
         override def open(partitionId: Long, version: Long): Boolean = {
           connection = createConnection()
           hTable = getHTable(connection)
           true
         }

         def createConnection(): Connection = {
           val hbaseConfig = HBaseConfiguration.create()
           hbaseConfResources.foreach(hbaseConfig.addResource)
           ConnectionFactory.createConnection(hbaseConfig, pool.orNull, user.orNull)
         }

         def getHTable(connection: Connection): Table = {
           connection.getTable(TableName.valueOf(tableName))
         }

         // called once per record
         override def process(record: RECORD): Unit = {
           val put = toPut(record)
           hTable.put(put)
         }

         // guard against a failed open(), which would leave these fields null
         override def close(errorOrNull: Throwable): Unit = {
           if (hTable != null) hTable.close()
           if (connection != null) connection.close()
         }

         // implement this to translate a record into an HBase Put
         def toPut(record: RECORD): Put

       }
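
    A note on the lifecycle: Structured Streaming invokes open/process/close once per partition for every trigger, so this writer opens a fresh HBase connection per partition per micro-batch and issues one Put per record. That is fine for modest volumes; if it becomes a bottleneck, see the batched foreachBatch sketch after step 3.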
    

    3- Use it in your logic:

        import org.apache.hadoop.hbase.client.Put
        import org.apache.hadoop.hbase.util.Bytes

        val ds = .... // any Dataset[WhatEverYourDataType]

        val query = ds.writeStream
          .foreach(new HBaseForeachWriter[WhatEverYourDataType] {
            override val tableName: String = "hbase-table-name"
            // your cluster config files; assumed here to be on the classpath (resources)
            override val hbaseConfResources: Seq[String] = Seq("core-site.xml", "hbase-site.xml")

            override def toPut(record: WhatEverYourDataType): Put = {
              val key = .....
              val columnFamilyName: String = ....
              val columnName: String = ....
              val columnValue = ....

              val p = new Put(Bytes.toBytes(key))
              // add columns ...
              p.addColumn(Bytes.toBytes(columnFamilyName),
                          Bytes.toBytes(columnName),
                          Bytes.toBytes(columnValue))
              p
            }
          })
          .start()

        query.awaitTermination()
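
    If one Put per record turns out to be the bottleneck, a common alternative (not part of the original answer) is foreachBatch, available since Spark 2.4, which lets you write each micro-batch with one bulk put per partition. A minimal sketch, assuming the same hypothetical table name and config files as above and a standalone toPut function like the one in step 3:

        import scala.collection.JavaConverters._
        import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
        import org.apache.hadoop.hbase.client.ConnectionFactory
        import org.apache.spark.sql.Dataset

        val query = ds.writeStream
          .foreachBatch { (batch: Dataset[WhatEverYourDataType], batchId: Long) =>
            batch.foreachPartition { rows: Iterator[WhatEverYourDataType] =>
              val conf = HBaseConfiguration.create()
              Seq("core-site.xml", "hbase-site.xml").foreach(conf.addResource)
              val connection = ConnectionFactory.createConnection(conf)
              try {
                val table = connection.getTable(TableName.valueOf("hbase-table-name"))
                try {
                  // one bulk put per partition instead of one RPC per record
                  table.put(rows.map(toPut).toList.asJava)
                } finally table.close()
              } finally connection.close()
            }
          }
          .start()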
    
