Spark源码分析-Master的onStart()方法是什么时候调用的？

一般我们都知道，Master（作为一个RpcEndpoint）的生命周期方法为：constructor -> onStart -> receive* -> onStop；但是在Master的main方法中并没有对onStart的直接调用，那么onStart方法是什么时候被调用的呢？

这其实与Spark的底层的Netty通信架构有关。

在Master的main方法中：

 // Create the RPC environment through the RpcEnv.create factory and keep the result in rpcEnv.
 val rpcEnv: RpcEnv = RpcEnv.create(SYSTEM_NAME, host, port, conf, securityMgr)

当调用RpcEnv的create()方法时，最终会创建并初始化一个NettyRpcEnv对象，并将这个对象返回。
那么我们再来看一下NettyRpcEnv的源码：

// Excerpt of NettyRpcEnv; parts not relevant here are elided with "......".
private[netty] class NettyRpcEnv(
                                    val conf: SparkConf,
                                    javaSerializerInstance: JavaSerializerInstance,
                                    host: String,
                                    securityManager: SecurityManager) extends RpcEnv(conf) with Logging{

    private[netty] val transportConf = SparkTransportConf.fromSparkConf(
        conf.clone.set("spark.rpc.io.numConnectionsPerPeer", "1"),
        "rpc",
        conf.getInt("spark.rpc.io.threads", 0))
    // Create the message dispatcher as part of constructing this NettyRpcEnv;
    // the dispatcher is handed a reference to this environment.
    private val dispatcher: Dispatcher = new Dispatcher(this)
    ......
}

我们再来看一下Dispatcher的源码：

/**
 * A message dispatcher, responsible for routing RPC messages to the appropriate endpoint(s).
  * 消息分发器, 负责路由RPC消息到一个或多个合适的Endpoint
 */
private[netty] class Dispatcher(nettyEnv: NettyRpcEnv) extends Logging {
  // Per-endpoint record: the endpoint name, the endpoint itself and its NettyRpcEndpointRef.
  private class EndpointData(
      val name: String,
      val endpoint: RpcEndpoint,
      val ref: NettyRpcEndpointRef) {
    // Inbox belonging to this endpoint, built from its ref and the endpoint instance.
    val inbox: Inbox = new Inbox(ref, endpoint)
  }
  // Maps an endpoint name to its EndpointData, so the data can be looked up by name.
  private val endpoints: ConcurrentMap[String, EndpointData] =
    new ConcurrentHashMap[String, EndpointData]
  // Maps an endpoint instance to its RpcEndpointRef.
  private val endpointRefs: ConcurrentMap[RpcEndpoint, RpcEndpointRef] =
    new ConcurrentHashMap[RpcEndpoint, RpcEndpointRef]

  // Track the receivers whose inboxes may contain messages.
  // Blocking queue of EndpointData; the MessageLoop threads take entries from it and
  // process the corresponding inbox.
  private val receivers = new LinkedBlockingQueue[EndpointData]

  /**
   * True if the dispatcher has been stopped. Once stopped, all messages posted will be bounced
   * immediately.
   * Flag recording whether this dispatcher has been stopped.
   */
  @GuardedBy("this")
  private var stopped = false

  /**
   * Registers `endpoint` under `name` and returns the reference created for it.
   *
   * @param name     name to register the endpoint under
   * @param endpoint the endpoint being registered
   * @return the newly created NettyRpcEndpointRef for this endpoint
   * @throws IllegalStateException    if the dispatcher has already been stopped
   * @throws IllegalArgumentException if an endpoint is already registered under `name`
   */
  def registerRpcEndpoint(name: String, endpoint: RpcEndpoint): NettyRpcEndpointRef = {
    val endpointAddress = RpcEndpointAddress(nettyEnv.address, name)
    val newRef = new NettyRpcEndpointRef(nettyEnv.conf, endpointAddress, nettyEnv)
    synchronized {
      if (stopped) throw new IllegalStateException("RpcEnv has been stopped")

      // Store the endpoint under its name; a non-null result means the name is already taken.
      val previous = endpoints.putIfAbsent(name, new EndpointData(name, endpoint, newRef))
      if (previous != null) {
        throw new IllegalArgumentException(s"There is already an RpcEndpoint called $name")
      }

      // Fetch the entry that was just stored and record the endpoint -> ref mapping.
      val registered = endpoints.get(name)
      endpointRefs.put(registered.endpoint, registered.ref)
      // Queue the entry so that a MessageLoop picks up its inbox (for the OnStart message).
      receivers.offer(registered)
    }
    newRef
  }

  ......

  /**
   * Thread pool used for dispatching messages.
   *
   * The pool size comes from "spark.rpc.netty.dispatcher.numThreads" and defaults to
   * max(2, number of available processors). One MessageLoop is submitted per thread
   * while this field is being initialized.
   */
  private val threadpool: ThreadPoolExecutor = {
    val defaultThreads = math.max(2, Runtime.getRuntime.availableProcessors())
    val numThreads =
      nettyEnv.conf.getInt("spark.rpc.netty.dispatcher.numThreads", defaultThreads)
    // Fixed-size pool obtained from ThreadUtils.newDaemonFixedThreadPool.
    val pool = ThreadUtils.newDaemonFixedThreadPool(numThreads, "dispatcher-event-loop")
    // Start one MessageLoop per pool thread.
    (0 until numThreads).foreach { _ =>
      pool.execute(new MessageLoop)
    }
    pool
  }

  /**
   * Message loop used for dispatching messages.
   *
   * Repeatedly takes an EndpointData from `receivers` and processes its inbox. The loop ends
   * when the PoisonPill entry is taken or when the thread is interrupted.
   */
  private class MessageLoop extends Runnable {
    override def run(): Unit = {
      try {
        var running = true
        while (running) {
          try {
            // Blocks until an EndpointData is available in the queue.
            val next = receivers.take()
            if (next == PoisonPill) {
              // Put PoisonPill back so that other MessageLoops can see it.
              receivers.offer(PoisonPill)
              running = false
            } else {
              // Process the messages stored in this endpoint's inbox.
              next.inbox.process(Dispatcher.this)
            }
          } catch {
            // Log non-fatal failures and keep the loop alive.
            case NonFatal(e) => logError(e.getMessage, e)
          }
        }
      } catch {
        case ie: InterruptedException => // exit
      }
    }
  }

  /** A poison endpoint that indicates MessageLoop should exit its message loop.
    * It is a sentinel EndpointData built with null name, endpoint and ref.
    * */
  private val PoisonPill = new EndpointData(null, null, null)
}

可知在Dispatcher初始化时，threadpool变量的初始化代码块会被执行：线程池中的每个线程都运行一个MessageLoop，不断从receivers队列中取出EndpointData，最后走到data.inbox.process(Dispatcher.this)方法。
查看该方法的源码：

/**
      * Process stored messages.
      * Takes one message from `messages` under the inbox lock, then handles it in the
      * loop below. This is an excerpt: code elided from the original is marked "......".
      */
    def process(dispatcher: Dispatcher): Unit = {
        var message: InboxMessage = null
        inbox.synchronized {
            // Return immediately if concurrent processing is not enabled and
            // numActiveThreads is non-zero.
            if (!enableConcurrent && numActiveThreads != 0) {
                return
            }
            // Poll the next message; return if there is none, otherwise count this thread as active.
            message = messages.poll()
            if (message != null) {
                numActiveThreads += 1
            } else {
                return
            }
        }
        while (true) {
            safelyCall(endpoint) {
                message match {
					......
                    case OnStart =>
                        // Invoke the RpcEndpoint's onStart method.
                        endpoint.onStart()
                        // For an endpoint that is not a ThreadSafeRpcEndpoint, enable concurrent
                        // processing, unless the inbox has been stopped.
                        if (!endpoint.isInstanceOf[ThreadSafeRpcEndpoint]) {
                            inbox.synchronized {
                                if (!stopped) {
                                    enableConcurrent = true
                                }
                            }
                        }
                    
                    ......
                }
            }
            
            ......
        }
    }

而在创建EndpointData的时候，会初始化Inbox对象：

// Excerpt of Inbox; elided parts are marked "......" and the rest of the class is not shown.
private[netty] class Inbox(
                              val endpointRef: NettyRpcEndpointRef,
                              val endpoint: RpcEndpoint)
    extends Logging {
	......
    // OnStart should be the first message to process
    inbox.synchronized {  // i.e. synchronized(lock) { ... }
        // Add an OnStart message to `messages`; this runs in the class body, i.e. at construction.
        messages.add(OnStart)
    }

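因为Inbox在初始化时会先向自己的消息队列中放入一条OnStart消息，而注册Endpoint时（Master的main方法中通过rpcEnv.setupEndpoint最终调用到registerRpcEndpoint）又会把对应的EndpointData放入receivers队列，所以Dispatcher的MessageLoop线程在循环取出EndpointData并处理其Inbox中的消息时，会通过模式匹配命中OnStart分支。
此时就会调用Master的onStart()方法。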

来源：CSDN

作者：满岛菜鸟

链接：https://blog.csdn.net/weixin_43616627/article/details/104215356

标签

源码