Spark job on YARN fails with an error

若如初见 · Posted 2020-03-08 19:33:08

A Spark job submitted to YARN fails with the following error:
java.lang.RuntimeException: Error in configuring object
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
    at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
    at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:188)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:201)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
    at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:91)
    at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$dependencies$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.dependencies(RDD.scala:237)
    at org.apache.spark.scheduler.DAGScheduler.getShuffleDependencies(DAGScheduler.scala:424)
    at org.apache.spark.scheduler.DAGScheduler.getOrCreateParentStages(DAGScheduler.scala:373)
    at org.apache.spark.scheduler.DAGScheduler.createResultStage(DAGScheduler.scala:360)
    at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:838)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1613)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
    ... 37 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
    at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
    at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
    at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
    ... 42 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
    at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
    ... 44 more

20/03/08 18:10:11 INFO scheduler.DAGScheduler: Job 0 failed: saveAsTextFile at WordCount.scala:35, took 0.042565 s
Exception in thread "main" java.lang.RuntimeException: Error in configuring object
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
    at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
    at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:188)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:201)
    ...
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1958)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1168)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1037)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:962)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1468)
    at com.atguigu.spark.WordCount$.main(WordCount.scala:35)
    at com.atguigu.spark.WordCount.main(WordCount.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:743)
    at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
    at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
    ... 37 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
    at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
    at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
    at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
    ... 42 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
    at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
    ... 44 more
Error message

Cause:
The Hadoop cluster is configured with LZO compression support, and the input files stored in HDFS are LZO-compressed. When the job is scheduled on YARN, however, the hadoop-lzo jar is not on Spark's driver and executor classpath, so Spark cannot load the com.hadoop.compression.lzo.LzoCodec class declared in the Hadoop configuration, and the job fails with the ClassNotFoundException shown above.
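For context, a cluster that "supports LZO" normally registers the codec in Hadoop's core-site.xml, and Spark picks up that Hadoop configuration when reading from HDFS, which is why it tries to load the class at all. The snippet below is only an illustrative sketch of the usual hadoop-lzo settings (the exact value list varies by installation), not the configuration taken from this cluster:

core-site.xml (illustrative):
<!-- register the LZO codecs alongside the built-in ones -->
<property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<!-- tell Hadoop which class implements the LZO codec -->
<property>
    <name>io.compression.codec.lzo.class</name>
    <value>com.hadoop.compression.lzo.LzoCodec</value>
</property>

Any JVM that reads these files, including the Spark driver and executors, therefore needs the hadoop-lzo jar on its classpath.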

Solution:
[ Configure LZO support in Spark ]
Go to Spark's conf directory, edit spark-defaults.conf, and add the following two lines:
spark.driver.extraClassPath /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar

spark.executor.extraClassPath /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar

In other words, point Spark at the hadoop-lzo jar that was built when LZO support was compiled into Hadoop (i.e. under ${HADOOP_HOME}/share/hadoop/common/…).
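Equivalently, the same classpath settings can be passed per submission on the spark-submit command line instead of in spark-defaults.conf. A minimal sketch, assuming the same jar location and a client-mode submission (the application jar name and the input/output paths below are placeholders, not values from the original post):

spark-submit \
  --master yarn \
  --deploy-mode client \
  --class com.atguigu.spark.WordCount \
  --conf spark.driver.extraClassPath=/opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar \
  --conf spark.executor.extraClassPath=/opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar \
  wordcount.jar hdfs:///input hdfs:///output

Shipping the jar with --jars also works, since jars listed there are added to both the driver and executor classpaths; either way, the point is simply that com.hadoop.compression.lzo.LzoCodec must be loadable in every JVM that touches the LZO files.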

Re-running the job now succeeds.
