Huawei Cloud MRS: Solution to a PySpark SparkSession Initialization Problem

Posted by yugogo on 2021/12/06 16:02:52
[Abstract] Inference with a Python model fails on Spark 2.4.5, reporting py4j.protocol.Py4JJavaError: An error occurred while calling o602.load.: java.util.NoSuchElementException: None.get

Fix: modify the Spark2x client script

Problem description

Inference with a Python model fails on Spark 2.4.5.
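
The failure pattern, reduced to a minimal sketch (the HDFS path and pipeline contents are hypothetical): on affected Spark 2.4.5 clients, a SparkSession constructed directly from an existing SparkContext is not registered as the JVM-side active session, and loading a saved PipelineModel then fails while scanning the model's Parquet metadata.

    # Minimal sketch of the triggering pattern; the model path is hypothetical.
    from pyspark import SparkContext
    from pyspark.sql import SparkSession
    from pyspark.ml import PipelineModel

    sc = SparkContext.getOrCreate()
    spark = SparkSession(sc)  # internally: self._jvm.SparkSession(self._jsc.sc())

    # Reading the saved GBT pipeline scans its Parquet metadata and, on affected
    # clients, throws java.util.NoSuchElementException: None.get.
    model = PipelineModel.load("hdfs:///user/xxxxxx/models/gbt_pipeline")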

Error message:

Traceback (most recent call last):
  File "load_and_predict.py", line 210, in <module>
    main()
  File "load_and_predict.py", line 203, in main
    result = model.fit(data)
  File "load_and_predict.py", line 149, in fit
    features_col=self.features)
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/__pyfiles__/base.py", line 501, in predict
    model = self.pipeline_model_load(path)
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/__pyfiles__/base.py", line 453, in pipeline_model_load
    model = PipelineModel.load(full_path)
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/ml/util.py", line 362, in load
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/ml/pipeline.py", line 244, in load
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/ml/pipeline.py", line 378, in load
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/ml/util.py", line 612, in loadParamsInstance
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/ml/util.py", line 362, in load
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/ml/util.py", line 300, in load
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
  File "/srv/BigData/hadoop/data6/nm/localdir/usercache/xxxxxx/appcache/application_1630214655126_410542/container_e26_1630214655126_410542_02_000001/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o602.load.
: java.util.NoSuchElementException: None.get
at scala.None$.get(Option.scala:347)
at scala.None$.get(Option.scala:345)
at org.apache.spark.sql.execution.FileSourceScanExec.needsUnsafeRowConversion$lzycompute(DataSourceScanExec.scala:179)
at org.apache.spark.sql.execution.FileSourceScanExec.needsUnsafeRowConversion(DataSourceScanExec.scala:177)
at org.apache.spark.sql.execution.ColumnarBatchScan$class.produceRows(ColumnarBatchScan.scala:167)
at org.apache.spark.sql.execution.ColumnarBatchScan$class.doProduce(ColumnarBatchScan.scala:85)
at org.apache.spark.sql.execution.FileSourceScanExec.doProduce(DataSourceScanExec.scala:160)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:201)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:198)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.FileSourceScanExec.produce(DataSourceScanExec.scala:160)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:492)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:546)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:177)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:173)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:201)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:198)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:173)
at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:89)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:177)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:173)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:201)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:198)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:173)
at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:93)
at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:91)
at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:3061)
at org.apache.spark.sql.Dataset.rdd(Dataset.scala:3059)
at org.apache.spark.ml.tree.EnsembleModelReadWrite$.loadImpl(treeModels.scala:450)
at org.apache.spark.ml.classification.GBTClassificationModel$GBTClassificationModelReader.load(GBTClassifier.scala:420)
at org.apache.spark.ml.classification.GBTClassificationModel$GBTClassificationModelReader.load(GBTClassifier.scala:411)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
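
Root cause, as the top stack frames suggest: pyspark/sql/session.py in the Spark2x client builds the JVM session by invoking the SparkSession constructor directly. A session created that way is never registered as the JVM default/active session, so when FileSourceScanExec.needsUnsafeRowConversion later calls SparkSession.getActiveSession on the Scala side, it receives None and None.get throws NoSuchElementException. The offending line (quoted from the sed pattern in the solution below):

    # pyspark/sql/session.py (Spark 2.4.5 client) -- problematic construction:
    jsparkSession = self._jvm.SparkSession(self._jsc.sc())
    # The JVM SparkSession built by this constructor call is not recorded as the
    # active/default session, so Scala code relying on getActiveSession.get fails.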


Solution

  1. Switch to the omm user and change to the Spark2X directory of the client installation.
  2. Unpack the "pyspark.zip" archive:

            cd spark/python/lib

            unzip pyspark.zip

            rm -f pyspark.zip

  3. Run the following commands to patch the "session.py" script and rebuild the archive (the effect of the edit is sketched below):

            sed -i 's/jsparkSession = self._jvm.SparkSession(self._jsc.sc())/jsparkSession = self._jvm.SparkSession.builder().config(self._jsc.sc().getConf()).getOrCreate()/' pyspark/sql/session.py

            zip -r pyspark.zip pyspark

            rm -rf pyspark
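
For reference, after the sed edit the line in "session.py" reads as below. getOrCreate() both reuses the configuration of the existing SparkContext and registers the resulting session as the JVM default/active session, which is what the Parquet scan path expects:

    # pyspark/sql/session.py -- the line after the patch:
    jsparkSession = self._jvm.SparkSession.builder().config(self._jsc.sc().getConf()).getOrCreate()
    # builder().getOrCreate() registers the new session (setDefaultSession /
    # setActiveSession), so SparkSession.getActiveSession no longer returns None.

If the Spark2x client is installed on multiple nodes, the same patched pyspark.zip presumably has to be applied on each of them before resubmitting the job.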
