九十五、Spark-SparkSQL(打包集群运行最终版)

举报
托马斯-酷涛 发表于 2022/05/26 01:31:45 2022/05/26
【摘要】 <scala.version>2.11.0</scala.version> <spark.version>2.0.0</spark.version> <hadoop.version>2.6.0</hadoop.version> <slf4j.versio...

<scala.version>2.11.0</scala.version>
<spark.version>2.0.0</spark.version>
<hadoop.version>2.6.0</hadoop.version>
<slf4j.version>1.7.16</slf4j.version>
<log4j.version>1.2.17</log4j.version>
<mysql.version>8.0.23</mysql.version>

pom依赖


  
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <project xmlns="http://maven.apache.org/POM/4.0.0"
  3. xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  4. xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  5. <modelVersion>4.0.0</modelVersion>
  6. <groupId>org.example</groupId>
  7. <artifactId>SparkDemo</artifactId>
  8. <version>1.0-SNAPSHOT</version>
  9. <properties>
  10. <scala.version>2.11.0</scala.version>
  11. <spark.version>2.0.0</spark.version>
  12. <hadoop.version>2.6.0</hadoop.version>
  13. <slf4j.version>1.7.16</slf4j.version>
  14. <log4j.version>1.2.17</log4j.version>
  15. <mysql.version>8.0.23</mysql.version>
  16. </properties>
  17. <dependencies>
  18. <dependency>
  19. <groupId>com.hankcs</groupId>
  20. <artifactId>hanlp</artifactId>
  21. <version>portable-1.7.8</version>
  22. </dependency>
  23. <!-- Scala 库 -->
  24. <dependency>
  25. <groupId>org.scala-lang</groupId>
  26. <artifactId>scala-library</artifactId>
  27. <version>${scala.version}</version>
  28. </dependency>
  29. <!-- MySQL连接 -->
  30. <dependency>
  31. <groupId>mysql</groupId>
  32. <artifactId>mysql-connector-java</artifactId>
  33. <version>8.0.23</version>
  34. </dependency>
  35. <!-- Spark 系列包 -->
  36. <dependency>
  37. <groupId>org.apache.spark</groupId>
  38. <artifactId>spark-core_2.11</artifactId>
  39. <version>${spark.version}</version>
  40. </dependency>
  41. <dependency>
  42. <groupId>org.apache.spark</groupId>
  43. <artifactId>spark-sql_2.11</artifactId>
  44. <version>${spark.version}</version>
  45. </dependency>
  46. <dependency>
  47. <groupId>org.apache.hadoop</groupId>
  48. <artifactId>hadoop-client</artifactId>
  49. <version>${hadoop.version}</version>
  50. </dependency>
  51. <!-- 日志相关 -->
  52. <dependency>
  53. <groupId>org.slf4j</groupId>
  54. <artifactId>jcl-over-slf4j</artifactId>
  55. <version>${slf4j.version}</version>
  56. </dependency>
  57. <dependency>
  58. <groupId>org.slf4j</groupId>
  59. <artifactId>slf4j-api</artifactId>
  60. <version>${slf4j.version}</version>
  61. </dependency>
  62. <dependency>
  63. <groupId>org.slf4j</groupId>
  64. <artifactId>slf4j-log4j12</artifactId>
  65. <version>${slf4j.version}</version>
  66. </dependency>
  67. <dependency>
  68. <groupId>log4j</groupId>
  69. <artifactId>log4j</artifactId>
  70. <version>${log4j.version}</version>
  71. </dependency>
  72. <!--MapReduce-->
  73. <dependency>
  74. <groupId>org.apache.hadoop</groupId>
  75. <artifactId>hadoop-hdfs</artifactId>
  76. <version>2.6.0</version>
  77. </dependency>
  78. <dependency>
  79. <groupId>org.apache.hadoop</groupId>
  80. <artifactId>hadoop-common</artifactId>
  81. <version>2.6.0</version>
  82. </dependency>
  83. <dependency>
  84. <groupId>org.apache.hadoop</groupId>
  85. <artifactId>hadoop-client</artifactId>
  86. <version>2.6.0</version>
  87. </dependency>
  88. <dependency>
  89. <groupId>org.apache.hadoop</groupId>
  90. <artifactId>hadoop-mapreduce-client-core</artifactId>
  91. <version>2.6.0</version>
  92. </dependency>
  93. </dependencies>
  94. <build>
  95. <sourceDirectory>src/main/scala</sourceDirectory>
  96. <plugins>
  97. <plugin>
  98. <groupId>org.apache.maven.plugins</groupId>
  99. <artifactId>maven-compiler-plugin</artifactId>
  100. <version>3.0</version>
  101. <configuration>
  102. <source>1.8</source>
  103. <target>1.8</target>
  104. <encoding>UTF-8</encoding>
  105. </configuration>
  106. </plugin>
  107. <plugin>
  108. <groupId>net.alchim31.maven</groupId>
  109. <artifactId>scala-maven-plugin</artifactId>
  110. <version>3.2.0</version>
  111. <executions>
  112. <execution>
  113. <goals>
  114. <goal>compile</goal>
  115. <goal>testCompile</goal>
  116. </goals>
  117. <configuration>
  118. <args>
  119. <arg>-dependencyfile</arg>
  120. <arg>${project.build.directory}/.scala_dependencies</arg>
  121. </args>
  122. </configuration>
  123. </execution>
  124. </executions>
  125. </plugin>
  126. </plugins>
  127. </build>
  128. </project>

代码


  
  1. package org.example.spark
  2. import org.apache.spark.rdd.RDD
  3. import org.apache.spark.{SparkConf, SparkContext}
  4. object word_packge {
  5. def main(args: Array[String]): Unit = {
  6. val conf: SparkConf = new SparkConf().setAppName("jiqun").setMaster("local[6]")
  7. val sc = new SparkContext(conf)
  8. val line: RDD[String] = sc.textFile("hdfs://192.168.231.105:8020/input/HelloWord.txt")
  9. val words: RDD[String] = line.flatMap(_.split(" "))
  10. val word: RDD[(String, Int)] = words.map((_, 1))
  11. val result: RDD[(String, Int)] = word.reduceByKey(_ + _)
  12. System.setProperty("HADOOP_USER_NAME","root")
  13. result.repartition(1).saveAsTextFile("hdfs://192.168.231.105:8020/output/output1")
  14. Thread.sleep(1000 * 60)
  15. sc.stop()
  16. }
  17. }

去掉多余Jar包

打包

找到Jar包上传

集群运行

bin/spark-submit --class org.example.spark.word_packge /input/SparkDemo.jar 

 

HDFS查看运行结果


文章来源: tuomasi.blog.csdn.net,作者:托马斯-酷涛,版权归原作者所有,如需转载,请联系作者。

原文链接:tuomasi.blog.csdn.net/article/details/124107746

【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

评论(0)

0/1000
抱歉,系统识别当前为高风险访问,暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称,即可参与社区互动!

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

*长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。