95. Spark-SparkSQL (Packaging and Running on a Cluster, Final Version)
[Abstract]
Versions used in this build: Scala 2.11.0, Spark 2.0.0, Hadoop 2.6.0, SLF4J 1.7.16, Log4j 1.2.17, MySQL Connector/J 8.0.23.
POM dependencies
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>SparkDemo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <scala.version>2.11.0</scala.version>
        <spark.version>2.0.0</spark.version>
        <hadoop.version>2.6.0</hadoop.version>
        <slf4j.version>1.7.16</slf4j.version>
        <log4j.version>1.2.17</log4j.version>
        <mysql.version>8.0.23</mysql.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.hankcs</groupId>
            <artifactId>hanlp</artifactId>
            <version>portable-1.7.8</version>
        </dependency>
        <!-- Scala library -->
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <!-- MySQL connector -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.version}</version>
        </dependency>
        <!-- Spark packages -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- Logging -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>jcl-over-slf4j</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <!-- MapReduce / HDFS -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.0</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.0</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
Code
package org.example.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object word_packge {
  def main(args: Array[String]): Unit = {
    // Write to HDFS as the root user; set this before any HDFS access.
    System.setProperty("HADOOP_USER_NAME", "root")

    // setMaster("local[6]") is only for local testing. When submitting with
    // spark-submit, remove it (or override it with --master) so the job runs
    // on the cluster master instead.
    val conf: SparkConf = new SparkConf().setAppName("jiqun").setMaster("local[6]")
    val sc = new SparkContext(conf)

    // Read the input file from HDFS.
    val line: RDD[String] = sc.textFile("hdfs://192.168.231.105:8020/input/HelloWord.txt")

    // Split each line into words.
    val words: RDD[String] = line.flatMap(_.split(" "))

    // Pair each word with an initial count of 1.
    val word: RDD[(String, Int)] = words.map((_, 1))

    // Sum the counts per word.
    val result: RDD[(String, Int)] = word.reduceByKey(_ + _)

    // Collapse to a single partition so the output is a single part file.
    result.repartition(1).saveAsTextFile("hdfs://192.168.231.105:8020/output/output1")

    // Keep the application alive for a minute so the web UI can be inspected.
    Thread.sleep(1000 * 60)

    sc.stop()
  }
}
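One caveat: saveAsTextFile aborts with a FileAlreadyExistsException if the output directory already exists. A minimal sketch of removing it first through the Hadoop FileSystem API, using the same host, port, and output path as the code above:

    import java.net.URI
    import org.apache.hadoop.fs.{FileSystem, Path}

    // Delete a pre-existing output directory before calling saveAsTextFile.
    val fs = FileSystem.get(new URI("hdfs://192.168.231.105:8020"), sc.hadoopConfiguration)
    val out = new Path("/output/output1")
    if (fs.exists(out)) fs.delete(out, true) // true = recursive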
Remove redundant JARs from the artifact (an alternative using Maven's provided scope is sketched below)
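Instead of deleting cluster-supplied JARs from the artifact by hand, one alternative (a sketch, assuming the cluster already ships its own Spark and Hadoop libraries) is to mark those dependencies as provided so Maven leaves them out of the packaged output:

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
        <!-- supplied by the cluster at runtime, so not bundled -->
        <scope>provided</scope>
    </dependency>

The same scope can be applied to spark-sql_2.11 and the Hadoop dependencies.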
Package the project
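From the command line, the standard Maven goal packages the project (with the pom above this yields a thin JAR; Spark and Hadoop are expected to be present on the cluster):

    mvn clean package

Given the coordinates above, the JAR is written to target/SparkDemo-1.0-SNAPSHOT.jar.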
Locate the JAR and upload it to the cluster
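For example, with scp (the destination host and the renamed file SparkDemo.jar are assumptions carried over from the spark-submit command below):

    scp target/SparkDemo-1.0-SNAPSHOT.jar root@192.168.231.105:/input/SparkDemo.jar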
Run on the cluster:
bin/spark-submit --class org.example.spark.word_packge /input/SparkDemo.jar
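Because no --master is passed, spark-submit falls back to the value configured in spark-defaults.conf (or to local mode). A fuller invocation, assuming a standalone master on the NameNode host at the default port 7077 (the URL is an assumption; substitute your cluster's master):

    bin/spark-submit \
      --class org.example.spark.word_packge \
      --master spark://192.168.231.105:7077 \
      /input/SparkDemo.jar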
View the results in HDFS
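Since the job repartitions to a single partition before writing, the output directory holds one part file (normally part-00000), which the HDFS CLI can print directly:

    hdfs dfs -cat /output/output1/part-*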
Source: tuomasi.blog.csdn.net, by 托马斯-酷涛; copyright belongs to the original author. Please contact the author before reprinting.
Original link: tuomasi.blog.csdn.net/article/details/124107746