# Interactive spark-shell against the ZooKeeper-backed HA master pair; the
# comma-separated host list lets the driver locate whichever master is active.
/data/hadoopv2/apps/spark-1.5.2-bin-hadoop2.6/bin/spark-shell \
  --master spark://aws10029.in.aiwaly.com:7077,aws10028.in.aiwaly.com:7077 \
  --executor-memory 400M \
  --driver-memory 400M

#cat spark-env.sh
# --- spark-env.sh for the Spark 1.5.2 standalone cluster (master HA via ZooKeeper) ---
export JAVA_HOME=/data/hadoopv2/apps/jdk1.8.0_65
# NOTE(review): SCALA_HOME points at the Spark install dir, not a Scala
# installation — presumably relying on Spark's bundled Scala; confirm intentional.
export SCALA_HOME=/data/hadoopv2/apps/spark-1.5.2-bin-hadoop2.6

# Master HA: deploy recovery state lives in ZooKeeper (3-node ensemble) under
# the /spark znode, so a standby master can take over if the active one dies.
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=aws10028.in.aiwaly.com:2181,aws10029.in.aiwaly.com:2181,aws10030.in.aiwaly.com:2181 -Dspark.deploy.zookeeper.dir=/spark"
#export SPARK_MASTER_PORT=8077
#export SPARK_MASTER_IP=aws10028.in.aiwaly.com,aws10029.in.aiwaly.com
# Per-worker resource caps: 1 core / 1 GiB per worker daemon.
export SPARK_WORKER_CORES=1
#export SPARK_WORKER_INSTANCES=1
export SPARK_WORKER_MEMORY=1G
# Submit the WordCount demo to the HA cluster. Application args after the jar:
# master URL (also passed via --master; presumably the demo app reads argv(0)
# itself — verify against samples.WordCount), HDFS input dir, HDFS output dir
# (must not already exist).
/data/hadoopv2/apps/spark-1.5.2-bin-hadoop2.6/bin/spark-submit \
  --master spark://aws10029.in.aiwaly.com:7077,aws10028.in.aiwaly.com:7077 \
  --class "samples.WordCount" \
  --executor-memory 400M \
  --driver-memory 400M \
  /awdisk0002/nas/disant/apps/spark/jar/spark-wordcount-in-scala-20160128a.jar \
  spark://aws10029.in.aiwaly.com:7077,aws10028.in.aiwaly.com:7077 \
  hdfs://aws10028.in.aiwaly.com:9000/usr/hadoop/test \
  hdfs://aws10028.in.aiwaly.com:9000/usr/hadoop/test201601281921

运行方式: 1: spark-shell
  ./bin/spark-shell --executor-memory 1g --driver-memory 1g --master spark://feng03:7077
2: spark-submit
  ./bin/spark-submit --class "SimpleApp" --master spark://feng03:7077 /home/jifeng/code/simple/target/scala-2.10/simple-project_2.10-1.0.jar
2016-01-28 spark-submit 命令
# Submit the 20160128b build. First application arg is the literal string
# "aaaa" in the master-URL position — NOTE(review): looks like a placeholder;
# confirm what samples.WordCount does with it. The output dir gets a
# timestamp suffix so repeated runs do not collide on HDFS.
/data/hadoopv2/apps/spark-1.5.2-bin-hadoop2.6/bin/spark-submit \
  --class "samples.WordCount" \
  --name WordCountDemo \
  --executor-memory 400M \
  --driver-memory 400M \
  /awdisk0002/nas/disant/apps/spark/jar/spark-wordcount-in-scala-20160128b.jar \
  aaaa \
  hdfs://aws10028.in.aiwaly.com:9000/usr/hadoop/test \
  hdfs://aws10028.in.aiwaly.com:9000/usr/hadoop/test$(date +%Y%m%d%H%M%S)

# Run the 20160128a build with a local[4] master (4 local worker threads).
# FIX: unquoted local[4] is a bash glob pattern — if a file named "local4"
# exists in the current directory, the shell would expand it and pass the
# wrong master string; single-quote it so the literal always reaches the app.
# The $(date)-suffixed output path is double-quoted as well (harmless today,
# safer if the format ever contains shell-special characters).
/data/hadoopv2/apps/spark-1.5.2-bin-hadoop2.6/bin/spark-submit \
  --class "samples.WordCount" \
  --name WordCountDemo \
  --executor-memory 400M \
  --driver-memory 400M \
  /awdisk0002/nas/disant/apps/spark/jar/spark-wordcount-in-scala-20160128a.jar \
  'local[4]' \
  hdfs://aws10028.in.aiwaly.com:9000/usr/hadoop/test \
  "hdfs://aws10028.in.aiwaly.com:9000/usr/hadoop/test$(date +%Y%m%d%H%M%S)"


top50统计方法
// Top-50 tally over a quoted-CSV-like file (fields delimited by "," with
// surrounding quotes): keep lines with more than 5 such fields, take field
// index 5, split it on '|' into words, count occurrences per word
// (reduceByKey), sort by count descending (swap to key on count, sortByKey
// with ascending=false, swap back), format each pair as "word|count",
// and take the first 50.
textFile.filter(_.split("\",\"").length > 5).map(line => line.split("\",\"")(5)).flatMap(_.split("\\|")).map(word => (word, 1)).reduceByKey(_+_).map(x => (x._2, x._1)).sortByKey(false).map(y => (y._2, y._1)).map(x => (x._1 +"|" + x._2) ).take(50)
文档更新时间: 2019-07-09 10:59   作者:月影鹏鹏