spark FP-Growth树应用示例

/ 0评 / 0
package sparkFIM

import java.util.concurrent.ConcurrentHashMap
import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.{SparkConf, SparkContext}
import scala.collection._


/**
 * Created by zhangshuai on 2017/4/5.
 */
object Test1 {

    System.setProperty("hadoop.home.dir", "E://hadoop2.6_Win_x64-master");
    val conf = new SparkConf().setMaster("local").setAppName("testFP-Growth");
    val sc = new SparkContext(conf);
    var freqMap = new ConcurrentHashMap[mutable.ArrayBuffer[String],mutable.ArrayBuffer[ItemFreq]]();//捆绑推销(key值为用户购买的历史商品)
    val items = new ConcurrentHashMap[Long,mutable.ArrayBuffer[String]]()//用户购买的历史商品
    val minSupport = 0.5//最小支持度
    val minConf = 0.75//最小置信度
    var freq = 1L//用户历史商品出现的次数
    var li = mutable.ArrayBuffer[ItemFreq]()

  def main(args: Array[String]): Unit = {
    //1.加载过去一段时间,大量用户购买的商品,数据源为商品列表,训练FP-Growth模型
    val data = sc.textFile("E://demo.txt").map(_.split(" ")).cache()
    val count = data.count()
    val fpg = new FPGrowth().setMinSupport(minSupport).setNumPartitions(3)
    val model = fpg.run(data)



    //2.输出所有频繁项集
    val result = model.freqItemsets.filter(_.items.size >= 1)
    result.foreach(f => println(f.items.mkString(" ")+"->"+f.freq))

    //3.获取用户id,并得到历史商品
    val userId = args(0).toLong
    var bucket:mutable.ArrayBuffer[String] = items.get(userId.toLong)
    if(bucket == null){
      bucket = new mutable.ArrayBuffer[String]()
      for(i <- 1 until args.length){
        bucket += (args(i))
      }
    }

    items.put(userId,bucket)//收集用户购买的历史商品
    for(item <- result){
      //4.在模型中找出与用户的历史商品相符合的频繁项集,得到频率
      if(item.items.mkString == items.get(userId).mkString){
        freq = item.freq
      }
    }
    println("历史商品出现的次数:" + freq)//调试信息(输出用户历史商品的支持度)

    //5.根据历史商品,找出置信度相对高的频繁项,推荐给用户

    for(f <- result){
      if(f.items.mkString.contains(items.get(userId).mkString) && f.items.size > items.get(userId).size) {
        val conf:Double = f.freq.toDouble / freq.toDouble
        if(conf >= minConf) {
          //找出所有置信度大于minConf的项
          var item = f.items
          for (i <- 0 until items.get(userId).size) {
            item = item.filter(_ != items.get(userId)(i)) //过滤掉用户历史商品,剩下的为推荐的商品
          }
          for (str <- item) {
            li += ItemFreq(str, conf)
          }
        }
      }
    }
    freqMap.put(items.get(userId),li);
    println("推荐的商品为:")
    freqMap.get(items.get(userId)).foreach(f =>println(f.item + "->" + f.freq))
  }
}
D:\JDK1.7\bin\java -Didea.launcher.port=7533 "-Didea.launcher.bin.path=D:\IDEA\IntelliJ IDEA 14.1.4\bin" -Dfile.encoding=UTF-8 -classpath "D:\JDK1.7\jre\lib\charsets.jar;D:\JDK1.7\jre\lib\deploy.jar;D:\JDK1.7\jre\lib\javaws.jar;D:\JDK1.7\jre\lib\jce.jar;D:\JDK1.7\jre\lib\jfr.jar;D:\JDK1.7\jre\lib\jfxrt.jar;D:\JDK1.7\jre\lib\jsse.jar;D:\JDK1.7\jre\lib\management-agent.jar;D:\JDK1.7\jre\lib\plugin.jar;D:\JDK1.7\jre\lib\resources.jar;D:\JDK1.7\jre\lib\rt.jar;D:\JDK1.7\jre\lib\ext\access-bridge-64.jar;D:\JDK1.7\jre\lib\ext\dnsns.jar;D:\JDK1.7\jre\lib\ext\jaccess.jar;D:\JDK1.7\jre\lib\ext\localedata.jar;D:\JDK1.7\jre\lib\ext\sunec.jar;D:\JDK1.7\jre\lib\ext\sunjce_provider.jar;D:\JDK1.7\jre\lib\ext\sunmscapi.jar;D:\JDK1.7\jre\lib\ext\zipfs.jar;C:\Users\test\Downloads\SparkJavaFPGrowth-master\target\classes;C:\Program Files (x86)\scala\lib\scala-actors-2.11.0.jar;C:\Program Files (x86)\scala\lib\scala-actors-migration_2.11-1.1.0.jar;C:\Program Files (x86)\scala\lib\scala-library.jar;C:\Program Files (x86)\scala\lib\scala-parser-combinators_2.11-1.0.4.jar;C:\Program Files (x86)\scala\lib\scala-reflect.jar;C:\Program Files (x86)\scala\lib\scala-swing_2.11-1.0.2.jar;C:\Program Files (x86)\scala\lib\scala-xml_2.11-1.0.4.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-core_2.11\1.6.0\spark-core_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\apache\avro\avro-mapred\1.7.7\avro-mapred-1.7.7-hadoop2.jar;C:\Users\test\.m2\repository\org\apache\avro\avro-ipc\1.7.7\avro-ipc-1.7.7.jar;C:\Users\test\.m2\repository\org\apache\avro\avro\1.7.7\avro-1.7.7.jar;C:\Users\test\.m2\repository\org\apache\avro\avro-ipc\1.7.7\avro-ipc-1.7.7-tests.jar;C:\Users\test\.m2\repository\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;C:\Users\test\.m2\repository\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;C:\Users\test\.m2\repository\com\twitter\chill_2.11\0.5.0\chill_2.11-0.5.0.jar;C:\Users\test\.m2\repository\com\esotericsoftware\kryo\kryo\2.21\kryo-2.21.jar;C:\Users\test\.m2\repository\com\esotericsoftware\reflectasm\reflectasm\1.07\reflectasm-1.07-shaded.jar;C:\Users\test\.m2\repository\com\esotericsoftware\minlog\minlog\1.2\minlog-1.2.jar;C:\Users\test\.m2\repository\org\objenesis\objenesis\1.2\objenesis-1.2.jar;C:\Users\test\.m2\repository\com\twitter\chill-java\0.5.0\chill-java-0.5.0.jar;C:\Users\test\.m2\repository\org\apache\xbean\xbean-asm5-shaded\4.4\xbean-asm5-shaded-4.4.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-client\2.2.0\hadoop-client-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-common\2.2.0\hadoop-common-2.2.0.jar;C:\Users\test\.m2\repository\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;C:\Users\test\.m2\repository\org\apache\commons\commons-math\2.1\commons-math-2.1.jar;C:\Users\test\.m2\repository\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;C:\Users\test\.m2\repository\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;C:\Users\test\.m2\repository\commons-collections\commons-collections\3.2.1\commons-collections-3.2.1.jar;C:\Users\test\.m2\repository\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;C:\Users\test\.m2\repository\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;C:\Users\test\.m2\repository\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-auth\2.2.0\hadoop-auth-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;C:\Users\test\.m2\repository\org\tukaani\xz\1.0\xz-1.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-hdfs\2.2.0\hadoop-hdfs-2.2.0.jar;C:\Users\test\.m2\repository\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-mapreduce-client-app\2.2.0\hadoop-mapreduce-client-app-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-mapreduce-client-common\2.2.0\hadoop-mapreduce-client-common-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-yarn-client\2.2.0\hadoop-yarn-client-2.2.0.jar;C:\Users\test\.m2\repository\com\google\inject\guice\3.0\guice-3.0.jar;C:\Users\test\.m2\repository\javax\inject\javax.inject\1\javax.inject-1.jar;C:\Users\test\.m2\repository\aopalliance\aopalliance\1.0\aopalliance-1.0.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-test-framework\jersey-test-framework-grizzly2\1.9\jersey-test-framework-grizzly2-1.9.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-test-framework\jersey-test-framework-core\1.9\jersey-test-framework-core-1.9.jar;C:\Users\test\.m2\repository\javax\servlet\javax.servlet-api\3.0.1\javax.servlet-api-3.0.1.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-grizzly2\1.9\jersey-grizzly2-1.9.jar;C:\Users\test\.m2\repository\org\glassfish\grizzly\grizzly-http\2.1.2\grizzly-http-2.1.2.jar;C:\Users\test\.m2\repository\org\glassfish\grizzly\grizzly-framework\2.1.2\grizzly-framework-2.1.2.jar;C:\Users\test\.m2\repository\org\glassfish\gmbal\gmbal-api-only\3.0.0-b023\gmbal-api-only-3.0.0-b023.jar;C:\Users\test\.m2\repository\org\glassfish\external\management-api\3.0.0-b012\management-api-3.0.0-b012.jar;C:\Users\test\.m2\repository\org\glassfish\grizzly\grizzly-http-server\2.1.2\grizzly-http-server-2.1.2.jar;C:\Users\test\.m2\repository\org\glassfish\grizzly\grizzly-rcm\2.1.2\grizzly-rcm-2.1.2.jar;C:\Users\test\.m2\repository\org\glassfish\grizzly\grizzly-http-servlet\2.1.2\grizzly-http-servlet-2.1.2.jar;C:\Users\test\.m2\repository\org\glassfish\javax.servlet\3.1\javax.servlet-3.1.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;C:\Users\test\.m2\repository\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;C:\Users\test\.m2\repository\stax\stax-api\1.0.1\stax-api-1.0.1.jar;C:\Users\test\.m2\repository\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;C:\Users\test\.m2\repository\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;C:\Users\test\.m2\repository\com\sun\jersey\contribs\jersey-guice\1.9\jersey-guice-1.9.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-yarn-server-common\2.2.0\hadoop-yarn-server-common-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-mapreduce-client-shuffle\2.2.0\hadoop-mapreduce-client-shuffle-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-yarn-api\2.2.0\hadoop-yarn-api-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-mapreduce-client-core\2.2.0\hadoop-mapreduce-client-core-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-yarn-common\2.2.0\hadoop-yarn-common-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-mapreduce-client-jobclient\2.2.0\hadoop-mapreduce-client-jobclient-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\hadoop\hadoop-annotations\2.2.0\hadoop-annotations-2.2.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-launcher_2.11\1.6.0\spark-launcher_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-network-common_2.11\1.6.0\spark-network-common_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-network-shuffle_2.11\1.6.0\spark-network-shuffle_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;C:\Users\test\.m2\repository\com\fasterxml\jackson\core\jackson-annotations\2.4.4\jackson-annotations-2.4.4.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-unsafe_2.11\1.6.0\spark-unsafe_2.11-1.6.0.jar;C:\Users\test\.m2\repository\net\java\dev\jets3t\jets3t\0.7.1\jets3t-0.7.1.jar;C:\Users\test\.m2\repository\commons-codec\commons-codec\1.3\commons-codec-1.3.jar;C:\Users\test\.m2\repository\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar;C:\Users\test\.m2\repository\org\apache\curator\curator-recipes\2.4.0\curator-recipes-2.4.0.jar;C:\Users\test\.m2\repository\org\apache\curator\curator-framework\2.4.0\curator-framework-2.4.0.jar;C:\Users\test\.m2\repository\org\apache\curator\curator-client\2.4.0\curator-client-2.4.0.jar;C:\Users\test\.m2\repository\org\apache\zookeeper\zookeeper\3.4.5\zookeeper-3.4.5.jar;C:\Users\test\.m2\repository\jline\jline\0.9.94\jline-0.9.94.jar;C:\Users\test\.m2\repository\com\google\guava\guava\14.0.1\guava-14.0.1.jar;C:\Users\test\.m2\repository\org\eclipse\jetty\orbit\javax.servlet\3.0.0.v201112011016\javax.servlet-3.0.0.v201112011016.jar;C:\Users\test\.m2\repository\org\apache\commons\commons-lang3\3.3.2\commons-lang3-3.3.2.jar;C:\Users\test\.m2\repository\org\apache\commons\commons-math3\3.4.1\commons-math3-3.4.1.jar;C:\Users\test\.m2\repository\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar;C:\Users\test\.m2\repository\org\slf4j\slf4j-api\1.7.10\slf4j-api-1.7.10.jar;C:\Users\test\.m2\repository\org\slf4j\jul-to-slf4j\1.7.10\jul-to-slf4j-1.7.10.jar;C:\Users\test\.m2\repository\org\slf4j\jcl-over-slf4j\1.7.10\jcl-over-slf4j-1.7.10.jar;C:\Users\test\.m2\repository\log4j\log4j\1.2.17\log4j-1.2.17.jar;C:\Users\test\.m2\repository\org\slf4j\slf4j-log4j12\1.7.10\slf4j-log4j12-1.7.10.jar;C:\Users\test\.m2\repository\com\ning\compress-lzf\1.0.3\compress-lzf-1.0.3.jar;C:\Users\test\.m2\repository\org\xerial\snappy\snappy-java\1.1.2\snappy-java-1.1.2.jar;C:\Users\test\.m2\repository\net\jpountz\lz4\lz4\1.3.0\lz4-1.3.0.jar;C:\Users\test\.m2\repository\org\roaringbitmap\RoaringBitmap\0.5.11\RoaringBitmap-0.5.11.jar;C:\Users\test\.m2\repository\commons-net\commons-net\2.2\commons-net-2.2.jar;C:\Users\test\.m2\repository\com\typesafe\akka\akka-remote_2.11\2.3.11\akka-remote_2.11-2.3.11.jar;C:\Users\test\.m2\repository\com\typesafe\akka\akka-actor_2.11\2.3.11\akka-actor_2.11-2.3.11.jar;C:\Users\test\.m2\repository\com\typesafe\config\1.2.1\config-1.2.1.jar;C:\Users\test\.m2\repository\io\netty\netty\3.8.0.Final\netty-3.8.0.Final.jar;C:\Users\test\.m2\repository\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;C:\Users\test\.m2\repository\org\uncommons\maths\uncommons-maths\1.2.2a\uncommons-maths-1.2.2a.jar;C:\Users\test\.m2\repository\com\typesafe\akka\akka-slf4j_2.11\2.3.11\akka-slf4j_2.11-2.3.11.jar;C:\Users\test\.m2\repository\org\scala-lang\scala-library\2.11.7\scala-library-2.11.7.jar;C:\Users\test\.m2\repository\org\json4s\json4s-jackson_2.11\3.2.10\json4s-jackson_2.11-3.2.10.jar;C:\Users\test\.m2\repository\org\json4s\json4s-core_2.11\3.2.10\json4s-core_2.11-3.2.10.jar;C:\Users\test\.m2\repository\org\json4s\json4s-ast_2.11\3.2.10\json4s-ast_2.11-3.2.10.jar;C:\Users\test\.m2\repository\org\scala-lang\scalap\2.11.0\scalap-2.11.0.jar;C:\Users\test\.m2\repository\org\scala-lang\scala-compiler\2.11.0\scala-compiler-2.11.0.jar;C:\Users\test\.m2\repository\org\scala-lang\modules\scala-xml_2.11\1.0.1\scala-xml_2.11-1.0.1.jar;C:\Users\test\.m2\repository\org\scala-lang\modules\scala-parser-combinators_2.11\1.0.1\scala-parser-combinators_2.11-1.0.1.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;C:\Users\test\.m2\repository\asm\asm\3.1\asm-3.1.jar;C:\Users\test\.m2\repository\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;C:\Users\test\.m2\repository\org\apache\mesos\mesos\0.21.1\mesos-0.21.1-shaded-protobuf.jar;C:\Users\test\.m2\repository\io\netty\netty-all\4.0.29.Final\netty-all-4.0.29.Final.jar;C:\Users\test\.m2\repository\com\clearspring\analytics\stream\2.7.0\stream-2.7.0.jar;C:\Users\test\.m2\repository\io\dropwizard\metrics\metrics-core\3.1.2\metrics-core-3.1.2.jar;C:\Users\test\.m2\repository\io\dropwizard\metrics\metrics-jvm\3.1.2\metrics-jvm-3.1.2.jar;C:\Users\test\.m2\repository\io\dropwizard\metrics\metrics-json\3.1.2\metrics-json-3.1.2.jar;C:\Users\test\.m2\repository\io\dropwizard\metrics\metrics-graphite\3.1.2\metrics-graphite-3.1.2.jar;C:\Users\test\.m2\repository\com\fasterxml\jackson\core\jackson-databind\2.4.4\jackson-databind-2.4.4.jar;C:\Users\test\.m2\repository\com\fasterxml\jackson\core\jackson-core\2.4.4\jackson-core-2.4.4.jar;C:\Users\test\.m2\repository\com\fasterxml\jackson\module\jackson-module-scala_2.11\2.4.4\jackson-module-scala_2.11-2.4.4.jar;C:\Users\test\.m2\repository\org\scala-lang\scala-reflect\2.11.2\scala-reflect-2.11.2.jar;C:\Users\test\.m2\repository\com\thoughtworks\paranamer\paranamer\2.6\paranamer-2.6.jar;C:\Users\test\.m2\repository\org\apache\ivy\ivy\2.4.0\ivy-2.4.0.jar;C:\Users\test\.m2\repository\oro\oro\2.0.8\oro-2.0.8.jar;C:\Users\test\.m2\repository\org\tachyonproject\tachyon-client\0.8.2\tachyon-client-0.8.2.jar;C:\Users\test\.m2\repository\commons-lang\commons-lang\2.4\commons-lang-2.4.jar;C:\Users\test\.m2\repository\commons-io\commons-io\2.4\commons-io-2.4.jar;C:\Users\test\.m2\repository\org\tachyonproject\tachyon-underfs-hdfs\0.8.2\tachyon-underfs-hdfs-0.8.2.jar;C:\Users\test\.m2\repository\org\tachyonproject\tachyon-underfs-s3\0.8.2\tachyon-underfs-s3-0.8.2.jar;C:\Users\test\.m2\repository\org\tachyonproject\tachyon-underfs-local\0.8.2\tachyon-underfs-local-0.8.2.jar;C:\Users\test\.m2\repository\net\razorvine\pyrolite\4.9\pyrolite-4.9.jar;C:\Users\test\.m2\repository\net\sf\py4j\py4j\0.9\py4j-0.9.jar;C:\Users\test\.m2\repository\org\spark-project\spark\unused\1.0.0\unused-1.0.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-mllib_2.11\1.6.0\spark-mllib_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-streaming_2.11\1.6.0\spark-streaming_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-sql_2.11\1.6.0\spark-sql_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-catalyst_2.11\1.6.0\spark-catalyst_2.11-1.6.0.jar;C:\Users\test\.m2\repository\org\codehaus\janino\janino\2.7.8\janino-2.7.8.jar;C:\Users\test\.m2\repository\org\codehaus\janino\commons-compiler\2.7.8\commons-compiler-2.7.8.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-column\1.7.0\parquet-column-1.7.0.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-common\1.7.0\parquet-common-1.7.0.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-encoding\1.7.0\parquet-encoding-1.7.0.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-generator\1.7.0\parquet-generator-1.7.0.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-hadoop\1.7.0\parquet-hadoop-1.7.0.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-format\2.3.0-incubating\parquet-format-2.3.0-incubating.jar;C:\Users\test\.m2\repository\org\apache\parquet\parquet-jackson\1.7.0\parquet-jackson-1.7.0.jar;C:\Users\test\.m2\repository\org\apache\spark\spark-graphx_2.11\1.6.0\spark-graphx_2.11-1.6.0.jar;C:\Users\test\.m2\repository\com\github\fommil\netlib\core\1.1.2\core-1.1.2.jar;C:\Users\test\.m2\repository\net\sourceforge\f2j\arpack_combined_all\0.1\arpack_combined_all-0.1.jar;C:\Users\test\.m2\repository\org\scalanlp\breeze_2.11\0.11.2\breeze_2.11-0.11.2.jar;C:\Users\test\.m2\repository\org\scalanlp\breeze-macros_2.11\0.11.2\breeze-macros_2.11-0.11.2.jar;C:\Users\test\.m2\repository\net\sf\opencsv\opencsv\2.3\opencsv-2.3.jar;C:\Users\test\.m2\repository\com\github\rwl\jtransforms\2.4.0\jtransforms-2.4.0.jar;C:\Users\test\.m2\repository\org\spire-math\spire_2.11\0.7.4\spire_2.11-0.7.4.jar;C:\Users\test\.m2\repository\org\spire-math\spire-macros_2.11\0.7.4\spire-macros_2.11-0.7.4.jar;C:\Users\test\.m2\repository\org\jpmml\pmml-model\1.1.15\pmml-model-1.1.15.jar;C:\Users\test\.m2\repository\org\jpmml\pmml-agent\1.1.15\pmml-agent-1.1.15.jar;C:\Users\test\.m2\repository\org\jpmml\pmml-schema\1.1.15\pmml-schema-1.1.15.jar;C:\Users\test\.m2\repository\com\sun\xml\bind\jaxb-impl\2.2.7\jaxb-impl-2.2.7.jar;C:\Users\test\.m2\repository\com\sun\xml\bind\jaxb-core\2.2.7\jaxb-core-2.2.7.jar;C:\Users\test\.m2\repository\javax\xml\bind\jaxb-api\2.2.7\jaxb-api-2.2.7.jar;D:\IDEA\IntelliJ IDEA 14.1.4\lib\idea_rt.jar" com.intellij.rt.execution.application.AppMain sparkFIM.Test1 2258
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
17/04/05 14:45:40 INFO SparkContext: Running Spark version 1.6.0
17/04/05 14:45:41 INFO SecurityManager: Changing view acls to: test
17/04/05 14:45:41 INFO SecurityManager: Changing modify acls to: test
17/04/05 14:45:41 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(test); users with modify permissions: Set(test)
17/04/05 14:45:42 INFO Utils: Successfully started service 'sparkDriver' on port 57941.
17/04/05 14:45:42 INFO Slf4jLogger: Slf4jLogger started
17/04/05 14:45:43 INFO Remoting: Starting remoting
17/04/05 14:45:43 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@162.26.68.91:57954]
17/04/05 14:45:43 INFO Utils: Successfully started service 'sparkDriverActorSystem' on port 57954.
17/04/05 14:45:43 INFO SparkEnv: Registering MapOutputTracker
17/04/05 14:45:43 INFO SparkEnv: Registering BlockManagerMaster
17/04/05 14:45:43 INFO DiskBlockManager: Created local directory at C:\Users\test\AppData\Local\Temp\blockmgr-d6a76472-ca92-48bc-b158-abf636503778
17/04/05 14:45:43 INFO MemoryStore: MemoryStore started with capacity 1127.3 MB
17/04/05 14:45:44 INFO SparkEnv: Registering OutputCommitCoordinator
17/04/05 14:45:44 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
17/04/05 14:45:44 INFO Utils: Successfully started service 'SparkUI' on port 4041.
17/04/05 14:45:44 INFO SparkUI: Started SparkUI at http://162.26.68.91:4041
17/04/05 14:45:45 INFO Executor: Starting executor ID driver on host localhost
17/04/05 14:45:45 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 57961.
17/04/05 14:45:45 INFO NettyBlockTransferService: Server created on 57961
17/04/05 14:45:45 INFO BlockManagerMaster: Trying to register BlockManager
17/04/05 14:45:45 INFO BlockManagerMasterEndpoint: Registering block manager localhost:57961 with 1127.3 MB RAM, BlockManagerId(driver, localhost, 57961)
17/04/05 14:45:45 INFO BlockManagerMaster: Registered BlockManager
17/04/05 14:45:46 INFO MemoryStore: Block broadcast_0 stored as values in memory (estimated size 104.0 KB, free 104.0 KB)
17/04/05 14:45:47 INFO MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 9.8 KB, free 113.8 KB)
17/04/05 14:45:47 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:57961 (size: 9.8 KB, free: 1127.2 MB)
17/04/05 14:45:47 INFO SparkContext: Created broadcast 0 from textFile at Test1.scala:26
17/04/05 14:45:50 WARN : Your hostname, zhangshuai resolves to a loopback/non-reachable address: fe80:0:0:0:0:5efe:c0a8:2f%23, but we couldn't find any external IP address!
17/04/05 14:46:02 INFO FileInputFormat: Total input paths to process : 1
17/04/05 14:46:02 INFO SparkContext: Starting job: count at Test1.scala:27
17/04/05 14:46:02 INFO DAGScheduler: Got job 0 (count at Test1.scala:27) with 1 output partitions
17/04/05 14:46:02 INFO DAGScheduler: Final stage: ResultStage 0 (count at Test1.scala:27)
17/04/05 14:46:02 INFO DAGScheduler: Parents of final stage: List()
17/04/05 14:46:02 INFO DAGScheduler: Missing parents: List()
17/04/05 14:46:02 INFO DAGScheduler: Submitting ResultStage 0 (MapPartitionsRDD[2] at map at Test1.scala:26), which has no missing parents
17/04/05 14:46:02 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 2.9 KB, free 116.7 KB)
17/04/05 14:46:02 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 1774.0 B, free 118.5 KB)
17/04/05 14:46:02 INFO BlockManagerInfo: Added broadcast_1_piece0 in memory on localhost:57961 (size: 1774.0 B, free: 1127.2 MB)
17/04/05 14:46:02 INFO SparkContext: Created broadcast 1 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:02 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[2] at map at Test1.scala:26)
17/04/05 14:46:02 INFO TaskSchedulerImpl: Adding task set 0.0 with 1 tasks
17/04/05 14:46:03 INFO TaskSetManager: Starting task 0.0 in stage 0.0 (TID 0, localhost, partition 0,PROCESS_LOCAL, 2038 bytes)
17/04/05 14:46:03 INFO Executor: Running task 0.0 in stage 0.0 (TID 0)
17/04/05 14:46:03 INFO CacheManager: Partition rdd_2_0 not found, computing it
17/04/05 14:46:03 INFO HadoopRDD: Input split: file:/E:/demo.txt:0+129
17/04/05 14:46:03 INFO deprecation: mapred.tip.id is deprecated. Instead, use mapreduce.task.id
17/04/05 14:46:03 INFO deprecation: mapred.task.id is deprecated. Instead, use mapreduce.task.attempt.id
17/04/05 14:46:03 INFO deprecation: mapred.task.is.map is deprecated. Instead, use mapreduce.task.ismap
17/04/05 14:46:03 INFO deprecation: mapred.task.partition is deprecated. Instead, use mapreduce.task.partition
17/04/05 14:46:03 INFO deprecation: mapred.job.id is deprecated. Instead, use mapreduce.job.id
17/04/05 14:46:03 INFO MemoryStore: Block rdd_2_0 stored as values in memory (estimated size 1056.0 B, free 119.5 KB)
17/04/05 14:46:03 INFO BlockManagerInfo: Added rdd_2_0 in memory on localhost:57961 (size: 1056.0 B, free: 1127.2 MB)
17/04/05 14:46:03 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 2581 bytes result sent to driver
17/04/05 14:46:03 INFO TaskSetManager: Finished task 0.0 in stage 0.0 (TID 0) in 266 ms on localhost (1/1)
17/04/05 14:46:03 INFO TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool 
17/04/05 14:46:03 INFO DAGScheduler: ResultStage 0 (count at Test1.scala:27) finished in 0.306 s
17/04/05 14:46:03 INFO DAGScheduler: Job 0 finished: count at Test1.scala:27, took 0.613993 s
17/04/05 14:46:03 INFO SparkContext: Starting job: count at FPGrowth.scala:114
17/04/05 14:46:03 INFO DAGScheduler: Got job 1 (count at FPGrowth.scala:114) with 1 output partitions
17/04/05 14:46:03 INFO DAGScheduler: Final stage: ResultStage 1 (count at FPGrowth.scala:114)
17/04/05 14:46:03 INFO DAGScheduler: Parents of final stage: List()
17/04/05 14:46:03 INFO DAGScheduler: Missing parents: List()
17/04/05 14:46:03 INFO DAGScheduler: Submitting ResultStage 1 (MapPartitionsRDD[2] at map at Test1.scala:26), which has no missing parents
17/04/05 14:46:03 INFO MemoryStore: Block broadcast_2 stored as values in memory (estimated size 2.9 KB, free 122.4 KB)
17/04/05 14:46:03 INFO MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 1774.0 B, free 124.2 KB)
17/04/05 14:46:03 INFO BlockManagerInfo: Added broadcast_2_piece0 in memory on localhost:57961 (size: 1774.0 B, free: 1127.2 MB)
17/04/05 14:46:03 INFO SparkContext: Created broadcast 2 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:03 INFO DAGScheduler: Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[2] at map at Test1.scala:26)
17/04/05 14:46:03 INFO TaskSchedulerImpl: Adding task set 1.0 with 1 tasks
17/04/05 14:46:03 INFO TaskSetManager: Starting task 0.0 in stage 1.0 (TID 1, localhost, partition 0,PROCESS_LOCAL, 2038 bytes)
17/04/05 14:46:03 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
17/04/05 14:46:03 INFO BlockManager: Found block rdd_2_0 locally
17/04/05 14:46:03 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 2082 bytes result sent to driver
17/04/05 14:46:03 INFO TaskSetManager: Finished task 0.0 in stage 1.0 (TID 1) in 34 ms on localhost (1/1)
17/04/05 14:46:03 INFO TaskSchedulerImpl: Removed TaskSet 1.0, whose tasks have all completed, from pool 
17/04/05 14:46:03 INFO DAGScheduler: ResultStage 1 (count at FPGrowth.scala:114) finished in 0.036 s
17/04/05 14:46:03 INFO DAGScheduler: Job 1 finished: count at FPGrowth.scala:114, took 0.076317 s
17/04/05 14:46:03 INFO SparkContext: Starting job: collect at FPGrowth.scala:149
17/04/05 14:46:03 INFO DAGScheduler: Registering RDD 4 (map at FPGrowth.scala:146)
17/04/05 14:46:03 INFO DAGScheduler: Got job 2 (collect at FPGrowth.scala:149) with 3 output partitions
17/04/05 14:46:03 INFO DAGScheduler: Final stage: ResultStage 3 (collect at FPGrowth.scala:149)
17/04/05 14:46:03 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 2)
17/04/05 14:46:03 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 2)
17/04/05 14:46:03 INFO DAGScheduler: Submitting ShuffleMapStage 2 (MapPartitionsRDD[4] at map at FPGrowth.scala:146), which has no missing parents
17/04/05 14:46:03 INFO MemoryStore: Block broadcast_3 stored as values in memory (estimated size 4.1 KB, free 128.3 KB)
17/04/05 14:46:03 INFO MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 2.3 KB, free 130.5 KB)
17/04/05 14:46:03 INFO BlockManagerInfo: Added broadcast_3_piece0 in memory on localhost:57961 (size: 2.3 KB, free: 1127.2 MB)
17/04/05 14:46:03 INFO SparkContext: Created broadcast 3 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:03 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 2 (MapPartitionsRDD[4] at map at FPGrowth.scala:146)
17/04/05 14:46:03 INFO TaskSchedulerImpl: Adding task set 2.0 with 1 tasks
17/04/05 14:46:03 INFO TaskSetManager: Starting task 0.0 in stage 2.0 (TID 2, localhost, partition 0,PROCESS_LOCAL, 2027 bytes)
17/04/05 14:46:03 INFO Executor: Running task 0.0 in stage 2.0 (TID 2)
17/04/05 14:46:03 INFO BlockManager: Found block rdd_2_0 locally
17/04/05 14:46:03 INFO Executor: Finished task 0.0 in stage 2.0 (TID 2). 2255 bytes result sent to driver
17/04/05 14:46:03 INFO TaskSetManager: Finished task 0.0 in stage 2.0 (TID 2) in 144 ms on localhost (1/1)
17/04/05 14:46:03 INFO TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool 
17/04/05 14:46:03 INFO DAGScheduler: ShuffleMapStage 2 (map at FPGrowth.scala:146) finished in 0.148 s
17/04/05 14:46:03 INFO DAGScheduler: looking for newly runnable stages
17/04/05 14:46:03 INFO DAGScheduler: running: Set()
17/04/05 14:46:03 INFO DAGScheduler: waiting: Set(ResultStage 3)
17/04/05 14:46:03 INFO DAGScheduler: failed: Set()
17/04/05 14:46:03 INFO DAGScheduler: Submitting ResultStage 3 (MapPartitionsRDD[6] at filter at FPGrowth.scala:148), which has no missing parents
17/04/05 14:46:03 INFO MemoryStore: Block broadcast_4 stored as values in memory (estimated size 3.0 KB, free 133.5 KB)
17/04/05 14:46:03 INFO MemoryStore: Block broadcast_4_piece0 stored as bytes in memory (estimated size 1770.0 B, free 135.2 KB)
17/04/05 14:46:03 INFO BlockManagerInfo: Added broadcast_4_piece0 in memory on localhost:57961 (size: 1770.0 B, free: 1127.2 MB)
17/04/05 14:46:03 INFO SparkContext: Created broadcast 4 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:03 INFO DAGScheduler: Submitting 3 missing tasks from ResultStage 3 (MapPartitionsRDD[6] at filter at FPGrowth.scala:148)
17/04/05 14:46:03 INFO TaskSchedulerImpl: Adding task set 3.0 with 3 tasks
17/04/05 14:46:03 INFO TaskSetManager: Starting task 0.0 in stage 3.0 (TID 3, localhost, partition 0,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:03 INFO Executor: Running task 0.0 in stage 3.0 (TID 3)
17/04/05 14:46:03 INFO BlockManagerInfo: Removed broadcast_2_piece0 on localhost:57961 in memory (size: 1774.0 B, free: 1127.2 MB)
17/04/05 14:46:03 INFO ContextCleaner: Cleaned accumulator 2
17/04/05 14:46:03 INFO BlockManagerInfo: Removed broadcast_1_piece0 on localhost:57961 in memory (size: 1774.0 B, free: 1127.2 MB)
17/04/05 14:46:03 INFO ContextCleaner: Cleaned accumulator 1
17/04/05 14:46:03 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:03 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 11 ms
17/04/05 14:46:03 INFO Executor: Finished task 0.0 in stage 3.0 (TID 3). 1365 bytes result sent to driver
17/04/05 14:46:03 INFO TaskSetManager: Starting task 1.0 in stage 3.0 (TID 4, localhost, partition 1,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:03 INFO Executor: Running task 1.0 in stage 3.0 (TID 4)
17/04/05 14:46:03 INFO TaskSetManager: Finished task 0.0 in stage 3.0 (TID 3) in 107 ms on localhost (1/3)
17/04/05 14:46:03 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:03 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:03 INFO Executor: Finished task 1.0 in stage 3.0 (TID 4). 1313 bytes result sent to driver
17/04/05 14:46:03 INFO TaskSetManager: Starting task 2.0 in stage 3.0 (TID 5, localhost, partition 2,PROCESS_LOCAL, 1813 bytes)
17/04/05 14:46:03 INFO Executor: Running task 2.0 in stage 3.0 (TID 5)
17/04/05 14:46:03 INFO TaskSetManager: Finished task 1.0 in stage 3.0 (TID 4) in 21 ms on localhost (2/3)
17/04/05 14:46:03 INFO ShuffleBlockFetcherIterator: Getting 0 non-empty blocks out of 1 blocks
17/04/05 14:46:03 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:03 INFO Executor: Finished task 2.0 in stage 3.0 (TID 5). 1161 bytes result sent to driver
17/04/05 14:46:03 INFO TaskSetManager: Finished task 2.0 in stage 3.0 (TID 5) in 17 ms on localhost (3/3)
17/04/05 14:46:03 INFO DAGScheduler: ResultStage 3 (collect at FPGrowth.scala:149) finished in 0.146 s
17/04/05 14:46:03 INFO TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool 
17/04/05 14:46:03 INFO DAGScheduler: Job 2 finished: collect at FPGrowth.scala:149, took 0.436373 s
17/04/05 14:46:04 INFO SparkContext: Starting job: foreach at Test1.scala:33
17/04/05 14:46:04 INFO DAGScheduler: Registering RDD 7 (flatMap at FPGrowth.scala:168)
17/04/05 14:46:04 INFO DAGScheduler: Got job 3 (foreach at Test1.scala:33) with 3 output partitions
17/04/05 14:46:04 INFO DAGScheduler: Final stage: ResultStage 5 (foreach at Test1.scala:33)
17/04/05 14:46:04 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 4)
17/04/05 14:46:04 INFO DAGScheduler: Missing parents: List(ShuffleMapStage 4)
17/04/05 14:46:04 INFO DAGScheduler: Submitting ShuffleMapStage 4 (MapPartitionsRDD[7] at flatMap at FPGrowth.scala:168), which has no missing parents
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_5 stored as values in memory (estimated size 6.6 KB, free 132.5 KB)
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_5_piece0 stored as bytes in memory (estimated size 3.4 KB, free 135.8 KB)
17/04/05 14:46:04 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on localhost:57961 (size: 3.4 KB, free: 1127.2 MB)
17/04/05 14:46:04 INFO SparkContext: Created broadcast 5 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:04 INFO DAGScheduler: Submitting 1 missing tasks from ShuffleMapStage 4 (MapPartitionsRDD[7] at flatMap at FPGrowth.scala:168)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Adding task set 4.0 with 1 tasks
17/04/05 14:46:04 INFO TaskSetManager: Starting task 0.0 in stage 4.0 (TID 6, localhost, partition 0,PROCESS_LOCAL, 2027 bytes)
17/04/05 14:46:04 INFO Executor: Running task 0.0 in stage 4.0 (TID 6)
17/04/05 14:46:04 INFO BlockManager: Found block rdd_2_0 locally
17/04/05 14:46:04 INFO Executor: Finished task 0.0 in stage 4.0 (TID 6). 2255 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Finished task 0.0 in stage 4.0 (TID 6) in 99 ms on localhost (1/1)
17/04/05 14:46:04 INFO DAGScheduler: ShuffleMapStage 4 (flatMap at FPGrowth.scala:168) finished in 0.099 s
17/04/05 14:46:04 INFO TaskSchedulerImpl: Removed TaskSet 4.0, whose tasks have all completed, from pool 
17/04/05 14:46:04 INFO DAGScheduler: looking for newly runnable stages
17/04/05 14:46:04 INFO DAGScheduler: running: Set()
17/04/05 14:46:04 INFO DAGScheduler: waiting: Set(ResultStage 5)
17/04/05 14:46:04 INFO DAGScheduler: failed: Set()
17/04/05 14:46:04 INFO DAGScheduler: Submitting ResultStage 5 (MapPartitionsRDD[11] at filter at Test1.scala:32), which has no missing parents
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_6 stored as values in memory (estimated size 7.7 KB, free 143.6 KB)
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_6_piece0 stored as bytes in memory (estimated size 3.8 KB, free 147.4 KB)
17/04/05 14:46:04 INFO BlockManagerInfo: Added broadcast_6_piece0 in memory on localhost:57961 (size: 3.8 KB, free: 1127.2 MB)
17/04/05 14:46:04 INFO SparkContext: Created broadcast 6 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:04 INFO DAGScheduler: Submitting 3 missing tasks from ResultStage 5 (MapPartitionsRDD[11] at filter at Test1.scala:32)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Adding task set 5.0 with 3 tasks
17/04/05 14:46:04 INFO TaskSetManager: Starting task 0.0 in stage 5.0 (TID 7, localhost, partition 0,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 0.0 in stage 5.0 (TID 7)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
尿布->3
尿布 啤酒->3
果汁->4
17/04/05 14:46:04 INFO Executor: Finished task 0.0 in stage 5.0 (TID 7). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Starting task 1.0 in stage 5.0 (TID 8, localhost, partition 1,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 1.0 in stage 5.0 (TID 8)
17/04/05 14:46:04 INFO TaskSetManager: Finished task 0.0 in stage 5.0 (TID 7) in 46 ms on localhost (1/3)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
鸡肉->4
鸡肉 果汁->3
17/04/05 14:46:04 INFO Executor: Finished task 1.0 in stage 5.0 (TID 8). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Starting task 2.0 in stage 5.0 (TID 9, localhost, partition 2,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 2.0 in stage 5.0 (TID 9)
17/04/05 14:46:04 INFO TaskSetManager: Finished task 1.0 in stage 5.0 (TID 8) in 32 ms on localhost (2/3)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
啤酒->4
啤酒 鸡肉->3
啤酒 果汁->3
17/04/05 14:46:04 INFO Executor: Finished task 2.0 in stage 5.0 (TID 9). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO DAGScheduler: ResultStage 5 (foreach at Test1.scala:33) finished in 0.103 s
17/04/05 14:46:04 INFO TaskSetManager: Finished task 2.0 in stage 5.0 (TID 9) in 29 ms on localhost (3/3)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Removed TaskSet 5.0, whose tasks have all completed, from pool 
17/04/05 14:46:04 INFO DAGScheduler: Job 3 finished: foreach at Test1.scala:33, took 0.254952 s
17/04/05 14:46:04 INFO SparkContext: Starting job: foreach at Test1.scala:46
17/04/05 14:46:04 INFO MapOutputTrackerMaster: Size of output statuses for shuffle 1 is 146 bytes
17/04/05 14:46:04 INFO DAGScheduler: Got job 4 (foreach at Test1.scala:46) with 3 output partitions
17/04/05 14:46:04 INFO DAGScheduler: Final stage: ResultStage 7 (foreach at Test1.scala:46)
17/04/05 14:46:04 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 6)
17/04/05 14:46:04 INFO DAGScheduler: Missing parents: List()
17/04/05 14:46:04 INFO DAGScheduler: Submitting ResultStage 7 (MapPartitionsRDD[11] at filter at Test1.scala:32), which has no missing parents
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_7 stored as values in memory (estimated size 7.8 KB, free 155.1 KB)
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_7_piece0 stored as bytes in memory (estimated size 3.8 KB, free 158.9 KB)
17/04/05 14:46:04 INFO BlockManagerInfo: Added broadcast_7_piece0 in memory on localhost:57961 (size: 3.8 KB, free: 1127.2 MB)
17/04/05 14:46:04 INFO SparkContext: Created broadcast 7 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:04 INFO DAGScheduler: Submitting 3 missing tasks from ResultStage 7 (MapPartitionsRDD[11] at filter at Test1.scala:32)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Adding task set 7.0 with 3 tasks
17/04/05 14:46:04 INFO TaskSetManager: Starting task 0.0 in stage 7.0 (TID 10, localhost, partition 0,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 0.0 in stage 7.0 (TID 10)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:04 INFO Executor: Finished task 0.0 in stage 7.0 (TID 10). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Starting task 1.0 in stage 7.0 (TID 11, localhost, partition 1,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 1.0 in stage 7.0 (TID 11)
17/04/05 14:46:04 INFO TaskSetManager: Finished task 0.0 in stage 7.0 (TID 10) in 31 ms on localhost (1/3)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:04 INFO Executor: Finished task 1.0 in stage 7.0 (TID 11). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Starting task 2.0 in stage 7.0 (TID 12, localhost, partition 2,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 2.0 in stage 7.0 (TID 12)
17/04/05 14:46:04 INFO TaskSetManager: Finished task 1.0 in stage 7.0 (TID 11) in 29 ms on localhost (2/3)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 0 ms
17/04/05 14:46:04 INFO Executor: Finished task 2.0 in stage 7.0 (TID 12). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Finished task 2.0 in stage 7.0 (TID 12) in 29 ms on localhost (3/3)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Removed TaskSet 7.0, whose tasks have all completed, from pool 
17/04/05 14:46:04 INFO DAGScheduler: ResultStage 7 (foreach at Test1.scala:46) finished in 0.086 s
17/04/05 14:46:04 INFO DAGScheduler: Job 4 finished: foreach at Test1.scala:46, took 0.126375 s
历史商品出现的次数:1
17/04/05 14:46:04 INFO SparkContext: Starting job: foreach at Test1.scala:56
17/04/05 14:46:04 INFO DAGScheduler: Got job 5 (foreach at Test1.scala:56) with 3 output partitions
17/04/05 14:46:04 INFO DAGScheduler: Final stage: ResultStage 9 (foreach at Test1.scala:56)
17/04/05 14:46:04 INFO DAGScheduler: Parents of final stage: List(ShuffleMapStage 8)
17/04/05 14:46:04 INFO DAGScheduler: Missing parents: List()
17/04/05 14:46:04 INFO DAGScheduler: Submitting ResultStage 9 (MapPartitionsRDD[11] at filter at Test1.scala:32), which has no missing parents
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_8 stored as values in memory (estimated size 7.8 KB, free 166.7 KB)
17/04/05 14:46:04 INFO MemoryStore: Block broadcast_8_piece0 stored as bytes in memory (estimated size 3.8 KB, free 170.5 KB)
17/04/05 14:46:04 INFO BlockManagerInfo: Added broadcast_8_piece0 in memory on localhost:57961 (size: 3.8 KB, free: 1127.2 MB)
17/04/05 14:46:04 INFO SparkContext: Created broadcast 8 from broadcast at DAGScheduler.scala:1006
17/04/05 14:46:04 INFO DAGScheduler: Submitting 3 missing tasks from ResultStage 9 (MapPartitionsRDD[11] at filter at Test1.scala:32)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Adding task set 9.0 with 3 tasks
17/04/05 14:46:04 INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 13, localhost, partition 0,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 0.0 in stage 9.0 (TID 13)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:04 INFO Executor: Finished task 0.0 in stage 9.0 (TID 13). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Starting task 1.0 in stage 9.0 (TID 14, localhost, partition 1,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 1.0 in stage 9.0 (TID 14)
17/04/05 14:46:04 INFO TaskSetManager: Finished task 0.0 in stage 9.0 (TID 13) in 29 ms on localhost (1/3)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:04 INFO Executor: Finished task 1.0 in stage 9.0 (TID 14). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Starting task 2.0 in stage 9.0 (TID 15, localhost, partition 2,NODE_LOCAL, 1813 bytes)
17/04/05 14:46:04 INFO Executor: Running task 2.0 in stage 9.0 (TID 15)
17/04/05 14:46:04 INFO TaskSetManager: Finished task 1.0 in stage 9.0 (TID 14) in 26 ms on localhost (2/3)
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Getting 1 non-empty blocks out of 1 blocks
17/04/05 14:46:04 INFO ShuffleBlockFetcherIterator: Started 0 remote fetches in 1 ms
17/04/05 14:46:04 INFO Executor: Finished task 2.0 in stage 9.0 (TID 15). 1165 bytes result sent to driver
17/04/05 14:46:04 INFO TaskSetManager: Finished task 2.0 in stage 9.0 (TID 15) in 28 ms on localhost (3/3)
17/04/05 14:46:04 INFO TaskSchedulerImpl: Removed TaskSet 9.0, whose tasks have all completed, from pool 
17/04/05 14:46:04 INFO DAGScheduler: ResultStage 9 (foreach at Test1.scala:56) finished in 0.080 s
17/04/05 14:46:04 INFO DAGScheduler: Job 5 finished: foreach at Test1.scala:56, took 0.108626 s
推荐的商品为:
尿布->3.0
尿布->3.0
啤酒->3.0
果汁->4.0
鸡肉->4.0
鸡肉->3.0
果汁->3.0
啤酒->4.0
啤酒->3.0
鸡肉->3.0
啤酒->3.0
果汁->3.0
17/04/05 14:46:04 INFO SparkContext: Invoking stop() from shutdown hook
17/04/05 14:46:04 INFO SparkUI: Stopped Spark web UI at http://162.26.68.91:4041
17/04/05 14:46:04 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/04/05 14:46:04 INFO MemoryStore: MemoryStore cleared
17/04/05 14:46:04 INFO BlockManager: BlockManager stopped
17/04/05 14:46:04 INFO BlockManagerMaster: BlockManagerMaster stopped
17/04/05 14:46:04 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/04/05 14:46:04 INFO SparkContext: Successfully stopped SparkContext
17/04/05 14:46:04 INFO ShutdownHookManager: Shutdown hook called
17/04/05 14:46:04 INFO ShutdownHookManager: Deleting directory C:\Users\test\AppData\Local\Temp\spark-ba475294-932a-4ef9-8ee1-f08e94494013
17/04/05 14:46:04 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.

Process finished with exit code 0

注:参考与http://www.cnblogs.com/txq157/p/6091308.html

发表评论

电子邮件地址不会被公开。 必填项已用*标注