# spark-shell
scala> val lines = sc.textFile("README.md")
lines: org.apache.spark.rdd.RDD[String] = README.md MapPartitionsRDD[1] at textFile at
scala> lines.count()
res0: Long = 99
scala> lines.first()
res1: String = # Apache Spark
val lines=sc.textFile("hdfs://") ==加载进来成为rdd resilient distributed dataset 弹性数据集
val errors=lines.filter(_.startsWith("error")) ##transformation
errors.persist() ##缓存rdd
val mysql_error=errors.filter(_.contains("mysql")).count ##action
val http_error=errors.filter(_.contains("http")).count ##action
忘事儿的菲芸