以上图片是一张经常用来解释宽窄依赖的经典图,来源于论文《Resilient Distributed Datasets: A Fault-Tolerant Abstraction for In-Memory Cluster Computing》。以下这段话也来自于该论文:
join: Joining two RDDs may lead to either two narrow dependencies (if they are both hash/range partitioned with the same partitioner), two wide dependencies, or a mix (if one parent has a partitioner and one does not). In either case, the output RDD has a partitioner (either one inherited from the parents or a default hash partitioner).
class SparkContext(config: SparkConf) extends Logging {

  /**
   * Default level of parallelism to use when not given by user
   * (e.g. parallelize and makeRDD). Delegates to the task scheduler.
   */
  def defaultParallelism: Int = {
    // Presumably rejects calls after the context has been stopped — confirm.
    assertNotStopped()
    taskScheduler.defaultParallelism
  }

  /**
   * Distribute a local Scala collection to form an RDD.
   *
   * @param seq       the local collection to distribute
   * @param numSlices number of partitions to cut the collection into;
   *                  defaults to [[defaultParallelism]]
   * @return an RDD backed by the given collection
   */
  def parallelize[T: ClassTag](
      seq: Seq[T],
      numSlices: Int = defaultParallelism): RDD[T] = withScope {
    assertNotStopped()
    // Empty location-preference map: no preferred hosts for any partition.
    new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]())
  }
}
// ... (some code omitted)

// Load any properties specified through --conf and the default properties file.
// Fill in any configuration keys still missing on sparkConf from
// args.sparkProperties (a map already populated from spark-defaults.conf);
// setIfMissing means explicitly-set values take precedence over defaults.
for ((k, v) <- args.sparkProperties) {
  sparkConf.setIfMissing(k, v)
}
package main

import (
	"database/sql"
	"log"
	"time"

	_ "github.com/go-sql-driver/mysql"
)

// Demo: reproduces the "invalid connection" error that occurs when the MySQL
// server closes idle connections faster than the client pool recycles them.
func main() {
	// Before you run this test program, please run the script in your mysql:
	//     set global wait_timeout=10;
	// wait_timeout is how long the MySQL server waits before closing an
	// inactive connection.
	// See https://github.com/go-sql-driver/mysql/issues/657
	db, err := sql.Open("mysql", "root:zhang@tcp(127.0.0.1:3306)/?charset=latin1&autocommit=1&parseTime=true&loc=Local&timeout=3s")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Uncommenting this makes the pool retire connections before the server
	// kills them, avoiding the error this demo reproduces.
	//db.SetConnMaxLifetime(5 * time.Second)

	err = db.Ping()
	if err != nil {
		log.Fatal(err)
	}

	go func() {
		for {
			_, err := db.Exec("select * from test_time.A")
			if err != nil {
				log.Fatal(err)
			}
			// Wait for 11 seconds. This should be enough to timeout the conn,
			// since `wait_timeout` is 10s.
			time.Sleep(11 * time.Second)
		}
	}()

	// Keep main alive long enough to observe several query iterations.
	time.Sleep(1000 * time.Second)
}