>>> rdd = sc.parallelize([("bone", 231), ("bone", 21213), ("jack",1)])
>>> rdd.groupByKey().map(lambda x: sum(x[1])).collect()
[1, 21444]
>>> rdd.groupByKey().map(lambda x: (x[0],sum(x[1]))).collect()
[('jack', 1), ('bone', 21444)]
>>> rdd.groupByKey().map(lambda x: (x[0],sum(x[1].data))).collect()
[('jack', 1), ('bone', 21444)]
>>> rdd.groupByKey().mapValues(lambda x: sum(x)).collect()
[('jack', 1), ('bone', 21444)]
>>> rdd.groupByKey().mapValues(lambda x: sum(x.data)).collect()
[('jack', 1), ('bone', 21444)]