# Local Python list that will be distributed as an RDD.
data = [1, 2, 4, 7, 11, 15, 20]
data
# Distribute the list across 4 partitions. `sc` is the SparkContext, which
# is expected to already exist (e.g. provided by the pyspark shell).
rdd = sc.parallelize(data, numSlices=4)
# Bare expression: when run interactively this shows the RDD object itself,
# not its contents.
rdd
# collect() is an action: it returns every element to the driver as a list.
rdd.collect()
# map: apply (x + 2) * 4 to every element, producing one output per input.
# For the values in `data` this yields [12, 16, 24, 36, 52, 68, 88].
rdd1 = rdd.map(lambda x: (x + 2) * 4)
rdd1.collect()
# filter with the same expression. NOTE: the predicate returns the integer
# (x + 2) * 4, which is non-zero (truthy) for every element of `data`, so
# this filter keeps everything and the result equals the original data.
# Compare with rdd3, whose predicate is a real boolean test.
rdd2 = rdd.filter(lambda x: (x + 2) * 4)
rdd2.collect()
# filter: keep only the elements divisible by 3 (here just 15).
rdd3 = rdd.filter(lambda x: x % 3 == 0)
rdd3.collect()
# map with the same test: every element is replaced by True/False instead
# of being kept or dropped, so the result has as many items as `data`.
rdd4 = rdd.map(lambda x: x % 3 == 0)
rdd4.collect()
# RDD containing repeated values (2, 7 and 40 occur more than once).
rdd5 = sc.parallelize([4, 2, 2, 6, 7, 7, 19, 40, 41, 40, 40])
# distinct() is a lazy transformation: on its own it only describes a new
# RDD; nothing is computed until an action is called on it.
rdd5.distinct()
# Chaining collect() runs the job and returns the unique values
# (the order of the returned elements is not guaranteed).
rdd5.distinct().collect()
rdd6 = sc.parallelize([1, 2, 3, 4])
# map: each element becomes ONE output item, here a 3-element list, so the
# result is a list of lists: [[1, 3, 8], [2, 4, 9], [3, 5, 10], [4, 6, 11]].
rdd7 = rdd6.map(lambda x: [x, x + 2, x + 7])
rdd7.collect()
# flatMap: same function, but the returned lists are flattened into a
# single sequence: [1, 3, 8, 2, 4, 9, 3, 5, 10, 4, 6, 11].
rdd8 = rdd6.flatMap(lambda x: [x, x + 2, x + 7])
rdd8.collect()
# Print both results to compare the nested and the flattened shape.
print(rdd7.collect())
print(rdd8.collect())
# reduce is an action: it folds all elements pairwise with the given
# function. Here that is the product of the twelve flattened values
# (68428800).
rdd8.reduce(lambda a, b: a * b)