# 11.03.2020
import findspark
findspark.init('/home/bigdata/Documents/spark-3.0.0')
from pyspark import SparkContext, SparkConf
from pyspark.streaming import StreamingContext
# Sliding-window word count over a TCP text stream.
#
# Reads lines from localhost:5000, splits them into words and prints, every
# 5 seconds, the per-word count over the last 15 seconds.
config = SparkConf().setAppName("reduce_by_key_window_example")
sc = SparkContext(conf=config)

# Batch interval of 1 second; the window and slide durations below are
# multiples of it.
ssc = StreamingContext(sc, 1)

# Checkpointing must be enabled because reduceByKeyAndWindow is used with an
# inverse function (incremental window computation keeps state across batches).
ssc.checkpoint("checkpoint")

lines = ssc.socketTextStream("localhost", 5000)
words = lines.flatMap(lambda line: line.split(" "))
pairs = words.map(lambda word: (word, 1))

# `func` folds in the counts of batches entering the window; `invFunc` must be
# its inverse and remove the counts of batches leaving the window. Using
# addition for both would make the totals grow without bound.
pairs.reduceByKeyAndWindow(
    func=lambda x, y: x + y,
    invFunc=lambda x, y: x - y,
    windowDuration=15,
    slideDuration=5,
).pprint()

ssc.start()
try:
    # Block until the stream is terminated (e.g. Ctrl+C or an error);
    # without this the context would be stopped before any window is emitted.
    ssc.awaitTermination()
finally:
    # Let already-received data finish processing, then release the
    # SparkContext as well.
    ssc.stop(stopGraceFully=True, stopSparkContext=True)