11.03.2020
import findspark
findspark.init('/home/bigdata/Documents/spark-3.0.0')  # point findspark at the local Spark installation
from pyspark import SparkContext, SparkConf
from pyspark.streaming import StreamingContext
import os
config = SparkConf().setAppName("UNSW").set("spark.executor.memory", "2g")
sc = SparkContext(conf=config)
ssc = StreamingContext(sc, 2)  # micro-batch interval: 2 seconds
ssc.checkpoint("checkpoint")   # checkpoint directory, required for stateful transformations
unsw_list = list()
entries = os.listdir('UNSW_data')  # one RDD will be built per file in UNSW_data
for entry in entries:
    print(entry)
    path = "UNSW_data/" + entry
    unsw_list += [sc.textFile(path, 4)]  # read the file as an RDD split into 4 partitions
unsw_list[49].take(2)  # peek at two records of the 50th file (assumes at least 50 files in UNSW_data)
rdd_stream = ssc.queueStream(unsw_list)  # serve one queued RDD per batch interval
################################## solution (transformation and action) #########################################
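# A minimal sketch of one possible pipeline, not the exercise's official
# solution: count the records in each 2-second batch and keep a running
# total across batches. updateStateByKey is why the checkpoint directory
# was set above; the key name "records" is an illustrative choice.
def update_total(new_counts, total):
    return sum(new_counts) + (total or 0)

per_batch = rdd_stream.map(lambda line: ("records", 1)) \
                      .reduceByKey(lambda a, b: a + b)  # transformation: per-batch record count
running = per_batch.updateStateByKey(update_total)      # stateful transformation across batches
running.pprint()                                        # action: print the running total each batch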
ssc.start()  # begin consuming the queued RDDs, one per 2-second batch
ssc.awaitTerminationOrTimeout(120)  # give the queued batches time to drain; the 120 s budget is an assumption
ssc.stop(stopGraceFully=True, stopSparkContext=True)  # graceful stop, then release the SparkContext