# Download the sample corpus into the notebook's current working directory.
! wget https://hhmida.github.io/tp-hadoop/assets/shakespeare.txt
# Copy the local file into HDFS. No destination is given, so it presumably
# lands in the current user's HDFS home directory — TODO confirm that this
# matches the hdfs:///user/uti/ path used when the job is run on Hadoop.
! hadoop fs -put shakespeare.txt
La commande magique `%%file` indique à Jupyter de sauvegarder le contenu de la cellule dans un fichier (ici `wordcount.py`) au lieu de l'exécuter.
%%file wordcount.py
from mrjob.step import MRStep
from mrjob.job import MRJob
class WordCount(MRJob):
    """MapReduce job that counts occurrences of each whitespace-separated word.

    Words are taken exactly as ``str.split()`` returns them: matching is
    case-sensitive and any punctuation attached to a token is kept.
    """

    def steps(self):
        """Return the job's single map / combine / reduce step."""
        return [
            MRStep(
                mapper=self.mapper_get_words,
                combiner=self.combiner_count_words,
                reducer=self.reducer_count_words,
            )
        ]

    def mapper_get_words(self, _, line):
        """Yield ``(word, 1)`` for every whitespace-separated token of ``line``.

        The input key is unused.
        """
        for word in line.split():
            yield word, 1

    def combiner_count_words(self, key, values):
        """Yield ``(key, partial_sum)`` for the counts seen so far for ``key``.

        Pre-aggregating mapper output reduces the number of pairs handed to
        the reducers. Addition is associative and commutative, so the final
        totals are the same as without a combiner.
        """
        yield key, sum(values)

    def reducer_count_words(self, key, values):
        """Yield ``(key, total)`` where ``total`` is the sum of ``values``."""
        yield key, sum(values)
if __name__ == '__main__':
    # Only start the job when this file is executed as a script, not when
    # it is imported as a module.
    WordCount.run()
# Run the job on the local copy of the corpus; no -r option is given, so
# mrjob uses its default (non-Hadoop) runner.
! python3 wordcount.py shakespeare.txt
# Run the same job with the Hadoop runner (-r hadoop), reading the input
# from HDFS instead of the local file system.
! python3 wordcount.py -r hadoop hdfs:///user/uti/shakespeare.txt