MongoDB and MapReduce: full example for news word counts [GitHub source]

MongoDB and MapReduce: full example for news word counts

The source code is available on GitHub at:
https://github.com/elbasri/mongodb/blob/master/mapreduce

Full source:

// News Content (Body)
/**
 * Map function: word counts over the HTML `body` field of a news document.
 *
 * Called by mapReduce with `this` bound to the current document. Removes
 * HTML tags, lower-cases the remaining text, splits it on whitespace and
 * calls `emit(word, 1)` for every word longer than four characters.
 * Documents whose `body` is missing, empty or not a string emit nothing.
 */
var map = function() {
    var body = this.body;
    if (typeof body !== "string" || body.length === 0) {
        return;
    }

    // Replace each tag with a space rather than an empty string, so words
    // separated only by markup ("foo<br>bar") are not merged into one token.
    // Once every tag is gone no further tag-specific rewriting can match,
    // so a single pass is all that is needed.
    var text = body.replace(/<[^>]*>/g, " ");

    // Split on any run of whitespace so newlines and tabs separate words
    // just like spaces do.
    var words = text.toLowerCase().split(/\s+/);

    for (var i = words.length - 1; i >= 0; i--) {
        // Only words longer than four characters are counted; this also
        // drops the empty strings produced by leading/trailing whitespace.
        if (words[i].length > 4) {
            emit(words[i], 1);
        }
    }
};
// News Titles
/**
 * Map function: word counts over the `title` field of a news document.
 *
 * Called by mapReduce with `this` bound to the current document. Lower-cases
 * the title, splits it on whitespace and calls `emit(word, 1)` for every
 * word longer than four characters. Documents whose `title` is missing,
 * empty or not a string emit nothing.
 *
 * NOTE: this assignment reuses the name `map`, so when the whole script is
 * loaded it replaces the body-based map defined earlier; keep only the
 * variant you want to run.
 */
var map = function() {
    var title = this.title;
    if (typeof title !== "string" || title.length === 0) {
        return;
    }

    // Split on any run of whitespace so tabs/newlines separate words too.
    var words = title.toLowerCase().split(/\s+/);

    for (var i = words.length - 1; i >= 0; i--) {
        // Only words longer than four characters are counted; this also
        // drops empty strings produced by leading/trailing whitespace.
        if (words[i].length > 4) {
            emit(words[i], 1);
        }
    }
};
/**
 * Reduce function: adds up all partial counts emitted for one word.
 *
 * @param {string} key - The word being reduced (not used in the sum).
 * @param {number[]} values - Counts emitted by map or returned by earlier
 *     reduce passes for this key.
 * @returns {number} The sum of `values`, or 0 when `values` is empty.
 */
var reduce = function(key, values) {
    return values.reduce(function(total, partial) {
        return total + partial;
    }, 0);
};
// Alternative invocations (uncomment the one you need):
// All Docs
// db.news.mapReduce(map, reduce, { out: "word_count" })
// Inline Out
// db.news.mapReduce(map, reduce, { out: { inline: 1 } })

// Limited: process only the first 1000 documents of `news` and write the
// per-word totals to the `word_count2` collection.
db.news.mapReduce(map, reduce, { limit: 1000, out: "word_count2" });

// Show the ten most frequent words. This reads the same collection the job
// above wrote to (`word_count2`), so the query reflects that run.
db.word_count2.find().sort({ value: -1 }).limit(10);

Leave a Comment

Your email address will not be published. Required fields are marked *

*
*