The GitHub source code is located at:
https://github.com/elbasri/mongodb/blob/master/mapreduce
Full source:
// News Content (Body)
/**
 * Map function that counts words in the `body` field of a document.
 *
 * Invoked with `this` bound to the current document. For every
 * whitespace-separated, lower-cased token of the body that is longer than
 * four characters it calls `emit(token, 1)`.
 *
 * Documents whose `body` is missing or is not a string emit nothing.
 *
 * `var` is kept on purpose: `map` is declared more than once in this file,
 * which `let`/`const` would reject.
 */
var map = function() {
  var body = this.body;
  if (typeof body !== "string") {
    return;
  }

  // Replace markup with a space (not an empty string) so that words which
  // are separated only by tags, e.g. "</p><p>", are not fused together.
  var text = body.replace(/<[^>]*>/g, " ");

  // Split on any run of whitespace so newlines and tabs separate words too.
  var words = text.toLowerCase().split(/\s+/);

  for (var i = words.length - 1; i >= 0; i--) {
    // Only words longer than four characters are counted; this also drops
    // the empty tokens produced by leading/trailing whitespace.
    if (words[i].length > 4) {
      emit(words[i], 1);
    }
  }
};
// News Titles
/**
 * Map function that counts words in the `title` field of a document.
 *
 * Invoked with `this` bound to the current document. For every
 * whitespace-separated, lower-cased token of the title that is longer than
 * four characters it calls `emit(token, 1)`.
 *
 * Documents whose `title` is missing or is not a string emit nothing.
 *
 * NOTE: this reassigns `map`, replacing the body-based definition that
 * appears earlier in this file; whichever definition is evaluated last is
 * the one later passed to mapReduce.
 */
var map = function() {
  var title = this.title;
  if (typeof title !== "string") {
    return;
  }

  // Split on any run of whitespace so tabs/newlines separate words too.
  var words = title.toLowerCase().split(/\s+/);

  for (var i = words.length - 1; i >= 0; i--) {
    // Only words longer than four characters are counted.
    if (words[i].length > 4) {
      emit(words[i], 1);
    }
  }
};
/**
 * Reduce function: sums the values emitted for a single key.
 *
 * @param {*} key - The key the values were emitted under (not used).
 * @param {Array<number>} values - Values to add together.
 * @returns {number} The sum of `values`; 0 for an empty array.
 */
var reduce = function(key, values) {
  return values.reduce(function(total, value) {
    return total + value;
  }, 0);
};
// Alternative invocations (uncomment one to use it instead of the limited run):
// All Docs
// db.news.mapReduce(map, reduce, { out: "word_count" })
// Inline Out
// db.news.mapReduce(map, reduce, { out: { inline: 1 } })
// Limited
// Runs the job with `limit: 1000` and writes the result to the
// "word_count2" collection.
db.news.mapReduce(map, reduce, { limit: 1000, out: "word_count2" });
// Show the 10 entries with the highest `value`. This reads the collection
// the job above wrote to ("word_count2"), not "word_count", which the
// limited run does not produce.
db.word_count2.find().sort({ value: -1 }).limit(10);