在MongoDB中合并两个集合

我一直试图在MongoDB中使用MapReduce来完成一个我认为很简单的处理。我不确定这是否是正确的方法,甚至不确定是否应该使用MapReduce。我用能想到的关键词搜索了文档,希望找到最有帮助的内容,但一无所获。也许是我把问题想得太复杂了?

我有两个集合:details 和 gpas。

details由大量文档(300万以上)组成。同一个studentid最多可以出现两次,每个year(学年)各一条,如下所示:

 { "_id" : ObjectId("4d49b7yah5b6d8372v640100"), "classes" : [1,17,19,21], "studentid" : "12345a", "year" : 1} { "_id" : ObjectId("4d76b7oij7s2d8372v640100"), "classes" : [2,12,19,22], "studentid" : "98765a", "year" : 1} { "_id" : ObjectId("4d49b7oij7s2d8372v640100"), "classes" : [32,91,101,217], "studentid" : "12345a", "year" : 2} { "_id" : ObjectId("4d76b7rty7s2d8372v640100"), "classes" : [1,11,18,22], "studentid" : "24680a", "year" : 1} { "_id" : ObjectId("4d49b7oij7s2d8856v640100"), "classes" : [32,99,110,215], "studentid" : "98765a", "year" : 2} ... 

gpas中的文档数量与details大致相同。每个studentid在gpas中只有一个条目,如下所示:

 { "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "overall" : 97, "subscore": 1} { "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "overall" : 85, "subscore": 5} { "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "overall" : 76, "subscore": 2} ... 

最终,我希望得到一个集合,其中每个学生对应一条如下格式的文档:

 { "_id" : ObjectId("4d49b7yah5b6d8372v640111"), "studentid" : "12345a", "classes_1": [1,17,19,21], "classes_2": [32,91,101,217], "overall" : 97, "subscore": 1} { "_id" : ObjectId("4f76b7oij7s2d8372v640213"), "studentid" : "98765a", "classes_1": [2,12,19,22], "classes_2": [32,99,110,215], "overall" : 85, "subscore": 5} { "_id" : ObjectId("4j49b7oij7s2d8372v640871"), "studentid" : "24680a", "classes_1": [1,11,18,22], "classes_2": [], "overall" : 76, "subscore": 2} ... 

我要这样做的方式是像这样运行MapReduce:

 // Map over `details`: emits one value per document, keyed by studentid.
 // `overall` and `subscore` are zero-filled placeholders.
 var mapDetails = function() {
   var detailValue = {
     studentid: this.studentid,
     classes: this.classes,
     year: this.year,
     overall: 0,
     subscore: 0
   };
   emit(this.studentid, detailValue);
 };

 // Map over `gpas`: emits one value per document, keyed by studentid.
 // `year: 0` marks the value as a gpa record; `classes` is an empty placeholder.
 var mapGpas = function() {
   var gpaValue = {
     studentid: this.studentid,
     classes: [],
     year: 0,
     overall: this.overall,
     subscore: this.subscore
   };
   emit(this.studentid, gpaValue);
 };

 // Reduce: folds all values emitted for one studentid into a single object.
 // NOTE: the returned object has `classes_1`/`classes_2` but no `year` or
 // `classes` field, so its shape differs from what the map functions emit.
 // A value of this returned shape that is passed back into this function
 // matches none of the `year` tests below, so its class arrays are not copied.
 var reduce = function(key, values) {
   var merged = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0 };
   for (var i = 0; i < values.length; i++) {
     var entry = values[i];
     if (value_isGpa(entry)) {
       merged.overall = entry.overall;
       merged.subscore = entry.subscore;
       continue;
     }
     if (entry.year == 1) {
       merged.classes_1 = entry.classes;
     }
     if (entry.year == 2) {
       merged.classes_2 = entry.classes;
     }
     merged.studentid = entry.studentid;
   }
   return merged;

   // True when the value came from mapGpas (marked with year 0).
   function value_isGpa(candidate) {
     return candidate.year == 0;
   }
 };

 // Run both jobs into the same output collection using the `reduce` output action.
 res = db.details.mapReduce(mapDetails, reduce, {out: {reduce: 'joined'}})
 res = db.gpas.mapReduce(mapGpas, reduce, {out: {reduce: 'joined'}})

但是当我运行它,这是我的结果集合:

 { "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 97, "subscore" : 1 } } { "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 85, "subscore" : 5 } } { "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } } 

结果中缺少了classes数组。

另外,顺便问一下:如何访问MapReduce输出的value字段中的元素?MapReduce是否总是输出到value,还是可以使用其他名称?

这与MongoDB用户Google Groups上提出的问题类似。
https://groups.google.com/group/mongodb-user/browse_thread/thread/60a8b683e2626ada?pli=1

该答案引用了一个与您的示例类似的在线教程:http://tebros.com/2011/07/using-mongodb-mapreduce-to-join-2-collections/

有关MongoDB中MapReduce的更多信息,请参阅文档:http://www.mongodb.org/display/DOCS/MapReduce

此外,在MongoDB Cookbook中题为“使用版本化文档查找最大值和最小值”的文章的“Extras”部分,有一个很有帮助的MapReduce操作分步演示:http://cookbook.mongodb.org/patterns/finding_max_and_min/

如果您已经读过其中一些参考资料,请见谅。我把它们列在这里,是为了方便其他可能读到本文、并且刚开始在MongoDB中使用MapReduce的用户。

Map函数中“emit”语句的输出与Reduce函数的输出相匹配是非常重要的。 如果只有一个由Map函数输出的文档,Reduce函数可能根本就不运行,那么您的输出集合将会有不匹配的文档。

我稍微修改了你的映射语句,以你想要的输出格式发送文档,并有两个独立的“类”数组。
我还重写了您的reduce语句,以便将类添加到classes_1和classes_2数组中,但前提是它们尚不存在。

 // Map over `details`: emits one value per document, keyed by studentid,
 // already in the target shape. The document's `classes` array is placed in
 // `classes_1` or `classes_2` according to `year`; gpa fields are zero-filled.
 var mapDetails = function() {
   var output = { studentid: this.studentid, classes_1: [], classes_2: [], year: this.year, overall: 0, subscore: 0 };
   if (this.year == 1) {
     output.classes_1 = this.classes;
   }
   if (this.year == 2) {
     output.classes_2 = this.classes;
   }
   emit(this.studentid, output);
 };

 // Map over `gpas`: emits one value per document, keyed by studentid.
 // `year: 0` marks the value as a gpa record; both class arrays are empty.
 var mapGpas = function() {
   emit(this.studentid, { studentid: this.studentid, classes_1: [], classes_2: [], year: 0, overall: this.overall, subscore: this.subscore });
 };

 // Reduce: merges all values for one studentid.
 // Class ids are appended to classes_1 / classes_2 only if not already present.
 // overall / subscore are copied only from values marked with year 0.
 // NOTE(review): the returned object carries no `year` field, so a value of
 // this returned shape fed back into this function would not contribute its
 // overall / subscore -- confirm this is acceptable for your run order.
 var r = function(key, values) {
   var outs = { studentid: "0", classes_1: [], classes_2: [], overall: 0, subscore: 0 };
   values.forEach(function(v) {
     outs.studentid = v.studentid;
     // The callback parameter is named `cls` because `class` is a reserved
     // word and cannot be used as an identifier.
     v.classes_1.forEach(function(cls) {
       if (outs.classes_1.indexOf(cls) == -1) {
         outs.classes_1.push(cls);
       }
     });
     v.classes_2.forEach(function(cls) {
       if (outs.classes_2.indexOf(cls) == -1) {
         outs.classes_2.push(cls);
       }
     });
     if (v.year == 0) {
       outs.overall = v.overall;
       outs.subscore = v.subscore;
     }
   });
   return outs;
 };

 // Run both jobs into the same output collection using the `reduce` output action.
 res = db.details.mapReduce(mapDetails, r, {out: {reduce: 'joined'}});
 res = db.gpas.mapReduce(mapGpas, r, {out: {reduce: 'joined'}});

运行两个MapReduce操作会生成以下集合,它们与您所需的格式相匹配:

 > db.joined.find() { "_id" : "12345a", "value" : { "studentid" : "12345a", "classes_1" : [ 1, 17, 19, 21 ], "classes_2" : [ 32, 91, 101, 217 ], "overall" : 97, "subscore" : 1 } } { "_id" : "24680a", "value" : { "studentid" : "24680a", "classes_1" : [ 1, 11, 18, 22 ], "classes_2" : [ ], "overall" : 76, "subscore" : 2 } } { "_id" : "98765a", "value" : { "studentid" : "98765a", "classes_1" : [ 2, 12, 19, 22 ], "classes_2" : [ 32, 99, 110, 215 ], "overall" : 85, "subscore" : 5 } } > 

MapReduce总是以 {_id: "id", value: "value"} 的形式输出文档。关于如何访问子文档中的字段,文档中题为“Dot Notation (Reaching into Objects)”的部分有更多信息:http://www.mongodb.org/display/DOCS/Dot+Notation+%28Reaching+into+Objects%29

如果您希望MapReduce的输出以不同的格式显示,则必须在应用程序中以编程方式执行此操作。

希望这会提高你对MapReduce的理解,让你更近一步地生成你想要的输出集合。 祝你好运!

你不能用MapReduce(m/r)来做这件事,因为它一次只能处理一个集合。从多个集合中读取会破坏分片兼容性,因此不被允许。你可以用新的聚合框架(2.1+)来实现你想要的效果,或者在你的应用程序中完成这项工作。