【mongoDB高級篇①】聚集運算之group,aggregate · 我的mongodb小冊

## group ### 語法 ``` db.collection.group({ key:{field:1},//按什么字段進行分組 initial:{count:0},//進行分組前變量初始化,該處聲明的變量可以在以下回調函數中作為result的屬性使用 cond:{},//類似mysql中的where,在分組前篩選出要參與分組的文檔 reduce: function ( curr, result ) { }, //The function takes two arguments: the current document and an aggregation result document for that group.先迭代出分組,然后再迭代分組中的文檔,即curr變量就代表當前分組中此刻迭代到的文檔,result變量就代表當前分組。 keyf:function(doc){},//keyf和key二選一,傳入的參數doc代表當前文檔,當分組依據是經過運算后的字段時使用,作用類似mysql中的group by left('2015-09-12 14:05:22',10); finalize:function(result) {}//該result也就是reduce的result,都是代表當前分組,這個函數是在走完當前分組結束后回調; }) ``` 除了分組的key字段外,就只返回有result參數的回調函數中的操作的屬性字段; ### 實例 ``` # 表結構如下 { _id: ObjectId("5085a95c8fada716c89d0021"), ord_dt: ISODate("2012-07-01T04:00:00Z"), ship_dt: ISODate("2012-07-02T04:00:00Z"), item: { sku: "abc123", price: 1.99, uom: "pcs", qty: 25 } } ``` ``` #Example1 SELECT ord_dt, item_sku FROM orders WHERE ord_dt > '01/01/2012' GROUP BY ord_dt, item_sku ↓↓↓↓ db.orders.group( { key: { ord_dt: 1, 'item.sku': 1 }, cond: { ord_dt: { $gt: new Date( '01/01/2012' ) } }, reduce: function ( curr, result ) { }, initial: { } } ) #Example2 SELECT ord_dt, item_sku, SUM(item_qty) as total FROM orders WHERE ord_dt > '01/01/2012' GROUP BY ord_dt, item_sku ↓↓↓↓ db.orders.group( { key: { ord_dt: 1, 'item.sku': 1 }, cond: { ord_dt: { $gt: new Date( '01/01/2012' ) } }, reduce: function( curr, result ) { result.total += curr.item.qty; }, initial: { total : 0 } } ) #Example3 db.orders.group( { keyf: function(doc) { return { day_of_week: doc.ord_dt.getDay() }; }, cond: { ord_dt: { $gt: new Date( '01/01/2012' ) } }, reduce: function( curr, result ) { result.total += curr.item.qty; result.count++; }, initial: { total : 0, count: 0 }, finalize: function(result) { var weekdays = [ "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday" ]; result.day_of_week = weekdays[result.day_of_week]; result.avg = Math.round(result.total / result.count); } } ) [ { "day_of_week" : "Sunday", "total" : 70, "count" : 
4, "avg" : 18 }, { "day_of_week" : "Friday", "total" : 110, "count" : 6, "avg" : 18 }, { "day_of_week" : "Tuesday", "total" : 70, "count" : 3, "avg" : 23 } ] ``` ### 工作中用到的實例 ``` #查詢每個欄目最貴的商品價格, max()操作 { key:{cat_id:1}, cond:{}, reduce:function(curr , result) { if(curr.shop_price > result.max) { result.max = curr.shop_price; } }, initial:{max:0} } #查詢每個欄目下商品的平均價格 { key:{cat_id:1}, cond:{}, reduce:function(curr , result) { result.cnt += 1; result.sum += curr.shop_price; }, initial:{sum:0,cnt:0}, finalize:function(result) { result.avg = result.sum/result.cnt; //在每次分組完畢后進行運算 } } ``` group其實略微有點雞肋,因為既然用到了mongodb,那復制集和分片是避無可免的,而group是不支持分片的運算 ## Aggregation 聚合管道是一個基于數據處理管道概念的框架。通過使用一個多階段的管道，將一組文檔轉換為最終的聚合結果。 ![aggregation-pipeline.png-78.1kB][1] ### 語法參考手冊: http://docs.mongoing.com/manual-zh/core/aggregation-pipeline.html ``` db.collection.aggregate(pipeline, options); pipeline Array # 與mysql中的字段對比說明 $project # 返回哪些字段,select,說它像select其實是不太準確的,因為aggregate是一個階段性管道操作符,$project是取出哪些數據進入下一個階段管道操作,真正的最終數據返回還是在group等操作中; $match # 放在group前相當于where使用,放在group后面相當于having使用 $sort # 排序1升-1降 sort一般放在group后,也就是說得到結果后再排序,如果先排序再分組一般沒什么意義(配合$first/$last取每組第一條/最后一條時除外); $limit # 相當于limit m,不能設置偏移量 $skip # 跳過前n個文檔 $unwind # 把文檔中的數組元素打開,并形成多個文檔,參考Example1 $group: { _id: <expression>, <field1>: { <accumulator1> : <expression1> }, ... }
# 按什么字段分組,注意所有字段名前面都要加$,否則mongodb就會以為不加$的是普通常量,其中accumulator又包括以下幾個操作符 # $sum,$avg,$first,$last,$max,$min,$push,$addToSet #如果group by null就是 count(*)的效果 $geoNear # 取某一點的最近或最遠,在LBS地理位置中有用 $out # 把結果寫進新的集合中。注意1,不能寫進一個分片集合中。注意2,不能寫進固定集合(capped collection)中 ``` ### 實例 **Example1:** unwind ``` > db.test.insert({ "_id" : 1, "item" : "ABC1", sizes: [ "S", "M", "L"] }); WriteResult({ "nInserted" : 1 }) > db.test.aggregate( [ { $unwind : "$sizes" } ] ) { "_id" : 1, "item" : "ABC1", "sizes" : "S" } { "_id" : 1, "item" : "ABC1", "sizes" : "M" } { "_id" : 1, "item" : "ABC1", "sizes" : "L" } db.test.insert({ "_id" : 2, "item" : "ABC1", sizes: [ "S", "M", "L",["XXL",'XL']] }); WriteResult({ "nInserted" : 1 }) > db.test.aggregate( [ { $unwind : "$sizes" } ] ) { "_id" : 1, "item" : "ABC1", "sizes" : "S" } { "_id" : 1, "item" : "ABC1", "sizes" : "M" } { "_id" : 1, "item" : "ABC1", "sizes" : "L" } { "_id" : 2, "item" : "ABC1", "sizes" : "S" } { "_id" : 2, "item" : "ABC1", "sizes" : "M" } { "_id" : 2, "item" : "ABC1", "sizes" : "L" } { "_id" : 2, "item" : "ABC1", "sizes" : [ "XXL", "XL" ] } # 只能打散一維數組 ``` ### **Example2** ``` #數據源 { "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-03-01T08:00:00Z") } { "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-03-01T09:00:00Z") } { "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-03-15T09:00:00Z") } { "_id" : 4, "item" : "xyz", "price" : 5, "quantity" : 20, "date" : ISODate("2014-04-04T11:21:39.736Z") } { "_id" : 5, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-04-04T21:23:13.331Z") } # 綜合示例 db.sales.aggregate([ # 由上到下,分階段的進行,注意該數組中的順序是有意義的 { $project:{item:1,price:1,quantity:1} # 1.取出什么元素待操作; }, { $group:{ # 2. 
對已取出的元素進行聚合運算; _id:"$item", # 根據什么來分組 quantityCount:{$sum:'$quantity'}, priceTotal:{$sum:'$price'} } }, { $sort:{ quantityCount:1 #3.升序 } }, # 4.基于上面的結果,跳過前2條后只取1條(即升序排列后的第3名) { $skip: 2 }, { $limit:1 }, # 5.然后把結果寫到result集合中 { $out:'result' } ]) #表達式$month,$dayOfMonth,$year,$sum,$avg db.sales.aggregate( [ { $group : { _id : { month: { $month: "$date" }, day: { $dayOfMonth: "$date" }, year: { $year: "$date" } }, #按月日年分組 totalPrice: { $sum: { $multiply: [ "$price", "$quantity" ] } }, averageQuantity: { $avg: "$quantity" }, count: { $sum: 1 } } } ] ) #結果 { "_id" : { "month" : 3, "day" : 15, "year" : 2014 }, "totalPrice" : 50, "averageQuantity" : 10, "count" : 1 } { "_id" : { "month" : 4, "day" : 4, "year" : 2014 }, "totalPrice" : 200, "averageQuantity" : 15, "count" : 2 } { "_id" : { "month" : 3, "day" : 1, "year" : 2014 }, "totalPrice" : 40, "averageQuantity" : 1.5, "count" : 2 } # # # 表達式$push db.sales.aggregate( [ { $group: { _id: { day: { $dayOfYear: "$date"}, year: { $year: "$date" } }, itemsSold: { $push: { item: "$item", quantity: "$quantity" } } } } ] ) # result(注意:本例及后面$addToSet、$first的結果取自官方文檔的示例數據,與上面的數據源并不對應) { "_id" : { "day" : 46, "year" : 2014 }, "itemsSold" : [ { "item" : "abc", "quantity" : 10 }, { "item" : "xyz", "quantity" : 10 }, { "item" : "xyz", "quantity" : 5 }, { "item" : "xyz", "quantity" : 10 } ] } { "_id" : { "day" : 34, "year" : 2014 }, "itemsSold" : [ { "item" : "jkl", "quantity" : 1 }, { "item" : "xyz", "quantity" : 5 } ] } { "_id" : { "day" : 1, "year" : 2014 }, "itemsSold" : [ { "item" : "abc", "quantity" : 2 } ] } # # # 表達式$addToSet db.sales.aggregate( [ { $group: { _id: { day: { $dayOfYear: "$date"}, year: { $year: "$date" } }, itemsSold: { $addToSet: "$item" } } } ] ) #result { "_id" : { "day" : 46, "year" : 2014 }, "itemsSold" : [ "xyz", "abc" ] } { "_id" : { "day" : 34, "year" : 2014 }, "itemsSold" : [ "xyz", "jkl" ] } { "_id" : { "day" : 1, "year" : 2014 }, "itemsSold" : [ "abc" ] } # # # 表達式 $first db.sales.aggregate( [ { $sort: { item: 1, date: 1 } }, { $group: { _id: "$item", firstSalesDate: { 
$first: "$date" } } } ] ) # result { "_id" : "xyz", "firstSalesDate" : ISODate("2014-02-03T09:05:00Z") } { "_id" : "jkl", "firstSalesDate" : ISODate("2014-02-03T09:00:00Z") } { "_id" : "abc", "firstSalesDate" : ISODate("2014-01-01T08:00:00Z") } ``` **Example3** ``` db.sales.aggregate( [ { $group : { _id : null, # 如果為null,就統計出全部 totalPrice: { $sum: { $multiply: [ "$price", "$quantity" ] } }, averageQuantity: { $avg: "$quantity" }, count: { $sum: 1 } } } ] ) ``` **Example4** ``` # 數據源 { "_id" : 8751, "title" : "The Banquet", "author" : "Dante", "copies" : 2 } { "_id" : 8752, "title" : "Divine Comedy", "author" : "Dante", "copies" : 1 } { "_id" : 8645, "title" : "Eclogues", "author" : "Dante", "copies" : 2 } { "_id" : 7000, "title" : "The Odyssey", "author" : "Homer", "copies" : 10 } { "_id" : 7020, "title" : "Iliad", "author" : "Homer", "copies" : 10 } # 根據作者分組,獲得每位作者所著的書名列表 db.books.aggregate( [ { $group : { _id : "$author", books: { $push: "$title" } } } ] ) # result { "_id" : "Homer", "books" : [ "The Odyssey", "Iliad" ] } { "_id" : "Dante", "books" : [ "The Banquet", "Divine Comedy", "Eclogues" ] } # 同樣按作者分組,但通過系統變量$$ROOT(當前的根文檔)把整個文檔放入數組 db.books.aggregate( [ { $group : { _id : "$author", books: { $push: "$$ROOT" } } } ] ) # result { "_id" : "Homer", "books" : [ { "_id" : 7000, "title" : "The Odyssey", "author" : "Homer", "copies" : 10 }, { "_id" : 7020, "title" : "Iliad", "author" : "Homer", "copies" : 10 } ] } { "_id" : "Dante", "books" : [ { "_id" : 8751, "title" : "The Banquet", "author" : "Dante", "copies" : 2 }, { "_id" : 8752, "title" : "Divine Comedy", "author" : "Dante", "copies" : 1 }, { "_id" : 8645, "title" : "Eclogues", "author" : "Dante", "copies" : 2 } ] } ``` 郵政編碼數據集的聚合實例: http://docs.mongoing.com/manual-zh/tutorial/aggregation-zip-code-data-set.html 對用戶愛好數據做聚合實例: http://docs.mongoing.com/manual-zh/tutorial/aggregation-with-user-preference-data.html [1]: http://static.zybuluo.com/a5635268/bojbhmkbcl4tw0w2mlrple8e/aggregation-pipeline.png