Tuesday, June 30, 2015

Sharding in MongoDB

Horizontal scaling
Method of storing data across multiple m/c
Shards stores user related data



  • Start Server service
mongod --configsvr --dbpath C:\data\confdb --port 27019

  • Start  Agent service
mongos --configdb localhost:27019

  • Start  Database server
mongod --dbpath C:\data\sh1 --port 27020

  • Start Client
C:\>mongo --host localhost --port 27017
MongoDB shell version: 2.6.5
connecting to: localhost:27017/test
mongos>


mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "version" : 4,
        "minCompatibleVersion" : 4,
        "currentVersion" : 5,
        "clusterId" : ObjectId("55936ac576a153b4e1f67c45")
}
  shards:
  databases:
        {  "_id" : "admin",  "partitioned" : false,  "primary" : "config" }

  • Adding shard

mongos> sh.addShard("localhost:27020")
{ "shardAdded" : "shard0000", "ok" : 1 }

  • Check status again

mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "version" : 4,
        "minCompatibleVersion" : 4,
        "currentVersion" : 5,
        "clusterId" : ObjectId("55936ac576a153b4e1f67c45")
}
  shards:
        {  "_id" : "shard0000",  "host" : "localhost:27020" }
  databases:
        {  "_id" : "admin",  "partitioned" : false,  "primary" : "config" }

Enable sharding
mongos> use mydb1
switched to db mydb1
mongos> sh.enableSharding("mydb1")
{ "ok" : 1 }

mongos> db.createCollection("shTbl")
{ "ok" : 1 }
mongos> db.shTbl.insert({x:1})
WriteResult({ "nInserted" : 1 })
mongos> db.shTbl.find()
{ "_id" : ObjectId("55936e5fc7deef0077d13a9c"), "x" : 1 }
mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "version" : 4,
        "minCompatibleVersion" : 4,
        "currentVersion" : 5,
        "clusterId" : ObjectId("55936ac576a153b4e1f67c45")
}
  shards:
        {  "_id" : "shard0000",  "host" : "localhost:27020" }
  databases:
        {  "_id" : "admin",  "partitioned" : false,  "primary" : "config" }
        {  "_id" : "mydb1",  "partitioned" : true,  "primary" : "shard0000" }

mongos> db.shTbl.ensureIndex({x:1})
{
        "raw" : {
                "localhost:27020" : {
                        "createdCollectionAutomatically" : false,
                        "numIndexesBefore" : 1,
                        "numIndexesAfter" : 2,
                        "ok" : 1
                }
        },
        "ok" : 1
}
mongos> db.shTbl.getIndexex()
2015-07-01T10:09:05.018+0530 TypeError: Property 'getIndexex' of object mydb1.shTbl is not a functi
mongos> db.shTbl.getIndexes()
[
        {
                "v" : 1,
                "key" : {
                        "_id" : 1
                },
                "name" : "_id_",
                "ns" : "mydb1.shTbl"
        },
        {
                "v" : 1,
                "key" : {
                        "x" : 1
                },
                "name" : "x_1",
                "ns" : "mydb1.shTbl"
        }
]
mongos> sh.shardCollection("mydb1.shTbl",{"x":1})
{ "collectionsharded" : "mydb1.shTbl", "ok" : 1 }
mongos>


mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "version" : 4,
        "minCompatibleVersion" : 4,
        "currentVersion" : 5,
        "clusterId" : ObjectId("55936ac576a153b4e1f67c45")
}
  shards:
        {  "_id" : "shard0000",  "host" : "localhost:27020" }
  databases:
        {  "_id" : "admin",  "partitioned" : false,  "primary" : "config" }
        {  "_id" : "mydb1",  "partitioned" : true,  "primary" : "shard0000" }
                mydb1.shTbl
                        shard key: { "x" : 1 }
                        chunks:
                                shard0000       1
                        { "x" : { "$minKey" : 1 } } -->> { "x" : { "$maxKey" : 1 } } on : shard0000 Timestamp(1,
)

mongos>

mongos> db.shTbl.insert({x:2})
WriteResult({ "nInserted" : 1 })
mongos> db.shTbl.insert({x:3})
WriteResult({ "nInserted" : 1 })
mongos> db.shTbl.insert({x:4})
WriteResult({ "nInserted" : 1 })
mongos>
mongos> sh.status()
--- Sharding Status ---
  sharding version: {
        "_id" : 1,
        "version" : 4,
        "minCompatibleVersion" : 4,
        "currentVersion" : 5,
        "clusterId" : ObjectId("55936ac576a153b4e1f67c45")
}
  shards:
        {  "_id" : "shard0000",  "host" : "localhost:27020" }
  databases:
        {  "_id" : "admin",  "partitioned" : false,  "primary" : "config" }
        {  "_id" : "mydb1",  "partitioned" : true,  "primary" : "shard0000" }
                mydb1.shTbl
                        shard key: { "x" : 1 }
                        chunks:
                                shard0000       1
                        { "x" : { "$minKey" : 1 } } -->> { "x" : { "$maxKey" : 1 } } on : shard0000 Timestamp(1,
)

mongos> db.shTbl.find()
{ "_id" : ObjectId("55936e5fc7deef0077d13a9c"), "x" : 1 }
{ "_id" : ObjectId("55936facc7deef0077d13a9d"), "x" : 2 }
{ "_id" : ObjectId("55936fb7c7deef0077d13a9e"), "x" : 3 }
{ "_id" : ObjectId("55936fbec7deef0077d13a9f"), "x" : 4 }
mongos>







Test the sharding by connecting to sharded client of port 27020


C:\>mongo --port 27020
MongoDB shell version: 2.6.5
connecting to: 127.0.0.1:27020/test


> show dbs
admin  (empty)
local  0.078GB
mydb1  0.078GB
> use mydb1
switched to db mydb1
> show collections
shTbl
system.indexes
> db.shTbl.find()
{ "_id" : ObjectId("55936e5fc7deef0077d13a9c"), "x" : 1 }
{ "_id" : ObjectId("55936facc7deef0077d13a9d"), "x" : 2 }
{ "_id" : ObjectId("55936fb7c7deef0077d13a9e"), "x" : 3 }
{ "_id" : ObjectId("55936fbec7deef0077d13a9f"), "x" : 4 }
>










Replication in MongoDB

High throughput
Increase availability
Maintain copies of data like one in DR , one for reporting
Replica set members: primary , secondary






Machine generated alternative text:
Computer LocalDisk(C:)fr data
Organize — Include in library — Share with Burn New folder
. Name Date modified Type Size
Favorites
. db 6/29/2015 9:13 AM File folder
Libraries . rsl 6/30/2015 4:33 PM File folder
>  Documents . rs2 6/30/2015 4:33 PM File folder
> J Music . rs3 6/30/2015 4:33 PM File folder
‘: Computer



C:\Program Files\MongoDB 2.6 Standard\bin>mongod --dbpath C:\data\rs1 --port 27017 --replset rs101 --smallfiles -
oplogsize 16


C:\Program Files\MongoDB 2.6 Standard\bin>mongod --dbpath C:\data\rs2 --port 27018 --replSet rs101 --smallfiles -
oplogsize 16


C:\>mongo --port 27017
> var myconf ={_id:"rs101", members:[{_id:0,host:"localhost:27017"}]}
> myconf
{
        "_id" : "rs101",
        "members" : [
                {
                        "_id" : 0,
                        "host" : "localhost:27017"
                }
        ]
}
>


> rs.initiate(myconf)
{
        "info" : "Config now saved locally.  Should come online in about a minute.",
        "ok" : 1
}
rs101:OTHER>
rs101:PRIMARY>

rs101:PRIMARY> rs.status()
{
        "set" : "rs101",
        "date" : ISODate("2015-06-30T11:19:19Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "localhost:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 598,
                        "optime" : Timestamp(1435663092, 1),
                        "optimeDate" : ISODate("2015-06-30T11:18:12Z"),
                        "electionTime" : Timestamp(1435663092, 2),
                        "electionDate" : ISODate("2015-06-30T11:18:12Z"),
                        "self" : true
                }
        ],
        "ok" : 1
}

rs101:PRIMARY> rs.add("localhost:27018")
{ "ok" : 1 }
rs101:PRIMARY>


rs101:PRIMARY> rs.status()
{
        "set" : "rs101",
        "date" : ISODate("2015-06-30T11:22:00Z"),
        "myState" : 1,
        "members" : [
                {
                        "_id" : 0,
                        "name" : "localhost:27017",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 759,
                        "optime" : Timestamp(1435663208, 1),
                        "optimeDate" : ISODate("2015-06-30T11:20:08Z"),
                        "electionTime" : Timestamp(1435663092, 2),
                        "electionDate" : ISODate("2015-06-30T11:18:12Z"),
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "localhost:27018",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 112,
                        "optime" : Timestamp(1435663208, 1),
                        "optimeDate" : ISODate("2015-06-30T11:20:08Z"),
                        "lastHeartbeat" : ISODate("2015-06-30T11:22:00Z"),
                        "lastHeartbeatRecv" : ISODate("2015-06-30T11:21:59Z"),
                        "pingMs" : 0,
                        "syncingTo" : "localhost:27017"
                }
        ],
        "ok" : 1
}
rs101:PRIMARY>





C:\>mongo --port 27018
MongoDB shell version: 2.6.5
connecting to: 127.0.0.1:27018/test
Server has startup warnings:
2015-06-30T16:42:50.272+0530 [initandlisten]
2015-06-30T16:42:50.272+0530 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
2015-06-30T16:42:50.272+0530 [initandlisten] **       32 bit builds are limited to less than 2GB of data (or less
with --journal).
2015-06-30T16:42:50.272+0530 [initandlisten] **       Note that journaling defaults to off for 32 bit and is curr
ntly off.
2015-06-30T16:42:50.272+0530 [initandlisten] **       See http://dochub.mongodb.org/core/32bit
2015-06-30T16:42:50.288+0530 [initandlisten]
rs101:SECONDARY>
rs101:PRIMARY> show dbs
admin  (empty)
local  0.063GB
rs101:PRIMARY> use mydb
switched to db mydb
rs101:PRIMARY> db.createCollection("rstable")
{ "ok" : 1 }
rs101:PRIMARY>

rs101:SECONDARY> show dbs
admin  (empty)
local  0.063GB
mydb   0.031GB
rs101:SECONDARY> show collections
2015-06-30T16:57:15.761+0530 error: { "$err" : "not master and slaveOk=false", "code" : 13435 } at src/mongo/shel
/query.js:131
rs101:SECONDARY>




Mongo doesn’t allow read /write from secondary by default

Basics principles of MongoDB



Query Models-schemaless
No transactions
No Joins
No ACID
No Triggers
No Stored Procedures
No Roll back






Mongo Shell is written in JavaScript
Mongo DB server is written in C++
Drivers: available for .net, Java,python,Nodejs,Ruby etc
CRUD operations are Javascript function calls

Document Oriented->key value pair in json format
High Performance
High Availability
Easy Scalability
Rich Query Language
Primary key or id is automatically created
Immutable
Already indexed . But index on _id cannot be dropped


Sub Documents and Regex in MongoDB

Sub documents

> db.num.insert({x:10,"Name":{"FName":"tim","LName":"Southee"}})
WriteResult({ "nInserted" : 1 })

> db.num.find({'Name.FName':"tim"}).pretty()
{
        "_id" : ObjectId("55911994525a7a85c0902ff7"),
        "x" : 10,
        "Name" : {
                "FName" : "tim",
                "LName" : "Southee"
        }
}
>
> db.num.insert({x:10,"Name":{"FName":"tim","LName":"Southee"},"Qualif":["Bca","
Mca"]})
WriteResult({ "nInserted" : 1 })
> db.num.find({'Qualif':"Bca"}).pretty()
{
        "_id" : ObjectId("55911cf9525a7a85c0902ff8"),
        "x" : 10,
        "Name" : {
                "FName" : "tim",
                "LName" : "Southee"
        },
        "Qualif" : [
                "Bca",
                "Mca"
        ]
}
>

Regex
> db.Employee.find({"EmpName":{$regex:/^S/i}}).pretty()
{
        "_id" : ObjectId("55910c4f525a7a85c0902ff5"),
        "EmpName" : "Supreet",
        "Designation" : "Senior Software Eng"
}


Array

> db.myproj.insert({"Project":{"project1":["wbs1","wbs2"]}})
WriteResult({ "nInserted" : 1 })
> db.myproj.find({'Project.project1':"wbs1"}).pretty()
{
        "_id" : ObjectId("55911f67525a7a85c0902ffa"),
        "Project" : {
                "project1" : [
                        "wbs1",
                        "wbs2"
                ]
        }
}
> db.myproj.insert({"Project":{"project2":["wbs1","wbs2"]}})
WriteResult({ "nInserted" : 1 })
>
> db.myproj.find().pretty()
{
        "_id" : ObjectId("55911f17525a7a85c0902ff9"),
        "Project" : [
                "wbs1",
                "wbs2"
        ]
}
{
        "_id" : ObjectId("55911f67525a7a85c0902ffa"),
        "Project" : {
                "project1" : [
                        "wbs1",
                        "wbs2"
                ]
        }
}
{
        "_id" : ObjectId("55911fee525a7a85c0902ffb"),
        "Project" : {
                "project2" : [
                        "wbs1",
                        "wbs2"
                ]
        }
}
>

Orders collection : selecting from an array
> db.orders.insert({"memos":[{"memo":"on time","by":"payment"},{"memo":"delayed","by":"shipping"}]})
WriteResult({ "nInserted" : 1 })
> db.orders.find().pretty()
{
        "_id" : ObjectId("55912599525a7a85c0902ffc"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "shipping"
                },
                {
                        "memo" : "approved",
                        "by" : "billing"
                }
        ]
}
{
        "_id" : ObjectId("559125cb525a7a85c0902ffd"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "payment"
                },
                {
                        "memo" : "delayed",
                        "by" : "shipping"
                }
        ]
}
> db.orders.find({'memos.0.by':'shipping'}).pretty()
{
        "_id" : ObjectId("55912599525a7a85c0902ffc"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "shipping"
                },
                {
                        "memo" : "approved",
                        "by" : "billing"
                }
        ]
}
> db.orders.find({'memos.by':'shipping'}).pretty()
{
        "_id" : ObjectId("55912599525a7a85c0902ffc"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "shipping"
                },
                {
                        "memo" : "approved",
                        "by" : "billing"
                }
        ]
}
{
        "_id" : ObjectId("559125cb525a7a85c0902ffd"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "payment"
                },
                {
                        "memo" : "delayed",
                        "by" : "shipping"
                }
        ]
}


Helper Functions in MongoDB

> db.orders.find().count()
2
>
Sort({Fieldname:sort order})
> db.orders.find().sort({_id:-1}).pretty()
{
        "_id" : ObjectId("559125cb525a7a85c0902ffd"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "payment"
                },
                {
                        "memo" : "delayed",
                        "by" : "shipping"
                }
        ]
}
{
        "_id" : ObjectId("55912599525a7a85c0902ffc"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "shipping"
                },
                {
                        "memo" : "approved",
                        "by" : "billing"
                }
        ]
}
>


Limit(num)
> db.orders.find().limit(1).pretty()
{
        "_id" : ObjectId("55912599525a7a85c0902ffc"),
        "memos" : [
                {
                        "memo" : "on time",
                        "by" : "shipping"
                },
                {
                        "memo" : "approved",
                        "by" : "billing"
                }
        ]
}
>


Update in MongoDB - More examples

Positional or field specific update
Use of $set ,$inc ,$unset operator
Updating multiple docs in single command ->{multi:1}
-by default {multi:0}
Upsert-> default is false


> db.numbers.insert({"x":10,"y":67})
WriteResult({ "nInserted" : 1 })
> db.numbers.insert({"x":10,"y":50})
WriteResult({ "nInserted" : 1 })
> db.numbers.insert({"x":10,"y":100})
WriteResult({ "nInserted" : 1 })
> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 10, "y" : 67 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 50 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 100 }


> db.numbers.update({"x":10}, {$set:{"y":120}},{multi:1} )
WriteResult({ "nMatched" : 3, "nUpserted" : 0, "nModified" : 3 })
> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 10, "y" : 120 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 120 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 120 }
>

Note : if you do not use $set , schema will change after you update
 Unset : to remove a column

> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 101, "y" : 202 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("55921888ab28e968f573138f"), "x" : 101, "y" : 202 }

> db.numbers.update({"x":101}, {$unset:{"y":202}})
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })

> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 101 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("55921888ab28e968f573138f"), "x" : 101, "y" : 202 }
>





  
Increment
> db.numbers.update({"x":10}, {$inc:{"y":5}},{multi:1} )
WriteResult({ "nMatched" : 3, "nUpserted" : 0, "nModified" : 3 })
> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 125 }
>
 Upsert: to modify or insert a new document

> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 125 }
> db.numbers.update({"x":10}, {$set:{"x":101,"y":202}},{upsert:1} )
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 1 })

> db.numbers.find().pretty()
{ "_id" : ObjectId("559212cb8e94c66f57951518"), "x" : 101, "y" : 202 }
{ "_id" : ObjectId("559212d88e94c66f57951519"), "x" : 10, "y" : 125 }
{ "_id" : ObjectId("559212dd8e94c66f5795151a"), "x" : 10, "y" : 125 }

> db.numbers.update({"x":1}, {$set:{"x":101,"y":202}},{upsert:1} )
WriteResult({
        "nMatched" : 0,
        "nUpserted" : 1,
        "nModified" : 0,
        "_id" : ObjectId("55921888ab28e968f573138f")
})
>