microsoft / ghcrawler

Crawl GitHub APIs and store the discovered orgs, repos, commits, ...

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

mongodb: _metadata.links.self.href missing index

grooverdan opened this issue · comments

$ docker exec -ti docker_mongo_1 mongotop 5
2018-06-07T05:21:41.610+0000	connected to: 127.0.0.1

                            ns     total    read     write    2018-06-07T05:21:46Z
              ghcrawler.commit    4009ms     7ms    4002ms                        
...
{
	"op" : "update",
	"ns" : "ghcrawler.commit",
	"command" : {
		"q" : {
			"_metadata.links.self.href" : "urn:repo:19816070:commit:492e7b081168f1922ef6409ebba77dbf30638185"
		},
		"u" : {
....
	"millis" : 153,
	"planSummary" : "COLLSCAN",
	"execStats" : {
		"stage" : "UPDATE",
		"nReturned" : 0,
		"executionTimeMillisEstimate" : 150,
		"works" : 117269,
		"advanced" : 0,
		"needTime" : 117268,
		"needYield" : 0,
		"saveState" : 916,
		"restoreState" : 916,
		"isEOF" : 1,
		"invalidates" : 0,
		"nMatched" : 0,
		"nWouldModify" : 0,
		"nInvalidateSkips" : 0,
		"wouldInsert" : true,
		"fastmodinsert" : true,
		"inputStage" : {
			"stage" : "COLLSCAN",
			"filter" : {
				"_metadata.links.self.href" : {
					"$eq" : "urn:repo:19816070:commit:492e7b081168f1922ef6409ebba77dbf30638185"
				}
			},
			"nReturned" : 0,
			"executionTimeMillisEstimate" : 150,
			"works" : 117268,
			"advanced" : 0,
			"needTime" : 117267,
			"needYield" : 0,
			"saveState" : 916,
			"restoreState" : 916,
			"isEOF" : 1,
			"invalidates" : 0,
			"direction" : "forward",
			"docsExamined" : 117266
		}
	},
	"ts" : ISODate("2018-06-07T05:22:13.836Z"),
	"client" : "172.18.0.5",
	"allUsers" : [ ],

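Solution: create a hashed index on `_metadata.links.self.href`, so the equality match in the update query can use the index instead of a full collection scan (COLLSCAN):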

> db.commit.createIndex( { "_metadata.links.self.href":  "hashed" } )
{
	"createdCollectionAutomatically" : false,
	"numIndexesBefore" : 2,
	"numIndexesAfter" : 3,
	"ok" : 1
}

After creating the index, mongotop reports:

                            ns    total    read    write    2018-06-07T05:40:28Z
              ghcrawler.commit      8ms     8ms      0ms