안녕하세요.
AWS 상에서 운영을 하다 보니 CloudWatch를 통하여 모니터링을 많이 하게 되었습니다.
더군다나 EC2 위에 올린 MongoDB의 모니터링이 필요하여 고민하던 도중 CloudWatch를 이용하기로 결정하고 Python으로 개발하였습니다.
MongoDB의 모니터링은 여러 방법이 많지만,
저는 mongostat / mongotop 을 이용하기로 하였습니다.
MongoDB에 대해서는 아키텍처와 지난번 Real MongoDB 세미나 참석하여 얻은 배움이 전부라 쉽지 않았습니다.
참고로 모니터링 관련 자료는 아래를 보고 툴 대신에 mongostat / top 을 하기로 한 것입니다.
https://docs.mongodb.com/manual/administration/monitoring/
해당 스크립트는 아래 github 스크립트를 custom 한 것입니다.(저작권 관련해서는 어떻게 되는지 모르겠네요..ㅠ)
Mongotop 의 경우도 개발한 mongostat를 커스텀하여 개발한 것입니다.
수집하는 데이터는 현재 기본적으로 query / insert / delete / update 관련 값입니다. mongotop 의 경우 주요 테이블(컬렉션)을 list로 지정하여 수집하며, 수집 데이터는 total time / readLock time / writeLock time / queries time 입니다.
또한 1초 간격으로 두 번 수집하여 그 차이를 표시하였습니다. (표시 데이터 = 현재 수집한 값 - 1초 전 수집한 값)
1회용 수집이기 때문에 crontab 에 등록해서 주기적으로 수집하면 됩니다.
혹여나 문의 사항 있으시면 댓글 또는 happy8510@gmail.com 으로 연락 주시거나 facebook 통해서 연락 주시면 도움 드릴 수 있는 부분에 대해서 도움 드리겠습니다.(개발자가 아니라서 개발 문의 사항은 한계가 있습니다..ㅠㅠ)
cloudwatch에 올리기 위해서는 미리 aws-config 설정 하시기 바랍니다.
* mongostat.py
import
argparse
import
commands
import
datetime, os, time, sys, random
import
boto3
from
pymongo
import
MongoClient
from
pymongo.errors
import
OperationFailure
from
pymongo.errors
import
ConnectionFailure
from
pymongo.errors
import
ServerSelectionTimeoutError
# Version string printed by the --version command-line flag.
PYTHON_MONGOSTAT_VERSION = "0.0.1"

# Substrings searched for in a server error message to decide whether a
# failed command was an authorization problem (in which case the script
# retries after authenticating) or some other error.
# NOTE(review): the MONGO2/MONGO3 prefixes presumably refer to the wording
# used by MongoDB 2.x and 3.x servers -- confirm.
MONGO2_NOT_AUTH = "unauthorized"
MONGO3_NOT_AUTH = "not authorized"

# Not referenced by the code in this script; the name (including its
# spelling) is kept unchanged in case other code refers to it.
MONGO3_AUTH_FAILUR = "Authentication failed"

# Shared CloudWatch client used to publish the collected metrics.
cloudwatch = boto3.client('cloudwatch')
class MongoInstance():
    """One MongoDB server connection plus a one-shot, mongostat-style sampler.

    ``try_stats_command`` takes two ``serverStatus`` snapshots one second
    apart, publishes the insert/query/delete/update deltas to CloudWatch
    and prints a single mongostat-like row to stdout.
    """

    def __init__(self, host, port, username, password):
        """Store the connection settings and open a client on the admin database.

        Args:
            host: Host name or address of the MongoDB server.
            port: TCP port of the MongoDB server.
            username: User name placed in the connection URI.
            password: Password placed in the connection URI.

        Exits the process with status 1 if ``MongoClient`` raises
        ``ConnectionFailure``. A ``ServerSelectionTimeoutError`` while
        reading the server version is only reported; ``self.version`` then
        stays ``None``.
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.stats_info = {}
        # Defined up front so the attribute exists even if the version
        # lookup below times out.
        self.version = None

        # NOTE(review): the credentials are interpolated verbatim, so a
        # user name or password containing URI-reserved characters must
        # already be percent-escaped by the caller.
        selfurl = 'mongodb://%s:%s@%s:%s/admin' % (
            self.username, self.password, self.host, self.port)
        try:
            self.connection = MongoClient(selfurl)
        except ConnectionFailure:
            print("Connection error: create connection to mongodb instance failed.")
            sys.exit(1)

        try:
            self.version = self.connection.server_info()['version']
        except ServerSelectionTimeoutError:
            print("Timeout error: get server information of mongodb instance timeout.")

    def _sample_twice(self, admin, interval):
        """Run ``serverStatus`` twice, ``interval`` seconds apart; return both replies."""
        first = admin.command({"serverStatus": 1})
        time.sleep(interval)
        second = admin.command({"serverStatus": 1})
        return first, second

    def _read_load_average(self):
        """Return the first field of /proc/loadavg, or 'n/a' if it cannot be read."""
        try:
            with open('/proc/loadavg') as loadavg:
                return loadavg.read().split()[0]
        except (IOError, OSError, IndexError):
            # Best effort only: the load column is informational.
            return 'n/a'

    def try_stats_command(self):
        """Sample ``serverStatus`` twice, publish the deltas and print one row.

        If the first attempt fails with an authorization error the method
        authenticates against the admin database and samples again. Any
        other failure prints a message and exits the process with status 1.
        """
        errmsg = {}
        sleep = 1
        admin = self.connection.admin

        try:
            server_status, server_status2 = self._sample_twice(admin, sleep)
        except OperationFailure as op_failure:
            # ``details`` may be None; fall back to the exception text so
            # the checks below always have an 'errmsg' to look at.
            errmsg = op_failure.details or {'errmsg': str(op_failure)}
        except Exception:
            print("Execution error: get server status of mongodb instance failed.")
            sys.exit(1)

        print('errmsg :' + str(errmsg))

        if errmsg:
            reason = errmsg.get('errmsg', str(errmsg))
            if MONGO2_NOT_AUTH not in reason and MONGO3_NOT_AUTH not in reason:
                # Not an authorization problem: nothing we can retry.
                print("Execution error: %s." % reason)
                sys.exit(1)

            try:
                admin.authenticate(self.username, self.password)
            except OperationFailure:
                print("Execution error: authenticate to mongodb instance failed.")
                sys.exit(1)

            try:
                server_status, server_status2 = self._sample_twice(admin, sleep)
            except OperationFailure as op_failure:
                details = op_failure.details or {}
                print("Execution error: %s." % details.get('errmsg', op_failure))
                sys.exit(1)

        thetime = datetime.datetime.now().strftime("%d-%m-%Y.%H:%M:%S")
        load = self._read_load_average()

        # Per-interval operation deltas: second snapshot minus first.
        # All five counters, including getmore, are taken from both
        # snapshots so that every delta reflects the sampling interval.
        before = server_status['opcounters']
        after = server_status2['opcounters']
        rates = {}
        for op in ('insert', 'query', 'update', 'delete', 'getmore'):
            rates[op] = (int(after[op]) - int(before[op])) // sleep

        mem = server_status['mem']
        res = int(mem['resident'])
        vir = int(mem['virtual'])
        # 'mapped' is not reported by every storage engine / server
        # version; treat it as 0 when absent instead of failing.
        mapd = int(mem.get('mapped', 0))

        con = int(server_status2['connections']['current'])
        active_clients = server_status['globalLock']['activeClients']
        glactiveW = int(active_clients['writers'])
        glactiveR = int(active_clients['readers'])

        def datum(metric_name, dimension_value, value):
            """Build one CloudWatch datum in the shape used by this script."""
            return {
                'MetricName': metric_name,
                'Dimensions': [
                    {'Name': 'MongoDB-Primary', 'Value': dimension_value},
                ],
                'Unit': 'None',
                'Value': value,
            }

        # NOTE(review): Delete and Update are published under the metric
        # name 'MongoDB-Query Value' and are told apart only by their
        # dimension value. That looks like a copy-paste slip, but renaming
        # would change the CloudWatch metric identity, so it is left as is.
        cloudwatch.put_metric_data(
            MetricData=[
                datum('MongoDB-Insert Value', 'Insert', rates['insert']),
                datum('MongoDB-Query Value', 'Query', rates['query']),
                datum('MongoDB-Query Value', 'Delete', rates['delete']),
                datum('MongoDB-Query Value', 'Update', rates['update']),
            ],
            Namespace='LogMetrics')

        template = "%12s%22s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s%12s"
        header = ('hostname', 'time', 'insert', 'query', 'update',
                  'delete', 'getmore', 'active con', 'resident',
                  'virtual', 'mapped', 'load', 'Act Writer', 'Act Reader')
        # The host this instance was created with is printed directly,
        # rather than relying on a module-level ``hostname`` variable.
        row = (self.host, thetime, rates['insert'], rates['query'],
               rates['update'], rates['delete'], rates['getmore'], con,
               res, vir, mapd, load, glactiveW, glactiveR)
        print(template % header)
        print(template % row)
def mongostat_arg_check(args):
    """Validate the parsed command-line arguments.

    Args:
        args: Object with ``rowcount``, ``username``, ``password``,
            ``host`` and ``port`` attributes (the argparse namespace).

    Returns:
        ``(True, None)`` when the arguments are acceptable, otherwise
        ``(False, message)`` where ``message`` describes the problem.
    """
    if args.rowcount and args.rowcount < 0:
        return False, "number of stats line to print can not be negative."

    # User name and password must be given together.
    if args.username and not args.password:
        return False, "only username given, without password."
    if not args.username and args.password:
        return False, "only password given, without username."

    if args.host:
        hostinfo = args.host.split(':')
        if len(hostinfo) > 2:
            return False, "invalid mongodb host, only HOSTNAME of HOSTNAME:PORT acceptable."
        if not hostinfo[0]:
            # e.g. ":27017" -- would otherwise yield an invalid connection URI.
            return False, "invalid mongodb host, the hostname part is empty."
        if len(hostinfo) == 2:
            try:
                port = int(hostinfo[1])
            except ValueError:
                return False, "invalid mongodb host, the port part not integer."
            # A port given both ways must agree.
            if args.port and args.port != port:
                return False, "ports given by port option and host option not match."

    return True, None
def mongostat_start(host, port, username, password, rowcount, noheaders, json):
    """Connect to the MongoDB server and collect one mongostat sample.

    A single sample is taken per call; run the script periodically (for
    example from cron) to collect continuously.

    Args:
        host: Host name or address of the MongoDB server.
        port: TCP port of the MongoDB server.
        username: User name for authentication.
        password: Password for authentication.
        rowcount: Accepted for interface compatibility; currently unused.
        noheaders: Accepted for interface compatibility; currently unused.
        json: Accepted for interface compatibility; currently unused.
    """
    mongo_instance = MongoInstance(host, port, username, password)
    mongo_instance.try_stats_command()
if __name__ == '__main__':
    # Built-in defaults. The three strings are placeholders meaning
    # "host info", "user" and "password": replace them with real values or
    # override them with --host / -u / -p on the command line.
    hostname, username, password = '호스트정보', '유저', '비밀번호'
    # 27017 is MongoDB's default port; override with --port or HOST:PORT.
    port, rowcount = 27017, 0
    noheaders, json = False, False

    parser = argparse.ArgumentParser(
        description="Monitor basic MongoDB server statistics.")
    parser.add_argument("--version",
                        help="print the tool version and exit",
                        action="store_true")
    parser.add_argument("--host", help="mongodb host to connect to")
    parser.add_argument("--port",
                        help="server port (can also use --host HOSTNAME:PORT)",
                        type=int)
    parser.add_argument("-u", "--username", help="username for authentication")
    parser.add_argument("-p", "--password", help="password for authentication")
    parser.add_argument("--noheaders",
                        help="don't output column names",
                        action="store_true")
    parser.add_argument("-n", "--rowcount",
                        help="number of stats lines to print (0 for indefinite)",
                        type=int)
    parser.add_argument("--json",
                        help="output as JSON rather than a formatted table",
                        action="store_true")
    arguments = parser.parse_args()

    if arguments.version:
        print("Python mongostat version: %s" % PYTHON_MONGOSTAT_VERSION)
        sys.exit(0)

    ok, errmsg = mongostat_arg_check(arguments)
    if not ok:
        print("Argument error: %s" % errmsg)
        sys.exit(1)

    # Command-line values override the built-in defaults above.
    if arguments.host:
        hostinfo = arguments.host.split(':')
        hostname = hostinfo[0]
        if len(hostinfo) == 2:
            port = int(hostinfo[1])
    if arguments.port:
        port = arguments.port
    if arguments.username:
        # The argument check guarantees a password accompanies the user name.
        username = arguments.username
        password = arguments.password
    if arguments.rowcount:
        rowcount = arguments.rowcount
    if arguments.noheaders:
        noheaders = True
    if arguments.json:
        json = True

    mongostat_start(hostname, port, username, password, rowcount, noheaders, json)
* mongotop.py
import
argparse
import
commands
import
datetime, os, time, sys, random
import
boto3
from
pymongo
import
MongoClient
from
pymongo.errors
import
OperationFailure
from
pymongo.errors
import
ConnectionFailure
from
pymongo.errors
import
ServerSelectionTimeoutError
# Version string printed by the --version command-line flag.
PYTHON_MONGOSTAT_VERSION = "0.0.1"

# Substrings searched for in a server error message to decide whether a
# failed command was an authorization problem (in which case the script
# retries after authenticating) or some other error.
# NOTE(review): the MONGO2/MONGO3 prefixes presumably refer to the wording
# used by MongoDB 2.x and 3.x servers -- confirm.
MONGO2_NOT_AUTH = "unauthorized"
MONGO3_NOT_AUTH = "not authorized"

# Not referenced by the code in this script; the name (including its
# spelling) is kept unchanged in case other code refers to it.
MONGO3_AUTH_FAILUR = "Authentication failed"

# Shared CloudWatch client used to publish the collected metrics.
cloudwatch = boto3.client('cloudwatch')

# Collections whose ``top`` statistics are published. These are placeholder
# names: replace them with the real collection names to monitor.
lstCollection = [
    'collectionname1',
    'collectionname2',
    'collectionname3',
    'collectionname4',
    'collectionname5',
]
class MongoInstance():
    """One MongoDB server connection plus a one-shot, mongotop-style sampler.

    ``try_stats_command`` takes two ``top`` snapshots one second apart and,
    for every collection named in ``lstCollection``, publishes the change
    in total / readLock / writeLock / queries time to CloudWatch and prints
    it to stdout.
    """

    # Database that the names in ``lstCollection`` belong to; ``top`` keys
    # its per-collection statistics by "<database>.<collection>".
    database = 'rocketchat'

    def __init__(self, host, port, username, password):
        """Store the connection settings and open a client on the admin database.

        Args:
            host: Host name or address of the MongoDB server.
            port: TCP port of the MongoDB server.
            username: User name placed in the connection URI.
            password: Password placed in the connection URI.

        Exits the process with status 1 if ``MongoClient`` raises
        ``ConnectionFailure``. A ``ServerSelectionTimeoutError`` while
        reading the server version is only reported; ``self.version`` then
        stays ``None``.
        """
        self.host = host
        self.port = port
        self.username = username
        self.password = password
        self.stats_info = {}
        # Defined up front so the attribute exists even if the version
        # lookup below times out.
        self.version = None

        # NOTE(review): the credentials are interpolated verbatim, so a
        # user name or password containing URI-reserved characters must
        # already be percent-escaped by the caller.
        selfurl = 'mongodb://%s:%s@%s:%s/admin' % (
            self.username, self.password, self.host, self.port)
        try:
            self.connection = MongoClient(selfurl)
        except ConnectionFailure:
            print("Connection error: create connection to mongodb instance failed.")
            sys.exit(1)

        try:
            self.version = self.connection.server_info()['version']
        except ServerSelectionTimeoutError:
            print("Timeout error: get server information of mongodb instance timeout.")

    def _sample_twice(self, admin, interval):
        """Run the ``top`` command twice, ``interval`` seconds apart; return both replies."""
        first = admin.command({"top": 1})
        time.sleep(interval)
        second = admin.command({"top": 1})
        return first, second

    def try_stats_command(self):
        """Sample ``top`` twice, then publish and print per-collection deltas.

        If the first attempt fails with an authorization error the method
        authenticates against the admin database and samples again. Any
        other failure prints a message and exits the process with status 1.
        Collections that are missing from either snapshot are reported and
        skipped so the remaining collections are still published.
        """
        errmsg = {}
        sleep = 1
        admin = self.connection.admin

        try:
            server_status, server_status2 = self._sample_twice(admin, sleep)
        except OperationFailure as op_failure:
            # ``details`` may be None; fall back to the exception text so
            # the checks below always have an 'errmsg' to look at.
            errmsg = op_failure.details or {'errmsg': str(op_failure)}
        except Exception:
            print("Execution error: get server status of mongodb instance failed.")
            sys.exit(1)

        print('errmsg :' + str(errmsg))

        if errmsg:
            reason = errmsg.get('errmsg', str(errmsg))
            if MONGO2_NOT_AUTH not in reason and MONGO3_NOT_AUTH not in reason:
                # Not an authorization problem: nothing we can retry.
                print("Execution error: %s." % reason)
                sys.exit(1)

            try:
                admin.authenticate(self.username, self.password)
            except OperationFailure:
                print("Execution error: authenticate to mongodb instance failed.")
                sys.exit(1)

            try:
                server_status, server_status2 = self._sample_twice(admin, sleep)
            except OperationFailure as op_failure:
                details = op_failure.details or {}
                print("Execution error: %s." % details.get('errmsg', op_failure))
                sys.exit(1)

        totals_before = server_status['totals']
        totals_after = server_status2['totals']

        # (key in the ``top`` reply, label used in metric/dimension names)
        fields = (
            ('total', 'total time'),
            ('readLock', 'readLock time'),
            ('writeLock', 'writeLock time'),
            ('queries', 'queries time'),
        )
        template = "%12s%12s%12s%12s"
        header = ('totime', 'relock', 'wrlock', 'query')

        for strCollist in lstCollection:
            tmpName = '%s.%s' % (self.database, strCollist)
            print(tmpName)

            if tmpName not in totals_before or tmpName not in totals_after:
                # Unknown collection: skip it rather than abort the
                # remaining collections with a KeyError.
                print("Execution error: no top statistics for %s, skipped." % tmpName)
                continue

            before = totals_before[tmpName]
            after = totals_after[tmpName]

            metric_data = []
            row = []
            for key, label in fields:
                # Time spent during the interval, divided by 1000 with
                # integer (floor) division. The dimension value labels the
                # result as milliseconds.
                delta = (int(after[key]['time']) - int(before[key]['time'])) // 1000
                row.append(delta)
                metric_data.append({
                    'MetricName': 'MongoDB-%s-%s Value' % (strCollist, label),
                    'Dimensions': [
                        {
                            'Name': 'MongoDB-Primary-Collections',
                            'Value': '%s millisecond' % label,
                        },
                    ],
                    'Unit': 'None',
                    'Value': delta,
                })

            cloudwatch.put_metric_data(MetricData=metric_data,
                                       Namespace='LogMetrics')

            print(template % header)
            print(template % tuple(row))
def mongostat_arg_check(args):
    """Validate the parsed command-line arguments.

    Args:
        args: Object with ``rowcount``, ``username``, ``password``,
            ``host`` and ``port`` attributes (the argparse namespace).

    Returns:
        ``(True, None)`` when the arguments are acceptable, otherwise
        ``(False, message)`` where ``message`` describes the problem.
    """
    if args.rowcount and args.rowcount < 0:
        return False, "number of stats line to print can not be negative."

    # User name and password must be given together.
    if args.username and not args.password:
        return False, "only username given, without password."
    if not args.username and args.password:
        return False, "only password given, without username."

    if args.host:
        hostinfo = args.host.split(':')
        if len(hostinfo) > 2:
            return False, "invalid mongodb host, only HOSTNAME of HOSTNAME:PORT acceptable."
        if not hostinfo[0]:
            # e.g. ":27017" -- would otherwise yield an invalid connection URI.
            return False, "invalid mongodb host, the hostname part is empty."
        if len(hostinfo) == 2:
            try:
                port = int(hostinfo[1])
            except ValueError:
                return False, "invalid mongodb host, the port part not integer."
            # A port given both ways must agree.
            if args.port and args.port != port:
                return False, "ports given by port option and host option not match."

    return True, None
def mongostat_start(host, port, username, password, rowcount, noheaders, json):
    """Connect to the MongoDB server and collect one mongotop sample.

    A single sample is taken per call; run the script periodically (for
    example from cron) to collect continuously.

    Args:
        host: Host name or address of the MongoDB server.
        port: TCP port of the MongoDB server.
        username: User name for authentication.
        password: Password for authentication.
        rowcount: Accepted for interface compatibility; currently unused.
        noheaders: Accepted for interface compatibility; currently unused.
        json: Accepted for interface compatibility; currently unused.
    """
    mongo_instance = MongoInstance(host, port, username, password)
    mongo_instance.try_stats_command()
if __name__ == '__main__':
    # Built-in defaults. The three strings are placeholders meaning
    # "connection IP", "user name" and "password": replace them with real
    # values or override them with --host / -u / -p on the command line.
    hostname, username, password = '접속ip', '유저명', '비밀번호'
    # 27017 is MongoDB's default port; override with --port or HOST:PORT.
    port, rowcount = 27017, 0
    noheaders, json = False, False

    parser = argparse.ArgumentParser(
        description="Monitor basic MongoDB server statistics.")
    parser.add_argument("--version",
                        help="print the tool version and exit",
                        action="store_true")
    parser.add_argument("--host", help="mongodb host to connect to")
    parser.add_argument("--port",
                        help="server port (can also use --host HOSTNAME:PORT)",
                        type=int)
    parser.add_argument("-u", "--username", help="username for authentication")
    parser.add_argument("-p", "--password", help="password for authentication")
    parser.add_argument("--noheaders",
                        help="don't output column names",
                        action="store_true")
    parser.add_argument("-n", "--rowcount",
                        help="number of stats lines to print (0 for indefinite)",
                        type=int)
    parser.add_argument("--json",
                        help="output as JSON rather than a formatted table",
                        action="store_true")
    arguments = parser.parse_args()

    if arguments.version:
        print("Python mongostat version: %s" % PYTHON_MONGOSTAT_VERSION)
        sys.exit(0)

    ok, errmsg = mongostat_arg_check(arguments)
    if not ok:
        print("Argument error: %s" % errmsg)
        sys.exit(1)

    # Command-line values override the built-in defaults above.
    if arguments.host:
        hostinfo = arguments.host.split(':')
        hostname = hostinfo[0]
        if len(hostinfo) == 2:
            port = int(hostinfo[1])
    if arguments.port:
        port = arguments.port
    if arguments.username:
        # The argument check guarantees a password accompanies the user name.
        username = arguments.username
        password = arguments.password
    if arguments.rowcount:
        rowcount = arguments.rowcount
    if arguments.noheaders:
        noheaders = True
    if arguments.json:
        json = True

    mongostat_start(hostname, port, username, password, rowcount, noheaders, json)