彻底挖掘mysqlbinlog数据内容
- - CSDN博客数据库推荐文章平时工作需要查询mysqlbinlog里面哪个库,哪个表在什么时间更新了什么数据,而通过mysqlbinlog转义过来的文件里面数据太乱,很多注释,而且也没有表结构相关字段,都用@1,@2等类似的方式显示,就想到通过python开发一个这样的功能. 先通过mysqlbinlog转义二进制日志binlog文件,保存为一份临时文件(执行完后删除).
平时工作需要查询mysqlbinlog里面哪个库,哪个表在什么时间更新了什么数据,而通过mysqlbinlog转义过来的文件里面数据太乱,很多注释,而且也没有表结构相关字段,都用@1,@2等类似的方式显示,就想到通过python开发一个这样的功能。
先说下脚本的原理:
__author__ = 'chunyang.wu'
# -*- coding: utf-8 -*-
#!/usr/bin/env python
import MySQLdb
import re
import os
import sys
mysql_config = {
'host':'192.168.1.197',\
'user':'root',\
'passwd':'123456',\
'port':3306,\
'db':'mydb'
}
class Deal_binlog:
def __init_db(self):
self.mysql_db = MySQLdb.connect(host=self.host,user=self.user,passwd=self.passwd,port=self.port,db=self.db)
self.mysql_cur=self.mysql_db.cursor()
def __init__(self):
self.host = mysql_config['host']
self.user = mysql_config['user']
self.passwd = mysql_config['passwd']
self.port = mysql_config['port']
self.db = mysql_config['db']
self.sql_file = sys.argv[2]
self.input_binlog_file = sys.argv[1]
self.tmp_binlog_file = 'tmp_binlog_file'
self.field = []
self.tb_name = ''
self.where = []
self.update = []
self.delete = []
self.patt = re.compile("/* .* */")
self.__init_db()
def _release_db(self):
self.mysql_cur.close()
self.mysql_db.close()
def _write_data(self,data):
print data
data = str(data)+"\n"
f = open(self.data_file,'a+')
f.write(data)
f.close()
def _get_table_name(self,line):
try:
if line.find('Table_map:')!=-1:
l = line.index('Table_map')
# print line[l::].split(' ')
self.tb_name = line[l::].split(' ')[1].replace('`','')
# return table
except Exception,ex:
print ex
def _get_table_structure(self,tb):
desc_sql = 'desc %s' %tb
# print desc_sql
self.field = []
self.mysql_cur.execute(desc_sql)
res = self.mysql_cur.fetchall()
for j in res:
self.field.append(j[0])
def _do(self):
'''先把mysql二进制的binlog解析成可识别文件,在从里面提取需要的数据'''
if os.path.exists(self.sql_file):
os.remove(self.sql_file)
os.popen('mysqlbinlog -v -v --base64-output=DECODE-ROWS %s>%s' %(self.input_binlog_file,self.tmp_binlog_file))
with open(self.tmp_binlog_file,"r") as infile:
for line in infile:
if line.rstrip('\n')=='BEGIN':
line = line.replace('BEGIN','')
elif line.find('Table_map:')!=-1:
self._get_table_name(line)
self._get_table_structure(self.tb_name)
elif line.find('### @')!=-1:
# print line.replace('### @','')
i = line.replace('### @','').split('=')[0]
# print line,i
line = line.replace('### @'+str(i),self.field[int(i)-1])
if(int(i)== len(self.field)):
line = self.patt.sub(' ',line)
else:
line = self.patt.sub(',',line)
elif line.find('###')!=-1:
line = line.replace('###','')
else:
line = ''
if line.rstrip('\n')!= '':
print line.rstrip('\n')
f = open(self.sql_file,'a+')
f.write(line)
f.close()
os.remove(self.tmp_binlog_file)
def usage():
print "python deal_binlog.py mysql_binlog_file out_put_sql_file"
print " tag: "
print " 1. change mysql_config dict"
print " 2. need MySQLdb"
print " 3. need your mysql server desc table privileges"
def main():
p = Deal_binlog()
p._do()
p._release_db()
if __name__=="__main__":
if len(sys.argv) ==3:
main()
else:
usage()
---------------------------------------------------------------------------------------------------------------------------------------------------------
版权所有,文章允许转载,但必须以链接方式注明源地址,否则追究法律责任!
QQ: 380968195
Email: [email protected]
Blog: http://blog.csdn.net/selectdb
URL: http://blog.csdn.net/selectdb/article/details/16861063