当前位置:   article > 正文

MySQL Binlog 解析工具 Maxwell 详解_binlog解析工具

binlog解析工具

MySQL Binlog 解析工具 Maxwell 详解

官方文档 :Reference - Maxwell's Daemon

maxwell 简介

Maxwell是一个能实时读取MySQL二进制日志binlog,并生成 JSON 格式的消息,作为生产者发送给 Kafka、Kinesis、RabbitMQ、Redis、Google Cloud Pub/Sub、文件或其它平台的应用程序。它的常见应用场景有ETL、维护缓存、收集表级别的dml指标、增量到搜索引擎、数据分区迁移、切库binlog回滚方案等。

Maxwell主要提供了下列功能:

  • 支持 SELECT * FROM table 的方式进行全量数据初始化
  • 支持在主库发生failover后,自动恢复binlog位置(GTID)
  • 可以对数据进行分区,解决数据倾斜问题,发送到kafka的数据支持database、table、column等级别的数据分区
  • 工作方式是伪装为Slave,接收binlog events,然后根据schemas信息拼装,可以接受ddl、xid、row等各种event

除了Maxwell外,目前常用的MySQL Binlog解析工具主要有阿里的canal、mysql_streamer,三个工具对比如下:

canal 由Java开发,分为服务端和客户端,拥有众多的衍生应用,性能稳定,功能强大;canal 需要自己编写客户端来消费canal解析到的数据。

maxwell相对于canal的优势是使用简单,它直接将数据变更输出为json字符串,不需要再编写客户端。

maxwell 的安装部署Maxwell简介、部署、原理和使用介绍-CSDN博客

生产环境配置:

注意:后台运行参数 --daemon

  1. ## tl;dr config 生产环境配置为info级别
  2. log_level=info
  3. ## mysql login info, mysql用户必须拥有读取binlog权限和新建库表的权限
  4. #mysql过滤设置
  5. filter= exclude: *.*, include: woc_school.*, exclude: *.*.create_time = *
  6. # mysql login info
  7. #host=127.0.0.1
  8. #user=root
  9. #password=root
  10. #是否包含值为NULL的字段,默认true
  #output_nulls=true
  11. ##jdbc 配置[解决抽取binlog后时区相比北京时间少8个时区的问题]
  12. # options to pass into the jdbc connection, given as opt=val&opt2=val2
  13. jdbc_options=characterEncoding=utf8mb4&autoReconnect=true&allowMultiQueries=true&useSSL=false&serverTimezone=Asia/Shanghai
  14. #使用replication_host 则开启
  15. #replication_jdbc_options=characterEncoding=utf8mb4&autoReconnect=true&allowMultiQueries=true&useSSL=false&serverTimezone=Asia/Shanghai
  16. #使用schema_host 则开启
  17. #schema_jdbc_options=characterEncoding=utf8mb4&autoReconnect=true&allowMultiQueries=true&useSSL=false&serverTimezone=Asia/Shanghai
  18. #rabbitmq消息配置
  19. producer=rabbitmq
  20. rabbitmq_host=<实例ID>.mq-amqp.cn-hangzhou-a.aliyuncs.com
  21. rabbitmq_user=<你的RabbitMQ用户名>
  22. rabbitmq_pass=<你的RabbitMQ密码>
  23. rabbitmq_port=5672
  24. rabbitmq_virtual_host=canal-vhost
  25. rabbitmq_exchange=canal-exchange-routing-key
  26. rabbitmq_exchange_type=fanout
  27. rabbitmq_exchange_durable=true
  28. rabbitmq_exchange_autodelete=false
  29. ##按照 db.tbl 的格式指定 routing_key,在创建队列时,可以根据不同的表进入不同的队列,提高并行消费而不乱序的能力
  30. #rabbitmq_routing_key_template=%db%.%table%
  31. # *** general ***
  32. # choose where to produce data to. stdout|file|kafka|kinesis|pubsub|sqs|rabbitmq|redis
  33. #producer=kafka
  34. # set the log level. note that you can configure things further in log4j2.xml
  35. #log_level=DEBUG # [DEBUG, INFO, WARN, ERROR]
  36. # if set, maxwell will look up the scoped environment variables, strip off the prefix and inject the configs
  37. #env_config_prefix=MAXWELL_
  38. # *** mysql ***
  39. # mysql host to connect to
  40. #host=hostname
  41. # mysql port to connect to
  42. #port=3306
  43. # mysql user to connect as. This user must have REPLICATION SLAVE permissions,
  44. # as well as full access to the `maxwell` (or schema_database) database
  45. #user=maxwell
  46. # mysql password
  47. #password=maxwell
  48. # options to pass into the jdbc connection, given as opt=val&opt2=val2
  49. #jdbc_options=opt1=100&opt2=hello
  50. # name of the mysql database where maxwell keeps its own state
  51. #schema_database=maxwell
  52. # whether to use GTID or not for positioning
  53. #gtid_mode=true
  54. # maxwell will capture an initial "base" schema containing all table and column information,
  55. # and then keep delta-updates on top of that schema. If you have an inordinate amount of DDL changes,
  56. # the table containing delta changes will grow unbounded (and possibly too large) over time. If you
  57. # enable this option Maxwell will periodically compact its tables.
  58. #max_schemas=10000
  59. # SSL/TLS options
  60. # To use VERIFY_CA or VERIFY_IDENTITY, you must set the trust store with Java opts:
  61. # -Djavax.net.ssl.trustStore=<truststore> -Djavax.net.ssl.trustStorePassword=<password>
  62. # or import the MySQL cert into the global Java cacerts.
  63. # MODE must be one of DISABLED, PREFERRED, REQUIRED, VERIFY_CA, or VERIFY_IDENTITY
  64. #
  65. # turns on ssl for the maxwell-store connection, other connections inherit this setting unless specified
  66. #ssl=DISABLED
  67. # for binlog-connector
  68. #replication_ssl=DISABLED
  69. # for the schema-capture connection, if used
  70. #schema_ssl=DISABLED
  71. # maxwell can optionally replicate from a different server than where it stores
  72. # schema and binlog position info. Specify that different server here:
  73. #replication_host=other
  74. #replication_user=username
  75. #replication_password=password
  76. #replication_port=3306
  77. # This may be useful when using MaxScale's binlog mirroring host.
  78. # Specifies that Maxwell should capture schema from a different server than
  79. # it replicates from:
  80. #schema_host=other
  81. #schema_user=username
  82. #schema_password=password
  83. #schema_port=3306
  84. # *** output format ***
  85. # records include binlog position (default false)
  86. #output_binlog_position=true
  87. # records include a gtid string (default false)
  88. #output_gtid_position=true
  89. # records include fields with null values (default true). If this is false,
  90. # fields where the value is null will be omitted entirely from output.
  91. #output_nulls=true
  92. # records include server_id (default false)
  93. #output_server_id=true
  94. # records include thread_id (default false)
  95. #output_thread_id=true
  96. # records include schema_id (default false)
  97. #output_schema_id=true
  98. # records include row query, binlog option "binlog_rows_query_log_events" must be enabled (default false)
  99. #output_row_query=true
  100. # DML records include list of values that make up a row's primary key (default false)
  101. #output_primary_keys=true
  102. # DML records include list of columns that make up a row's primary key (default false)
  103. #output_primary_key_columns=true
  104. # records include commit and xid (default true)
  105. #output_commit_info=true
  106. # This controls whether maxwell will output JSON information containing
  107. # DDL (ALTER/CREATE TABLE/ETC) information. (default: false)
  108. # See also: ddl_kafka_topic
  109. #output_ddl=true
  110. # turns underscore naming style of fields to camel case style in JSON output
  111. # default is none, which means the field name in JSON is the exact name in MySQL table
  112. #output_naming_strategy=underscore_to_camelcase
  113. # *** kafka ***
  114. # list of kafka brokers
  115. #kafka.bootstrap.servers=hosta:9092,hostb:9092
  116. # kafka topic to write to
  117. # this can be static, e.g. 'maxwell', or dynamic, e.g. namespace_%{database}_%{table}
  118. # in the latter case 'database' and 'table' will be replaced with the values for the row being processed
  119. #kafka_topic=maxwell
  120. # alternative kafka topic to write DDL (alter/create/drop) to. Defaults to kafka_topic
  121. #ddl_kafka_topic=maxwell_ddl
  122. # hash function to use. "default" is just the JVM's 'hashCode' function.
  123. #kafka_partition_hash=default # [default, murmur3]
  124. # how maxwell writes its kafka key.
  125. #
  126. # 'hash' looks like:
  127. # {"database":"test","table":"tickets","pk.id":10001}
  128. #
  129. # 'array' looks like:
  130. # ["test","tickets",[{"id":10001}]]
  131. #
  132. # default: "hash"
  133. #kafka_key_format=hash # [hash, array]
  134. # extra kafka options. Anything prefixed "kafka." will get
  135. # passed directly into the kafka-producer's config.
  136. # a few defaults.
  137. # These are 0.11-specific. They may or may not work with other versions.
  138. kafka.compression.type=snappy
  139. kafka.retries=0
  140. kafka.acks=1
  141. #kafka.batch.size=16384
  142. # kafka+SSL example
  143. # kafka.security.protocol=SSL
  144. # kafka.ssl.truststore.location=/var/private/ssl/kafka.client.truststore.jks
  145. # kafka.ssl.truststore.password=test1234
  146. # kafka.ssl.keystore.location=/var/private/ssl/kafka.client.keystore.jks
  147. # kafka.ssl.keystore.password=test1234
  148. # kafka.ssl.key.password=test1234
  149. # controls a heuristic check that maxwell may use to detect messages that
  150. # we never heard back from. The heuristic check looks for "stuck" messages, and
  151. # will timeout maxwell after this many milliseconds.
  152. #
  153. # See https://github.com/zendesk/maxwell/blob/master/src/main/java/com/zendesk/maxwell/producer/InflightMessageList.java
  154. # if you really want to get into it.
  155. #producer_ack_timeout=120000 # default 0
  156. # *** partitioning ***
  157. # What part of the data do we partition by?
  158. #producer_partition_by=database # [database, table, primary_key, transaction_id, thread_id, column]
  159. # specify what fields to partition by when using producer_partition_by=column
  160. # comma separated list.
  161. #producer_partition_columns=id,foo,bar
  162. # when using producer_partition_by=column, partition by this when
  163. # the specified column(s) don't exist.
  164. #producer_partition_by_fallback=database
  165. # *** kinesis ***
  166. #kinesis_stream=maxwell
  167. # AWS places a 256 unicode character limit on the max key length of a record
  168. # http://docs.aws.amazon.com/kinesis/latest/APIReference/API_PutRecord.html
  169. #
  170. # Setting this option to true enables hashing the key with the md5 algorithm
  171. # before we send it to kinesis so all the keys work within the key size limit.
  172. # Values: true, false
  173. # Default: false
  174. #kinesis_md5_keys=true
  175. # *** sqs ***
  176. #sqs_queue_uri=aws_sqs_queue_uri
  177. # The sqs producer will need aws credentials configured in the default
  178. # root folder and file format. Please check below link on how to do it.
  179. # http://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/setup-credentials.html
  180. # *** pub/sub ***
  181. #pubsub_project_id=maxwell
  182. #pubsub_topic=maxwell
  183. #ddl_pubsub_topic=maxwell_ddl
  184. # *** rabbit-mq ***
  185. #rabbitmq_host=rabbitmq_hostname
  186. #rabbitmq_port=5672
  187. #rabbitmq_user=guest
  188. #rabbitmq_pass=guest
  189. #rabbitmq_virtual_host=/
  190. #rabbitmq_exchange=maxwell
  191. #rabbitmq_exchange_type=fanout
  192. #rabbitmq_exchange_durable=false
  193. #rabbitmq_exchange_autodelete=false
  194. #rabbitmq_routing_key_template=%db%.%table%
  195. #rabbitmq_message_persistent=false
  196. #rabbitmq_declare_exchange=true
  197. # *** redis ***
  198. #redis_host=redis_host
  199. #redis_port=6379
  200. #redis_auth=redis_auth
  201. #redis_database=0
  202. # name of pubsub/list/whatever key to publish to
  203. #redis_key=maxwell
  204. # this can be static, e.g. 'maxwell', or dynamic, e.g. namespace_%{database}_%{table}
  205. #redis_pub_channel=maxwell
  206. # this can be static, e.g. 'maxwell', or dynamic, e.g. namespace_%{database}_%{table}
  207. #redis_list_key=maxwell
  208. # this can be static, e.g. 'maxwell', or dynamic, e.g. namespace_%{database}_%{table}
  209. # Valid values for redis_type = pubsub|lpush. Defaults to pubsub
  210. #redis_type=pubsub
  211. # *** custom producer ***
  212. # the fully qualified class name for custom ProducerFactory
  213. # see the following link for more details.
  214. # http://maxwells-daemon.io/producers/#custom-producer
  215. #custom_producer.factory=
  216. # custom producer properties can be configured using the custom_producer.* property namespace
  217. #custom_producer.custom_prop=foo
  218. # *** filtering ***
  219. # filter rows out of Maxwell's output. Comma separated list of filter-rules, evaluated in sequence.
  220. # A filter rule is:
  221. # <type> ":" <db> "." <tbl> [ "." <col> "=" <col_val> ]
  222. # type ::= [ "include" | "exclude" | "blacklist" ]
  223. # db ::= [ "/regexp/" | "string" | "`string`" | "*" ]
  224. # tbl ::= [ "/regexp/" | "string" | "`string`" | "*" ]
  225. # col_val ::= "column_name"
  226. # tbl ::= [ "/regexp/" | "string" | "`string`" | "*" ]
  227. #
  228. # See http://maxwells-daemon.io/filtering for more details
  229. #
  230. #filter= exclude: *.*, include: foo.*, include: bar.baz, include: foo.bar.col_eg = "value_to_match"
  231. # javascript filter
  232. # maxwell can run a bit of javascript for each row if you need very custom filtering/data munging.
  233. # See http://maxwells-daemon.io/filtering/#javascript_filters for more details
  234. #
  235. #javascript=/path/to/javascript_filter_file
  236. # *** encryption ***
  237. # Encryption mode. Possible values are none, data, and all. (default none)
  238. #encrypt=none
  239. # Specify the secret key to be used
  240. #secret_key=RandomInitVector
  241. # *** monitoring ***
  242. # Maxwell collects metrics via dropwizard. These can be exposed through the
  243. # base logging mechanism (slf4j), JMX, HTTP or pushed to Datadog.
  244. # Options: [jmx, slf4j, http, datadog]
  245. # Supplying multiple is allowed.
  246. #metrics_type=jmx,slf4j
  247. # The prefix maxwell will apply to all metrics
  248. #metrics_prefix=MaxwellMetrics # default MaxwellMetrics
  249. # Enable (dropwizard) JVM metrics, default false
  250. #metrics_jvm=true
  251. # When metrics_type includes slf4j this is the frequency metrics are emitted to the log, in seconds
  252. #metrics_slf4j_interval=60
  253. # When metrics_type includes http or diagnostic is enabled, this is the port the server will bind to.
  254. #http_port=8080
  255. # When metrics_type includes http or diagnostic is enabled, this is the http path prefix, default /.
  256. #http_path_prefix=/some/path/
  257. # ** The following are Datadog specific. **
  258. # When metrics_type includes datadog this is the way metrics will be reported.
  259. # Options: [udp, http]
  260. # Supplying multiple is not allowed.
  261. #metrics_datadog_type=udp
  262. # datadog tags that should be supplied
  263. #metrics_datadog_tags=tag1:value1,tag2:value2
  264. # The frequency metrics are pushed to datadog, in seconds
  265. #metrics_datadog_interval=60
  266. # required if metrics_datadog_type = http
  267. #metrics_datadog_apikey=API_KEY
  268. # required if metrics_datadog_type = udp
  269. #metrics_datadog_host=localhost # default localhost
  270. #metrics_datadog_port=8125 # default 8125
  271. # Maxwell exposes http diagnostic endpoint to check below in parallel:
  272. # 1. binlog replication lag
  273. # 2. producer (currently kafka) lag
  274. # To enable Maxwell diagnostic
  275. #http_diagnostic=true # default false
  276. # Diagnostic check timeout in milliseconds, required if diagnostic = true
  277. #http_diagnostic_timeout=10000 # default 10000
  278. # *** misc ***
  279. # maxwell's bootstrapping functionality has a couple of modes.
  280. #
  281. # In "async" mode, maxwell will output the replication stream while it
  282. # simultaneously outputs the database to the topic. Note that it won't
  283. # output replication data for any tables it is currently bootstrapping -- this
  284. # data will be buffered and output after the bootstrap is complete.
  285. #
  286. # In "sync" mode, maxwell stops the replication stream while it
  287. # outputs bootstrap data.
  288. #
  289. # async mode keeps ops live while bootstrapping, but carries the possibility of
  290. # data loss (due to buffering transactions). sync mode is safer but you
  291. # have to stop replication.
  292. #bootstrapper=async [sync, async, none]
  293. # output filename when using the "file" producer
  294. #output_file=/path/to/file

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/寸_铁/article/detail/955194
推荐阅读
相关标签
  

闽ICP备14008679号