Canal+kafka同步mysql数据到Elasticsearch(一) 部署Canal server端

it2026-02-06  0

canal是阿里巴巴旗下的一款开源项目,纯Java开发。基于数据库增量日志解析,提供增量数据订阅&消费,目前开源版本主要支持MySQL日志的解析,canal伪装成MySQL Slave进行日志接收、解析,并发送至后端服务订阅、消费使用,系统架构如下:

1. 部署环境要求

JDK 1.8以上;MySQL 5.7以上;canal最新稳定版:https://github.com/alibaba/canal/releases ;kafka稳定版

2.mysql配置

需要MySQL主节点开启binlog:编辑my.cnf的 [mysqld] 段,添加binlog相关参数后重启mysql服务,并查看binlog是否已启用;在需要同步的MySQL主节点上新建canal用户;查看主节点binlog信息(show master status),并记录下来。

3.canal配置

安装JDK环境;下载canal稳定版:https://github.com/alibaba/canal/releases

部署canal server:

mkdir -p /usr/local/canal/canal.deployer

tar xvf canal.deployer-1.1.4.tar.gz -C /usr/local/canal/canal.deployer

编辑canal基础配置文件  vim conf/canal.properties ################################################# ######### common argument ############# ################################################# # tcp bind ip canal.ip = 1.1.1.1 #canal服务部署地址 # register ip to zookeeper canal.register.ip = canal.port = 11111 canal.metrics.pull.port = 11112 # canal instance user/passwd # canal.user = canal # canal.passwd = E3619321C1A937C46A0D8BD1DAC39F93B27D4458 # canal admin config #canal.admin.manager = 127.0.0.1:8089 canal.admin.port = 11110 canal.admin.user = admin canal.admin.passwd = 4ACFE3202A5FF5CF467898FC58AAB1D615029441 canal.zkServers = # flush data to zk canal.zookeeper.flush.period = 1000 canal.withoutNetty = false # tcp, kafka, RocketMQ canal.serverMode = kafka # flush meta cursor/parse position to file canal.file.data.dir = ${canal.conf.dir} canal.file.flush.period = 1000 ## memory store RingBuffer size, should be Math.pow(2,n) canal.instance.memory.buffer.size = 16384 ## memory store RingBuffer used memory unit size , default 1kb canal.instance.memory.buffer.memunit = 1024 ## meory store gets mode used MEMSIZE or ITEMSIZE canal.instance.memory.batch.mode = MEMSIZE canal.instance.memory.rawEntry = true ## detecing config canal.instance.detecting.enable = false #canal.instance.detecting.sql = insert into retl.xdual values(1,now()) on duplicate key update x=now() canal.instance.detecting.sql = select 1 canal.instance.detecting.interval.time = 3 canal.instance.detecting.retry.threshold = 3 canal.instance.detecting.heartbeatHaEnable = false # support maximum transaction size, more than the size of the transaction will be cut into multiple transactions delivery canal.instance.transaction.size = 1024 # mysql fallback connected to new master should fallback times canal.instance.fallbackIntervalInSeconds = 60 # network config canal.instance.network.receiveBufferSize = 16384 canal.instance.network.sendBufferSize = 16384 canal.instance.network.soTimeout = 30 # binlog filter config 
canal.instance.filter.druid.ddl = true canal.instance.filter.query.dcl = false canal.instance.filter.query.dml = true canal.instance.filter.query.ddl = false canal.instance.filter.table.error = false canal.instance.filter.rows = false canal.instance.filter.transaction.entry = false # binlog format/image check canal.instance.binlog.format = ROW,STATEMENT,MIXED canal.instance.binlog.image = FULL,MINIMAL,NOBLOB # binlog ddl isolation canal.instance.get.ddl.isolation = false # parallel parser config canal.instance.parser.parallel = true ## concurrent thread number, default 60% available processors, suggest not to exceed Runtime.getRuntime().availableProcessors() #canal.instance.parser.parallelThreadSize = 16 ## disruptor ringbuffer size, must be power of 2 canal.instance.parser.parallelBufferSize = 256 # table meta tsdb info canal.instance.tsdb.enable = true canal.instance.tsdb.dir = ${canal.file.data.dir:../conf}/${canal.instance.destination:} canal.instance.tsdb.url = jdbc:h2:${canal.instance.tsdb.dir}/h2;CACHE_SIZE=1000;MODE=MYSQL; canal.instance.tsdb.dbUsername = canal canal.instance.tsdb.dbPassword = canal # dump snapshot interval, default 24 hour canal.instance.tsdb.snapshot.interval = 24 # purge snapshot expire , default 360 hour(15 days) canal.instance.tsdb.snapshot.expire = 360 # aliyun ak/sk , support rds/mq canal.aliyun.accessKey = canal.aliyun.secretKey = ################################################# ######### destinations ############# ################################################# canal.destinations = mysqlEs ## 发送至Kafka的Topic,目标实例 # conf root dir canal.conf.dir = ../conf # auto scan instance dir add/remove and start/stop instance canal.auto.scan = true canal.auto.scan.interval = 5 canal.instance.tsdb.spring.xml = classpath:spring/tsdb/h2-tsdb.xml #canal.instance.tsdb.spring.xml = classpath:spring/tsdb/mysql-tsdb.xml canal.instance.global.mode = spring canal.instance.global.lazy = false canal.instance.global.manager.address = ${canal.admin.manager} 
#canal.instance.global.spring.xml = classpath:spring/memory-instance.xml canal.instance.global.spring.xml = classpath:spring/file-instance.xml #canal.instance.global.spring.xml = classpath:spring/default-instance.xml ################################################## ######### MQ ############# ################################################## canal.mq.servers = 1.1.1.1:9092 # kafka地址 canal.mq.retries = 0 canal.mq.batchSize = 16384 canal.mq.maxRequestSize = 1048576 canal.mq.lingerMs = 100 canal.mq.bufferMemory = 33554432 canal.mq.canalBatchSize = 50 canal.mq.canalGetTimeout = 100 canal.mq.flatMessage = true canal.mq.compressionType = none canal.mq.acks = all #canal.mq.properties. = canal.mq.producerGroup = test # Set this value to "cloud", if you want open message trace feature in aliyun. canal.mq.accessChannel = local # aliyun mq namespace #canal.mq.namespace = ################################################## ######### Kafka Kerberos Info ############# ################################################## canal.mq.kafka.kerberos.enable = false canal.mq.kafka.kerberos.krb5FilePath = "../conf/kerberos/krb5.conf" canal.mq.kafka.kerberos.jaasFilePath = "../conf/kerberos/jaas.conf" 核心参数修改项

canal.ip = 1.1.1.1 # 部署canal server ip

canal.serverMode = kafka  # canal server解析消息发送方式

canal.destinations = mysqlEs # 发送至Kafka的Topic,目标实例

canal.mq.servers = 1.1.1.1:9092 # kafka集群地址

canal.instance.filter.query.dml = true  # 是否忽略dml的query语句(如insert/update/delete的SQL文本)

修改canal实例配置文件: 重命名 conf 目录下 example文件夹为 mysqlEs(发送至Kafka的Topic,目标实例)

修改mysqlEs下配置文件instance.properties  

################################################# ## mysql serverId , v1.0.26+ will autoGen # canal.instance.mysql.slaveId=100110002 # enable gtid use true/false canal.instance.gtidon=false # position info canal.instance.master.address=1.1.1.1:3306 canal.instance.master.journal.name=binlog.000024 canal.instance.master.position=1023778312 canal.instance.master.timestamp= canal.instance.master.gtid=55b5e4ac-629b-11ea-a980-0242ac110002:1-1741520 # rds oss binlog canal.instance.rds.accesskey= canal.instance.rds.secretkey= canal.instance.rds.instanceId= # table meta tsdb info canal.instance.tsdb.enable=true #canal.instance.tsdb.url=jdbc:mysql://127.0.0.1:3306/canal_tsdb #canal.instance.tsdb.dbUsername=canal #canal.instance.tsdb.dbPassword=canal #canal.instance.standby.address = #canal.instance.standby.journal.name = #canal.instance.standby.position = #canal.instance.standby.timestamp = #canal.instance.standby.gtid= # username/password canal.instance.dbUsername=canal canal.instance.dbPassword=canal canal.instance.connectionCharset = UTF-8 # enable druid Decrypt database password canal.instance.enableDruid=false #canal.instance.pwdPublicKey=MFwwDQYJKoZIhvcNAQEBBQADSwAwSAJBALK4BUxdDltRRE5/zXpVEVPUgunvscYFtEip3pmLlhrWpacX7y7GCMo2/JM6LeHmiiNdH1FWgGCpUfircSwlWKUCAwEAAQ== # table regex canal.instance.filter.regex=dbName.tableName,dbName2.tableName2 # table black regex canal.instance.filter.black.regex= # table field filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2) #canal.instance.filter.field=test1.t_product:id/subject/keywords,test2.t_company:id/name/contact/ch # table field black filter(format: schema1.tableName1:field1/field2,schema2.tableName2:field1/field2) #canal.instance.filter.black.field=test1.t_product:subject/product_image,test2.t_company:id/name/contact/ch # mq config canal.mq.topic=mysqlEs # dynamic topic route by schema or table regex #canal.mq.dynamicTopic=mytest1.user,mytest2\\..*,.*\\..* canal.mq.partition=0 # hash partition 
config #canal.mq.partitionsNum=3 #canal.mq.partitionHash=test.table:id^name,.*\\..* ################################################# 核心参数修改配置项

canal.instance.mysql.slaveId=10210100202 # 不能与MySQL主节点及其他从节点的server-id重复

##### MySQL主节点,show master status相关信息:

canal.instance.master.address=1.1.1.1:3306   # mysql数据库地址
canal.instance.master.journal.name=binlog.000002   # binlog文件名
canal.instance.master.position=198702544     # binlog position
canal.instance.master.timestamp=
canal.instance.master.gtid=55b5e4ac-629b-11ea-a980-0242ac110002:1-35908  # binlog gtid

##### MySQL canal用户信息

canal.instance.dbUsername=canal
canal.instance.dbPassword=canal

##### MySQL同步表正则过滤,目前只同步车辆相关基础表,根据实际情况而定

canal.instance.filter.regex=dbName.tableName,dbName1.tableName1
# table black regex
canal.instance.filter.black.regex=

##### binlog推送至kafka的topic名称

# mq config
canal.mq.topic=mysqlEs

启动

bin/stop.sh

bin/startup.sh

日志 logs/canal/canal.log logs/mysqlEs/mysqlEs.log

 

 

 

 

 

最新回复(0)