###############################################################################
# Bug#21507981: REPLICATION POSITION LOST AFTER CRASH ON MTS CONFIGURED SLAVE
#
# Problem:
# ========
# Enable MTS along with crash-safe replication tables. Make sure that the server
# is busily inserting data with multiple threads in parallel. Shut down mysqld
# uncleanly (kill -9 or power off the server without notice).
#
# Now users restart the server with --relay-log-recovery=1 to recover the
# crashed slave.
#
# This results in the following error:
# ====================================
# 2015-06-24 13:49:03 3895 [ERROR] --relay-log-recovery cannot be executed when
# the slave was stopped with an error or killed in MTS mode; consider using
# RESET SLAVE or restart the server with --relay-log-recovery = 0 followed by
# START SLAVE UNTIL SQL_AFTER_MTS_GAPS.
#
# i.e. relay-log-recovery will not work in MTS mode.
###############################################################################
# The following test demonstrates that when a gap is generated because MTS has
# stopped due to an error, attempting 'relay-log-recovery' will not be
# successful. Once the root cause of the error is eliminated, restarting the
# recovery with relay-log-recovery=1 should fix the issue.
#
# Testing Method:
# ===============
# It first creates two databases (d1 and d2) and sets up the slave to use two
# parallel workers. The test case then inserts on the slave a tuple that will
# block writes on d2 and generate gaps. Now COMMIT the blocking tuple on the
# slave so that the workers will proceed and apply the pending transactions.
# This will result in a duplicate key error and the slave will stop. Now crash
# the slave server and initiate relay-log-recovery. It should fail as the slave
# has stopped due to an error. Eliminate the cause of the duplicate key error
# by removing the local changes on the slave. Restart the recovery process; it
# should be successful.
# Test prerequisites and replication topology setup.
--source include/force_restart.inc
--source include/have_binlog_format_statement.inc
--source include/only_mts_slave_parallel_workers.inc
--source include/master-slave.inc

# On the slave connection, register the error-log messages that this test
# provokes on purpose (duplicate key, MTS stop, failed relay log recovery) so
# that they are not reported as unexpected warnings.
--source include/rpl_connection_slave.inc
call mtr.add_suppression("Duplicate entry*");
call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
call mtr.add_suppression("Failed to initialize the master info structure");
call mtr.add_suppression("Slave failed to initialize relay log info*");
call mtr.add_suppression("MTS recovery: automatic recovery failed.*");
call mtr.add_suppression("Recovery from master pos");

# Generate the MTS gap. NOTE(review): according to the description at the top
# of this file, the sourced script creates d1 and d2 and leaves a blocking
# transaction open on the slave - confirm against rpl_generate_mts_gap.test.
--source extra/rpl_tests/rpl_generate_mts_gap.test

# Commit the blocking transaction on the slave so that the workers proceed,
# then wait for the slave SQL thread to stop with the duplicate key error.
--source include/rpl_connection_slave.inc
COMMIT;
--let $slave_sql_errno= convert_error(ER_DUP_ENTRY)
source include/wait_for_slave_sql_error.inc;

# Restart the slave server - Recovery will fail as MTS has stopped due to an
# error.
--let $rpl_server_number= 2
--let $rpl_server_parameters= --skip_slave_start=TRUE --relay_log_info_repository=TABLE --master_info_repository=TABLE --sync_master_info=1 --relay-log-recovery=1
--source include/rpl_restart_server.inc

# The slave was restarted with --skip_slave_start, so replication is started
# by hand here; START SLAVE must fail with ER_SLAVE_RLI_INIT_REPOSITORY.
--source include/rpl_connection_slave.inc
--exec echo "Relay log recovery should fail as MTS stopped due to an error"
--error ER_SLAVE_RLI_INIT_REPOSITORY
START SLAVE;

# Remove the offending rows from the slave.
--exec echo "Eliminate the cause of MTS error"
DELETE FROM d2.t WHERE a=2;
DELETE FROM d1.t WHERE a=3;

# Wait until d2.t holds exactly 1 row.
--let $table=d2.t
--let $count=1
--source include/wait_until_rows_count.inc

# Wait until d1.t holds exactly 2 rows.
--let $table=d1.t
--let $count=2
--source include/wait_until_rows_count.inc

# Restart the slave server; this should fix the gaps. The same server options
# are used as for the first restart, including --relay-log-recovery=1.
--let $rpl_server_number= 2
--let $rpl_server_parameters= --skip_slave_start=TRUE --relay_log_info_repository=TABLE --master_info_repository=TABLE --sync_master_info=1 --relay-log-recovery=1
--source include/rpl_restart_server.inc
--exec echo "MTS recovery should be successful. Check that gaps are filled."
# Verify the table contents on the slave before the slave threads are started
# again: d1.t must hold 2 rows and d2.t must hold 3 rows.
--let $assert_text= Table d1.t should contain 2 rows.
--let $assert_cond= [select count(*) from d1.t] = 2
--source include/assert.inc

--let $assert_text= Table d2.t should contain 3 rows.
--let $assert_cond= [select count(*) from d2.t] = 3
--source include/assert.inc

# Start replication on the slave now that the row counts have been checked.
--source include/start_slave.inc

# Check consistency: sync the slave SQL thread with the master, then compare
# both tables between master and slave.
--source include/rpl_connection_master.inc
--source include/sync_slave_sql_with_master.inc
--let $diff_tables= master:d1.t, slave:d1.t
--source include/diff_tables.inc

--let $diff_tables= master:d2.t, slave:d2.t
--source include/diff_tables.inc

#
# Cleanup: drop the test databases on the master and end the replication test.
#
--source include/rpl_connection_master.inc
DROP DATABASE d1;
DROP DATABASE d2;
--source include/rpl_end.inc