1 files changed, 233 insertions, 0 deletions
diff --git a/examples/redis-unstable/tests/integration/psync2-master-restart.tcl b/examples/redis-unstable/tests/integration/psync2-master-restart.tcl
new file mode 100644
index 0000000..5971e74
--- /dev/null
+++ b/examples/redis-unstable/tests/integration/psync2-master-restart.tcl
@@ -0,0 +1,233 @@
+start_server {tags {"psync2 external:skip"}} {
+start_server {} {
+start_server {} {
+    # Client handles for the three servers started above: srv 0 is used as the
+    # master, srv -1 as its replica and srv -2 as the replica's own replica.
+    set master [srv 0 client]
+    set replica [srv -1 client]
+    set sub_replica [srv -2 client]
+    # Addresses needed to wire up the replication chain further down.
+    set master_host [srv 0 host]
+    set master_port [srv 0 port]
+    set replica_host [srv -1 host]
+    set replica_port [srv -1 port]
+    # The master has to write an RDB when it shuts down; the tests below
+    # restart it and check what it restores.
+    $master config set save "3600 1"
+    # A replication timeout would drop the master-replica link, and the extra
+    # reconnections would skew the sync_partial_ok counters asserted later,
+    # so raise the timeout on every node.
+    foreach node [list $master $replica $sub_replica] {
+        $node config set repl-timeout 3600
+    }
+    # Avoid PINGs from the master to its replicas.
+    $master config set repl-ping-replica-period 3600
+    # Persist the master settings through CONFIG REWRITE (presumably so they
+    # survive the restarts performed by the tests; confirm).
+    $master config rewrite
+    # Chain the servers: master -> replica -> sub_replica.
+    $replica replicaof $master_host $master_port
+    $sub_replica replicaof $replica_host $replica_port
+    # Do not start testing before both links report "up".
+    wait_for_condition 50 100 {
+        [status $replica master_link_status] eq "up" &&
+        [status $sub_replica master_link_status] eq "up"
+    } else {
+        fail "Replication not started."
+    }
+    # Restart the master before any write command has been issued and check
+    # that the replicas come back with a partial resync.
+    test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" {
+        # Precondition: the master's replication offset is still 0.
+        assert {[status $master master_repl_offset] == 0}
+        # Remember the current replication ID; after the restart it is
+        # expected to show up as master_replid2.
+        set replid [status $master master_replid]
+        # Reset the replica's stats so the sync_partial_ok assertion below
+        # only counts what happens after this point.
+        $replica config resetstat
+        # Restart the master and refresh its client handle. Any error raised
+        # inside the body is swallowed by catch.
+        # NOTE(review): the trailing "now" presumably selects SHUTDOWN NOW, as
+        # the comment in the following test suggests; confirm against
+        # restart_server.
+        catch {
+            restart_server 0 true false true now
+            set master [srv 0 client]
+        }
+        wait_for_condition 50 1000 {
+            [status $replica master_link_status] eq {up} &&
+            [status $sub_replica master_link_status] eq {up}
+        } else {
+            fail "Replicas didn't sync after master restart"
+        }
+        # Make sure master restore replication info correctly: a new replid,
+        # the previous one kept as replid2, and the offset unchanged at 0.
+        assert {[status $master master_replid] != $replid}
+        assert {[status $master master_repl_offset] == 0}
+        assert {[status $master master_replid2] eq $replid}
+        assert {[status $master second_repl_offset] == 1}
+        # Make sure master set replication backlog correctly: active, empty,
+        # and starting right after the restored offset.
+        assert {[status $master repl_backlog_active] == 1}
+        assert {[status $master repl_backlog_first_byte_offset] == 1}
+        assert {[status $master repl_backlog_histlen] == 0}
+        # Partial resync after Master restart: exactly one, seen from both
+        # the master and the replica.
+        assert {[status $master sync_partial_ok] == 1}
+        assert {[status $replica sync_partial_ok] == 1}
+    }
+    # Generate some data
+    createComplexDataset $master 1000
+    # Restart the master with a non-empty dataset and a non-zero replication
+    # offset, then check that the offset is restored and that the replicas
+    # continue with a partial resync.
+    test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" {
+        # All three nodes must sit at the same offset before the restart,
+        # otherwise the exact-offset assertions below cannot hold.
+        wait_for_condition 500 100 {
+            [status $master master_repl_offset] == [status $replica master_repl_offset] &&
+            [status $master master_repl_offset] == [status $sub_replica master_repl_offset]
+        } else {
+            fail "Replicas and master offsets were unable to match *exactly*."
+        }
+        # Snapshot the replication ID and offset to compare after restart.
+        set replid [status $master master_replid]
+        set offset [status $master master_repl_offset]
+        # Reset the replica's stats so sync_partial_ok only counts the resync
+        # caused by this restart.
+        $replica config resetstat
+        # Any error raised inside the body is swallowed by catch.
+        catch {
+            # SHUTDOWN NOW ensures master doesn't send GETACK to replicas before
+            # shutting down which would affect the replication offset.
+            restart_server 0 true false true now
+            set master [srv 0 client]
+        }
+        wait_for_condition 50 1000 {
+            [status $replica master_link_status] eq {up} &&
+            [status $sub_replica master_link_status] eq {up}
+        } else {
+            fail "Replicas didn't sync after master restart"
+        }
+        # Make sure master restore replication info correctly
+        assert {[status $master master_replid] != $replid}
+        assert {[status $master master_repl_offset] == $offset}
+        assert {[status $master master_replid2] eq $replid}
+        # expr arguments are braced so the expression is substituted once.
+        assert {[status $master second_repl_offset] == [expr {$offset+1}]}
+        # Make sure master set replication backlog correctly
+        assert {[status $master repl_backlog_active] == 1}
+        assert {[status $master repl_backlog_first_byte_offset] == [expr {$offset+1}]}
+        assert {[status $master repl_backlog_histlen] == 0}
+        # Partial resync after Master restart
+        assert {[status $master sync_partial_ok] == 1}
+        assert {[status $replica sync_partial_ok] == 1}
+    }
+    # Restart the master while its dataset holds keys that are already past
+    # their TTL, and check that the replicas still get a partial resync and
+    # end up with the same dataset.
+    test "PSYNC2: Partial resync after Master restart using RDB aux fields with expire" {
+        # With active expiry off, the short-lived keys written below are not
+        # removed in the background; the rdb_last_load_keys_expired assertion
+        # further down depends on them being dropped at RDB load instead.
+        $master debug set-active-expire 0
+        # 1024 keys with a 10 ms TTL, spread over DB indexes 0..15.
+        for {set j 0} {$j < 1024} {incr j} {
+            # expr argument is braced so it is substituted only once.
+            $master select [expr {$j%16}]
+            $master set $j somevalue px 10
+        }
+        # Sleep past the 10 ms TTL.
+        after 20
+        # Wait until master has received ACK from replica. If the master thinks
+        # that any replica is lagging when it shuts down, master would send
+        # GETACK to the replicas, affecting the replication offset.
+        set offset [status $master master_repl_offset]
+        wait_for_condition 500 100 {
+            [string match "*slave0:*,offset=$offset,*" [$master info replication]] &&
+            $offset == [status $replica master_repl_offset] &&
+            $offset == [status $sub_replica master_repl_offset]
+        } else {
+            show_cluster_status
+            fail "Replicas and master offsets were unable to match *exactly*."
+        }
+        # Re-read the offset right before the restart; it is the baseline for
+        # the offset-growth assertion below.
+        set offset [status $master master_repl_offset]
+        $replica config resetstat
+        # Any error raised inside the body is swallowed by catch.
+        catch {
+            # Unlike the test above, here we use SIGTERM, which behaves
+            # differently compared to SHUTDOWN NOW if there are lagging
+            # replicas. This is just to increase coverage and let each test use
+            # a different shutdown approach. In this case there are no lagging
+            # replicas though.
+            restart_server 0 true false
+            set master [srv 0 client]
+        }
+        wait_for_condition 50 1000 {
+            [status $replica master_link_status] eq {up} &&
+            [status $sub_replica master_link_status] eq {up}
+        } else {
+            fail "Replicas didn't sync after master restart"
+        }
+        # The backlog length after restart is how far the offset is expected
+        # to have advanced beyond the pre-restart value.
+        set expired_offset [status $master repl_backlog_histlen]
+        # Stale keys expired and master_repl_offset grows correctly
+        assert {[status $master rdb_last_load_keys_expired] == 1024}
+        assert {[status $master master_repl_offset] == [expr {$offset+$expired_offset}]}
+        # Partial resync after Master restart
+        assert {[status $master sync_partial_ok] == 1}
+        assert {[status $replica sync_partial_ok] == 1}
+        # Poll until both replicas report the same dataset digest as the
+        # master.
+        set digest [$master debug digest]
+        wait_for_condition 10 100 {
+          $digest eq [$replica debug digest] &&
+          $digest eq [$sub_replica debug digest]
+        } else {
+            fail "Replica and sub-replica didn't sync after master restart in time..."
+        }
+    }
+    # Restart the master after shrinking the replication backlog and writing
+    # many short-lived keys, and check that the replicas need a full resync
+    # rather than a partial one.
+    test "PSYNC2: Full resync after Master restart when too many key expired" {
+        $master config set repl-backlog-size 16384
+        $master config rewrite
+        $master debug set-active-expire 0
+        # Make sure replication backlog is full and will be trimmed.
+        for {set j 0} {$j < 2048} {incr j} {
+            # expr argument is braced so it is substituted only once.
+            $master select [expr {$j%16}]
+            $master set $j somevalue px 10
+        }
+        ##### hash-field-expiration
+        # Hashes of type OBJ_ENCODING_LISTPACK_EX won't be discarded during
+        # RDB load, even if they are expired.
+        $master hset myhash1 f1 v1 f2 v2 f3 v3
+        $master hpexpire myhash1 10 FIELDS 3 f1 f2 f3
+        # Hashes of type RDB_TYPE_HASH_METADATA will be discarded during RDB load.
+        $master config set hash-max-listpack-entries 0
+        $master hset myhash2 f1 v1 f2 v2
+        $master hpexpire myhash2 10 FIELDS 2 f1 f2
+        $master config set hash-max-listpack-entries 1
+        # Sleep past the 10 ms TTLs set above.
+        after 20
+        # All three nodes must sit at the same offset before the restart.
+        wait_for_condition 500 100 {
+            [status $master master_repl_offset] == [status $replica master_repl_offset] &&
+            [status $master master_repl_offset] == [status $sub_replica master_repl_offset]
+        } else {
+            fail "Replicas and master offsets were unable to match *exactly*."
+        }
+        # Reset the replica's stats so sync_full only counts the resync caused
+        # by this restart.
+        $replica config resetstat
+        # Any error raised inside the body is swallowed by catch.
+        catch {
+            # Unlike the test above, here we use SIGTERM. This is just to
+            # increase coverage and let each test use a different shutdown
+            # approach.
+            restart_server 0 true false
+            set master [srv 0 client]
+        }
+        wait_for_condition 50 1000 {
+            [status $replica master_link_status] eq {up} &&
+            [status $sub_replica master_link_status] eq {up}
+        } else {
+            fail "Replicas didn't sync after master restart"
+        }
+        # Replication backlog is full
+        assert {[status $master repl_backlog_first_byte_offset] > [status $master second_repl_offset]}
+        assert {[status $master sync_partial_ok] == 0}
+        assert {[status $master sync_full] == 1}
+        assert {[status $master rdb_last_load_keys_expired] == 2048}
+        assert {[status $replica sync_full] == 1}
+        # Poll for digest convergence, the same way the "with expire" test
+        # does, instead of asserting on a single sample.
+        # NOTE(review): the replicas may still be applying changes at this
+        # point; confirm that a one-second budget is enough on slow CI hosts.
+        set digest [$master debug digest]
+        wait_for_condition 10 100 {
+            $digest eq [$replica debug digest] &&
+            $digest eq [$sub_replica debug digest]
+        } else {
+            fail "Replica and sub-replica didn't sync after master restart in time..."
+        }
+    }
+}}}

diff --git a/examples/redis-unstable/tests/integration/psync2-master-restart.tcl b/examples/redis-unstable/tests/integration/psync2-master-restart.tcl
new file mode 100644
index 0000000..5971e74
--- /dev/null
+++ b/examples/redis-unstable/tests/integration/psync2-master-restart.tcl
@@ -0,0 +1,233 @@
	1	start_server {tags {"psync2 external:skip"}} {
	2	start_server {} {
	3	start_server {} {
	4	set master [srv 0 client]
	5	set master_host [srv 0 host]
	6	set master_port [srv 0 port]
	7
	8	set replica [srv -1 client]
	9	set replica_host [srv -1 host]
	10	set replica_port [srv -1 port]
	11
	12	set sub_replica [srv -2 client]
	13
	14	# Make sure the server saves an RDB on shutdown
	15	$master config set save "3600 1"
	16
	17	# Because we will test partial resync later, we don't want a timeout to cause
	18	# the master-replica disconnect, then the extra reconnections will break the
	19	# sync_partial_ok stat test
	20	$master config set repl-timeout 3600
	21	$replica config set repl-timeout 3600
	22	$sub_replica config set repl-timeout 3600
	23
	24	# Avoid PINGs
	25	$master config set repl-ping-replica-period 3600
	26	$master config rewrite
	27
	28	# Build replication chain
	29	$replica replicaof $master_host $master_port
	30	$sub_replica replicaof $replica_host $replica_port
	31
	32	wait_for_condition 50 100 {
	33	[status $replica master_link_status] eq {up} &&
	34	[status $sub_replica master_link_status] eq {up}
	35	} else {
	36	fail "Replication not started."
	37	}
	38
	39	test "PSYNC2: Partial resync after Master restart using RDB aux fields when offset is 0" {
	40	assert {[status $master master_repl_offset] == 0}
	41
	42	set replid [status $master master_replid]
	43	$replica config resetstat
	44
	45	catch {
	46	restart_server 0 true false true now
	47	set master [srv 0 client]
	48	}
	49	wait_for_condition 50 1000 {
	50	[status $replica master_link_status] eq {up} &&
	51	[status $sub_replica master_link_status] eq {up}
	52	} else {
	53	fail "Replicas didn't sync after master restart"
	54	}
	55
	56	# Make sure master restore replication info correctly
	57	assert {[status $master master_replid] != $replid}
	58	assert {[status $master master_repl_offset] == 0}
	59	assert {[status $master master_replid2] eq $replid}
	60	assert {[status $master second_repl_offset] == 1}
	61
	62	# Make sure master set replication backlog correctly
	63	assert {[status $master repl_backlog_active] == 1}
	64	assert {[status $master repl_backlog_first_byte_offset] == 1}
	65	assert {[status $master repl_backlog_histlen] == 0}
	66
	67	# Partial resync after Master restart
	68	assert {[status $master sync_partial_ok] == 1}
	69	assert {[status $replica sync_partial_ok] == 1}
	70	}
	71
	72	# Generate some data
	73	createComplexDataset $master 1000
	74
	75	test "PSYNC2: Partial resync after Master restart using RDB aux fields with data" {
	76	wait_for_condition 500 100 {
	77	[status $master master_repl_offset] == [status $replica master_repl_offset] &&
	78	[status $master master_repl_offset] == [status $sub_replica master_repl_offset]
	79	} else {
	80	fail "Replicas and master offsets were unable to match *exactly*."
	81	}
	82
	83	set replid [status $master master_replid]
	84	set offset [status $master master_repl_offset]
	85	$replica config resetstat
	86
	87	catch {
	88	# SHUTDOWN NOW ensures master doesn't send GETACK to replicas before
	89	# shutting down which would affect the replication offset.
	90	restart_server 0 true false true now
	91	set master [srv 0 client]
	92	}
	93	wait_for_condition 50 1000 {
	94	[status $replica master_link_status] eq {up} &&
	95	[status $sub_replica master_link_status] eq {up}
	96	} else {
	97	fail "Replicas didn't sync after master restart"
	98	}
	99
	100	# Make sure master restore replication info correctly
	101	assert {[status $master master_replid] != $replid}
	102	assert {[status $master master_repl_offset] == $offset}
	103	assert {[status $master master_replid2] eq $replid}
	104	assert {[status $master second_repl_offset] == [expr $offset+1]}
	105
	106	# Make sure master set replication backlog correctly
	107	assert {[status $master repl_backlog_active] == 1}
	108	assert {[status $master repl_backlog_first_byte_offset] == [expr $offset+1]}
	109	assert {[status $master repl_backlog_histlen] == 0}
	110
	111	# Partial resync after Master restart
	112	assert {[status $master sync_partial_ok] == 1}
	113	assert {[status $replica sync_partial_ok] == 1}
	114	}
	115
	116	test "PSYNC2: Partial resync after Master restart using RDB aux fields with expire" {
	117	$master debug set-active-expire 0
	118	for {set j 0} {$j < 1024} {incr j} {
	119	$master select [expr $j%16]
	120	$master set $j somevalue px 10
	121	}
	122
	123	after 20
	124
	125	# Wait until master has received ACK from replica. If the master thinks
	126	# that any replica is lagging when it shuts down, master would send
	127	# GETACK to the replicas, affecting the replication offset.
	128	set offset [status $master master_repl_offset]
	129	wait_for_condition 500 100 {
	130	[string match "*slave0:*,offset=$offset,*" [$master info replication]] &&
	131	$offset == [status $replica master_repl_offset] &&
	132	$offset == [status $sub_replica master_repl_offset]
	133	} else {
	134	show_cluster_status
	135	fail "Replicas and master offsets were unable to match *exactly*."
	136	}
	137
	138	set offset [status $master master_repl_offset]
	139	$replica config resetstat
	140
	141	catch {
	142	# Unlike the test above, here we use SIGTERM, which behaves
	143	# differently compared to SHUTDOWN NOW if there are lagging
	144	# replicas. This is just to increase coverage and let each test use
	145	# a different shutdown approach. In this case there are no lagging
	146	# replicas though.
	147	restart_server 0 true false
	148	set master [srv 0 client]
	149	}
	150	wait_for_condition 50 1000 {
	151	[status $replica master_link_status] eq {up} &&
	152	[status $sub_replica master_link_status] eq {up}
	153	} else {
	154	fail "Replicas didn't sync after master restart"
	155	}
	156
	157	set expired_offset [status $master repl_backlog_histlen]
	158	# Stale keys expired and master_repl_offset grows correctly
	159	assert {[status $master rdb_last_load_keys_expired] == 1024}
	160	assert {[status $master master_repl_offset] == [expr $offset+$expired_offset]}
	161
	162	# Partial resync after Master restart
	163	assert {[status $master sync_partial_ok] == 1}
	164	assert {[status $replica sync_partial_ok] == 1}
	165
	166	set digest [$master debug digest]
	167	wait_for_condition 10 100 {
	168	$digest eq [$replica debug digest] &&
	169	$digest eq [$sub_replica debug digest]
	170	} else {
	171	fail "Replica and sub-replica didn't sync after master restart in time..."
	172	}
	173	}
	174
	175	test "PSYNC2: Full resync after Master restart when too many key expired" {
	176	$master config set repl-backlog-size 16384
	177	$master config rewrite
	178
	179	$master debug set-active-expire 0
	180	# Make sure replication backlog is full and will be trimmed.
	181	for {set j 0} {$j < 2048} {incr j} {
	182	$master select [expr $j%16]
	183	$master set $j somevalue px 10
	184	}
	185
	186	##### hash-field-expiration
	187	# Hashes of type OBJ_ENCODING_LISTPACK_EX won't be discarded during
	188	# RDB load, even if they are expired.
	189	$master hset myhash1 f1 v1 f2 v2 f3 v3
	190	$master hpexpire myhash1 10 FIELDS 3 f1 f2 f3
	191	# Hashes of type RDB_TYPE_HASH_METADATA will be discarded during RDB load.
	192	$master config set hash-max-listpack-entries 0
	193	$master hset myhash2 f1 v1 f2 v2
	194	$master hpexpire myhash2 10 FIELDS 2 f1 f2
	195	$master config set hash-max-listpack-entries 1
	196
	197	after 20
	198
	199	wait_for_condition 500 100 {
	200	[status $master master_repl_offset] == [status $replica master_repl_offset] &&
	201	[status $master master_repl_offset] == [status $sub_replica master_repl_offset]
	202	} else {
	203	fail "Replicas and master offsets were unable to match *exactly*."
	204	}
	205
	206	$replica config resetstat
	207
	208	catch {
	209	# Unlike the test above, here we use SIGTERM. This is just to
	210	# increase coverage and let each test use a different shutdown
	211	# approach.
	212	restart_server 0 true false
	213	set master [srv 0 client]
	214	}
	215	wait_for_condition 50 1000 {
	216	[status $replica master_link_status] eq {up} &&
	217	[status $sub_replica master_link_status] eq {up}
	218	} else {
	219	fail "Replicas didn't sync after master restart"
	220	}
	221
	222	# Replication backlog is full
	223	assert {[status $master repl_backlog_first_byte_offset] > [status $master second_repl_offset]}
	224	assert {[status $master sync_partial_ok] == 0}
	225	assert {[status $master sync_full] == 1}
	226	assert {[status $master rdb_last_load_keys_expired] == 2048}
	227	assert {[status $replica sync_full] == 1}
	228
	229	set digest [$master debug digest]
	230	assert {$digest eq [$replica debug digest]}
	231	assert {$digest eq [$sub_replica debug digest]}
	232	}
	233	}}}