jepsen: set1 and set2 don't fail anymore ??

This commit is contained in:
Alex Auvolat 2023-10-24 15:44:05 +02:00
parent d2c365767b
commit d13bde5e26
3 changed files with 47 additions and 40 deletions

View file

@ -69,9 +69,9 @@ Results with timestamp patch (`--patch tsfix2`):
- No failures with clock-scramble nemesis + partition nemesis (`--scenario cp`).
This proves that `tsfix2` (PR#543) does improve consistency.
- **Fails with layout reconfiguration nemesis** (`--scenario r`)
(TODO: note down the run id of a failed run)
(TODO: test more and investigate).
- **Fails with layout reconfiguration nemesis** (`--scenario r`).
Example of a failed run: `garage reg2/20231024T120806.899+0200`.
TODO: investigate.
This is the failure mode we are looking for and trying to fix for NLnet task 3.
@ -83,12 +83,11 @@ Results:
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be.
- Does not seem to fail with only the layout reconfiguration nemesis (>20 runs), although theoretically it could.
- **Fails with partition + layout reconfiguration nemesis** (`--scenario pr`)
(TODO: note down the run id of a failed run)
(TODO: test more and investigate).
This is the failure mode we are looking for and trying to fix for NLnet task 3.
- Does not seem to fail with the layout reconfiguration + partition nemesis (<10 runs), although theoretically it could.
TODO: make it fail!!!
### Set, continuous test (interspersed reads and writes)
@ -99,10 +98,9 @@ Results:
- For now, no failures with clock-scramble nemesis + partition nemesis -> TODO long test run
- Failures were not yet achieved with only the layout reconfiguration nemesis, although they should be.
- Does not seem to fail with the clock scrambler + partition + layout reconfiguration nemesis (>10 runs), although theoretically it could.
- TODO: failures should be achieved with `--scenario pr`? Even with 4 or 5 consecutive test runs, no failures were achieved, why?
(TODO: note down the run id of a failed run)
TODO: make it fail!!!
## Investigating (and fixing) errors

View file

@ -26,7 +26,8 @@
{"c" grgNemesis/scenario-c
"cp" grgNemesis/scenario-cp
"r" grgNemesis/scenario-r
"pr" grgNemesis/scenario-pr})
"pr" grgNemesis/scenario-pr
"cpr" grgNemesis/scenario-cpr})
(def patches
"A map of patch names to Garage builds"

View file

@ -76,30 +76,24 @@
;; Nemesis scenario combining clock scrambling with network partitions.
;; Builds a map with :generator, :final-generator and :nemesis entries;
;; `opts` is accepted but not read anywhere in the visible body.
;; NOTE(review): this span contains both a `cycle`/`gen/sleep` generator and a
;; `gen/mix` + `gen/stagger` generator, and two versions of the compose map,
;; with no +/- markers. It looks like removed and added diff lines shown
;; interleaved -- confirm which form is the current one before editing.
(defn scenario-cp
"Clock scramble + partition scenario"
[opts]
{:generator (cycle [(gen/sleep 5)
{:type :info, :f :partition-start}
(gen/sleep 5)
{:type :info, :f :clock-scramble}
(gen/sleep 5)
{:generator (->>
(gen/mix [{:type :info, :f :clock-scramble}
{:type :info, :f :partition-stop}
(gen/sleep 5)
{:type :info, :f :clock-scramble}])
{:type :info, :f :partition-start}])
(gen/stagger 3))
;; Final phase issues a single :partition-stop op (presumably to heal the
;; network before final reads -- TODO confirm).
:final-generator (gen/once {:type :info, :f :partition-stop})
;; Each key of the compose map translates this scenario's :f names into the
;; :f names handled by the nemesis given as the value.
:nemesis (nemesis/compose
{{:partition-start :start
:partition-stop :stop} (nemesis/partition-random-halves)
{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)})})
{{:clock-scramble :scramble} (nemesis/clock-scrambler 20.0)
{:partition-start :start
:partition-stop :stop} (nemesis/partition-random-halves)})})
;; Nemesis scenario exercising cluster layout reconfiguration only.
;; Builds a map with :generator, :final-generator and :nemesis entries;
;; `opts` is accepted but not read anywhere in the visible body.
;; NOTE(review): both a `cycle`/`gen/sleep` generator and a `gen/mix` +
;; `gen/stagger` generator appear here without +/- markers, and
;; :final-generator sits between the `gen/mix` and `gen/stagger` lines. This
;; looks like interleaved diff lines -- confirm the current form before editing.
(defn scenario-r
"Cluster reconfiguration scenario"
[opts]
{:generator (cycle [(gen/sleep 5)
{:type :info, :f :reconfigure-start}
(gen/sleep 5)
{:type :info, :f :reconfigure-start}
(gen/sleep 5)
{:generator (->>
(gen/mix [{:type :info, :f :reconfigure-start}
{:type :info, :f :reconfigure-stop}])
;; Final phase issues a single :reconfigure-stop op.
:final-generator (gen/once {:type :info, :f :reconfigure-stop})
(gen/stagger 3))
;; :reconfigure-start / :reconfigure-stop are translated to :start / :stop for
;; the nemesis returned by (reconfigure-subset 3), which is defined elsewhere.
:nemesis (nemesis/compose
{{:reconfigure-start :start
:reconfigure-stop :stop} (reconfigure-subset 3)})})
@ -107,19 +101,33 @@
;; Nemesis scenario combining network partitions with cluster layout
;; reconfiguration.
;; Builds a map with :generator, :final-generator and :nemesis entries;
;; `opts` is accepted but not read anywhere in the visible body.
;; NOTE(review): both a `cycle`/`gen/sleep` generator and a `gen/mix` +
;; `gen/stagger` generator appear here without +/- markers. This looks like
;; interleaved diff lines -- confirm the current form before editing.
(defn scenario-pr
"Partition + cluster reconfiguration scenario"
[opts]
{:generator (cycle [(gen/sleep 3)
{:type :info, :f :reconfigure-start}
(gen/sleep 3)
{:type :info, :f :partition-start}
(gen/sleep 3)
{:type :info, :f :reconfigure-start}
(gen/sleep 3)
{:generator (->>
(gen/mix [{:type :info, :f :partition-start}
{:type :info, :f :partition-stop}
(gen/sleep 3)
{:type :info, :f :reconfigure-start}
{:type :info, :f :reconfigure-stop}])
(gen/stagger 3))
;; Final phase issues a single :partition-stop op.
;; NOTE(review): no :reconfigure-stop is issued here, unlike scenario-r --
;; confirm whether that is intentional.
:final-generator (gen/once {:type :info, :f :partition-stop})
;; Each key of the compose map translates this scenario's :f names into the
;; :start / :stop names handled by the nemesis given as the value.
:nemesis (nemesis/compose
{{:partition-start :start
:partition-stop :stop} (nemesis/partition-random-halves)
{:reconfigure-start :start
:reconfigure-stop :stop} (reconfigure-subset 3)})})
(defn scenario-cpr
  "Clock scramble + partition + cluster reconfiguration scenario.

  Returns a map with three entries:
    :generator        a mix of the five nemesis ops below, passed through
                      (gen/stagger 3)
    :final-generator  a single :partition-stop op
    :nemesis          the clock scrambler, the random-halves partitioner and
                      the layout reconfigurer composed into one nemesis

  `opts` is accepted for signature parity with the other scenarios and is
  not read here."
  [opts]
  (let [info-op       (fn [f] {:type :info, :f f})
        nemesis-ops   (mapv info-op [:clock-scramble
                                     :partition-start
                                     :partition-stop
                                     :reconfigure-start
                                     :reconfigure-stop])
        ;; Each key translates this scenario's :f names into the :f names
        ;; handled by the nemesis given as the value.
        clock-fs       {:clock-scramble :scramble}
        partition-fs   {:partition-start :start
                        :partition-stop  :stop}
        reconfigure-fs {:reconfigure-start :start
                        :reconfigure-stop  :stop}]
    {:generator       (gen/stagger 3 (gen/mix nemesis-ops))
     ;; NOTE(review): only the partition is stopped in the final phase; no
     ;; :reconfigure-stop is issued -- confirm whether that is intentional.
     :final-generator (gen/once (info-op :partition-stop))
     :nemesis         (nemesis/compose
                        {clock-fs       (nemesis/clock-scrambler 20.0)
                         partition-fs   (nemesis/partition-random-halves)
                         reconfigure-fs (reconfigure-subset 3)})}))