Talk 2023-01-18: some WIP talking about consensus

This commit is contained in:
Alex Auvolat 2023-01-12 16:27:02 +01:00
parent 7416ba97ef
commit fe850f62c9
No known key found for this signature in database
GPG key ID: 0E496D15096376BE
6 changed files with 315 additions and 21 deletions

View file

@ -3,6 +3,7 @@ ASSETS=assets/consistent_hashing_1.pdf \
assets/consistent_hashing_3.pdf \
assets/consistent_hashing_4.pdf \
assets/garage_tables.pdf \
assets/consensus.pdf_tex \
assets/deuxfleurs.pdf
talk.pdf: talk.tex $(ASSETS)
@ -10,3 +11,6 @@ talk.pdf: talk.tex $(ASSETS)
assets/%.pdf: assets/%.svg
inkscape -D -z --file=$^ --export-pdf=$@
assets/%.pdf_tex: assets/%.svg
inkscape -D -z --file=$^ --export-pdf=$@ --export-latex

View file

@ -0,0 +1,137 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="800"
height="300"
viewBox="0 0 211.66666 79.374999"
version="1.1"
id="svg5"
inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
sodipodi:docname="consensus.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview7"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="mm"
showgrid="false"
inkscape:zoom="1.4734708"
inkscape:cx="310.49139"
inkscape:cy="179.1688"
inkscape:window-width="1920"
inkscape:window-height="999"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="layer1" />
<defs
id="defs2">
<marker
style="overflow:visible"
id="Arrow2"
refX="0"
refY="0"
orient="auto-start-reverse"
inkscape:stockid="Arrow2"
markerWidth="7.6999998"
markerHeight="5.5999999"
viewBox="0 0 7.7 5.6"
inkscape:isstock="true"
inkscape:collect="always"
preserveAspectRatio="xMidYMid">
<path
transform="scale(0.7)"
d="M -2,-4 9,0 -2,4 c 2,-2.33 2,-5.66 0,-8 z"
style="fill:context-stroke;fill-rule:evenodd;stroke:none"
id="arrow2L" />
</marker>
</defs>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<g
id="g1218"
transform="translate(-8.9161476,-12.502301)">
<circle
style="fill:#ffffff;stroke:#000000;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
id="path111"
cx="38.904896"
cy="37.936272"
r="13.474442" />
<text
xml:space="preserve"
style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
x="38.879501"
y="40.908073"
id="text1105"><tspan
sodipodi:role="line"
id="tspan1103"
style="stroke-width:0.264583;fill:#000000"
x="38.879501"
y="40.908073">$\bot$</tspan></text>
</g>
<g
id="g1218-3"
transform="translate(127.41938,-12.502301)">
<circle
style="fill:#ffffff;stroke:#000000;stroke-width:1;stroke-dasharray:none;stroke-opacity:1"
id="path111-5"
cx="38.904896"
cy="37.936272"
r="13.474442" />
<text
xml:space="preserve"
style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
x="38.879501"
y="40.908073"
id="text1105-6"><tspan
sodipodi:role="line"
id="tspan1103-2"
style="stroke-width:0.264583;fill:#000000"
x="38.879501"
y="40.908073">$x$</tspan></text>
</g>
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow2)"
d="M 44.289635,25.433971 H 145.90576"
id="path1414"
sodipodi:nodetypes="cc" />
<text
xml:space="preserve"
style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
x="92.729836"
y="21.781803"
id="text2092"><tspan
sodipodi:role="line"
id="tspan2090"
style="stroke-width:0.264583;fill:#000000"
x="92.729836"
y="21.781803">$propose(x) / x$</tspan></text>
<text
xml:space="preserve"
style="font-size:8.46667px;line-height:1.25;font-family:sans-serif;text-align:center;text-anchor:middle;stroke-width:0.264583;fill:#000000"
x="166.29887"
y="69.89299"
id="text2092-9"><tspan
sodipodi:role="line"
id="tspan2090-1"
style="stroke-width:0.264583;fill:#000000"
x="166.29887"
y="69.89299">$propose(y) / x$</tspan></text>
<path
style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#Arrow2)"
d="m 155.82329,35.899857 c -8.35129,12.319651 0.54055,24.640898 11.72797,24.072085 8.65403,-0.440005 18.59818,-11.705963 11.8146,-20.570891"
id="path2150"
sodipodi:nodetypes="csc" />
</g>
</svg>

After

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 13 KiB

View file

@ -8,6 +8,7 @@
\usepackage{multirow}
\usetheme{boxes}
\usepackage{graphicx}
\usepackage{import}
\usepackage{adjustbox}
%\useoutertheme[footline=authortitle,subsection=false]{miniframes}
%\useoutertheme[footline=authorinstitute,subsection=false]{miniframes}
@ -479,33 +480,185 @@
\section{Problem 2: ensuring consistency}
%\begin{frame}
% \frametitle{Garage's architecture}
% \begin{center}
% \includegraphics[width=.35\linewidth]{assets/garage.drawio.pdf}
% \end{center}
%\end{frame}
\begin{frame}
\frametitle{Garage is \emph{coordination-free}:}
\frametitle{Consensus vs weak consistency}
\hspace{1em}
\begin{minipage}{7cm}
\textbf{Consensus-based systems:}
\vspace{1em}
\begin{itemize}
\item No Raft or Paxos
\item \textbf{Leader-based:} a leader is elected to coordinate
all reads and writes
\vspace{1em}
\item Internal data types are CRDTs
\item \textbf{Linearizability} of all operations\\
(strongest consistency guarantee)
\vspace{1em}
\item All nodes are equivalent (no master/leader/index node)
\item \textbf{Replicated state machines} that can implement
any sequential specification
\vspace{1em}
\item \textbf{Costly}, the leader is a bottleneck;
leader elections on failure take time
\end{itemize}
\vspace{2em}
$\to$ less sensitive to higher latencies between nodes
\end{minipage}
\hfill
\begin{minipage}{7cm} \visible<2->{
\textbf{Weakly consistent systems:}
\vspace{1em}
\begin{itemize}
\item \textbf{Nodes are equivalent}, any node
can originate a read or write operation
\vspace{1em}
\item \textbf{Read-after-write consistency} with quorums,
eventual consistency without
\vspace{1em}
\item \textbf{Operations have to commute}, i.e.~we
can only implement CRDTs
\vspace{1em}
\item \textbf{Fast}, no node is a bottleneck;\\
works the same with offline nodes
\end{itemize}
} \end{minipage}
\hspace{1em}
\end{frame}
\begin{frame}
\frametitle{Consistency model}
\begin{itemize}
\item Not ACID (not required by S3 spec) / not linearizable
\frametitle{Consensus vs weak consistency}
\begin{center}
\textbf{The same objects cannot be implemented in both models.}
\end{center}
\vspace{2em}
\hspace{1em}
\begin{minipage}{7cm}
\underline{Consensus-based systems:}
\vspace{1em}
\item \textbf{Read-after-write consistency}\\
{\footnotesize (stronger than eventual consistency)}
\textbf{Any sequential specification}\\~
\end{minipage}
\hfill
\begin{minipage}{7cm}
\underline{Weakly consistent systems:}
\vspace{1em}
\textbf{CRDTs only}\\(conflict-free replicated data types)
\end{minipage}
\hspace{1em}
\vspace{3em}
\begin{center}
Part of the complexity is \textbf{reported to the consumer of the API}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Consensus vs weak consistency}
\begin{center}
\textbf{From a theoretical point of view:}\\
\end{center}
\vspace{2em}
\hspace{1em}
\begin{minipage}{6.5cm}
\underline{Consensus-based systems:}
\vspace{1em}
Require \textbf{additionnal assumptions} such as a fault detector or a strong RNG\\~
\end{minipage}
\hfill
\begin{minipage}{6.5cm}
\underline{Weakly consistent systems:}
\vspace{1em}
Can be implemented in \textbf{any asynchronous message passing distributed system}
\end{minipage}
\hspace{1em}
\vspace{3em}
\begin{center}
They represent \textbf{different classes of computational capability}
\end{center}
\end{frame}
\begin{frame}
\frametitle{Understanding the power of consensus}
\textbf{Consensus:} an API with a single operation, $propose(x)$
\begin{enumerate}
\item nodes all call $propose(x)$ with their proposed value;
\item nodes all receive the same value as a return value, which is one of the proposed values
\end{enumerate}
\vspace{1em}
\visible<2->{
\textbf{Equivalent to} a distributed algorithm that gives a total order on all requests
}
\vspace{1em}
\visible<3->{
\textbf{Implemented by} this simple replicated state machine:
\vspace{.5em}
\begin{figure}
\centering
\def\svgwidth{.5\textwidth}
\large
\import{assets/}{consensus.pdf_tex}
\end{figure}
\vspace{1em}
}
\end{frame}
\begin{frame}
\frametitle{Can my object be implemented without consensus?}
\underline{Given the specification of an API:}
\vspace{2em}
\begin{itemize}
\item \textbf{Using this API, we can implement the consensus object} (the $propose$ function)\\
$\to$ the API is equivalent to consensus/total ordering of messages\\
$\to$ the API cannot be implemented in a weakly consistent system
\vspace{2em}
\item \textbf{This API can be implemented using only weak primitives}\\
(e.g. a bunch of atomic registers)\\
$\to$ the API is strictly weaker than consensus\\
$\to$ we can implement it in Garage!
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Why avoid consensus?}
Consensus can be implemented reasonably well in practice, so why avoid it?
\vspace{2em}
\begin{itemize}
\item \textbf{Software complexity:} RAFT and PAXOS are complex beasts;\\
harder to prove, harder to reason about
\vspace{1.5em}
\item \textbf{Performance issues:}
\vspace{1em}
\begin{itemize}
\item The leader is a \textbf{bottleneck} for all requests
\vspace{1em}
\item Particularly \textbf{sensitive to higher latency} between nodes
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{What can we implement without consensus?}
\begin{itemize}
\item Any \textbf{conflict-free replicated data type} (CRDT)
\vspace{1em}
\item Non-transactional key-value stores such as S3 are equivalent to a simple CRDT:\\
a \textbf{last-writer-wins registry}
\vspace{1em}
\item \textbf{Read-after-write consistency} can be implemented
using quorums on read and write operations
\vspace{1em}
\item \textbf{Monotonicity of reads} can be implemented with repair-on-read\\
(makes reads more costly, not implemented in Garage)
\end{itemize}
\end{frame}