mirror of
https://git.deuxfleurs.fr/Deuxfleurs/garage.git
synced 2025-01-20 04:58:11 +00:00
323 lines
8.3 KiB
TeX
323 lines
8.3 KiB
TeX
|
\section{State of the art}
|
||
|
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{The CAP theorem}{Consistency vs. Availability}
|
||
|
|
||
|
\begin{block}{Eric Brewer's theorem}
|
||
|
``A shared-state system can have \textbf{at most two} of the following properties at any given time:
|
||
|
|
||
|
\begin{itemize}
|
||
|
\item \textbf{C}onsistency
|
||
|
\item \textbf{A}vailability
|
||
|
\item \textbf{P}artition tolerance''
|
||
|
\end{itemize}
|
||
|
\end{block}
|
||
|
|
||
|
|
||
|
\begin{center}
|
||
|
\Large
|
||
|
Under network partitions, a distributed data store has to sacrifice either availability or consistency.
|
||
|
\end{center}
|
||
|
\vfill
|
||
|
|
||
|
\begin{itemize}
|
||
|
\item \textbf{Consistency-first}: Abort incoming queries;
|
||
|
\item \textbf{Availability-first}: Return possibly stale data.
|
||
|
\end{itemize}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{Consistency-first: the ACID model}{Consistency vs. Availability}
|
||
|
|
||
|
\textbf{Transaction}: unit of work within an ACID data store.
|
||
|
%Comprises multiple operations.
|
||
|
%E.g. bank transfer.
|
||
|
%E.g. a bank transfer from A to B is a transaction involving two operations: withdraw money from A & credit B with the same money amount.
|
||
|
\vfill
|
||
|
|
||
|
\begin{itemize}
|
||
|
\item \textbf{\underline{A}tomicity}: Transactions either complete entirely or fail.
|
||
|
|
||
|
No transaction ever seen as in-progress.
|
||
|
|
||
|
\item \textbf{\underline{C}onsistency}: Transactions always generate a valid state.
|
||
|
|
||
|
The database maintains its invariants across transactions.
|
||
|
|
||
|
\item \textbf{\underline{I}solation}: Concurrent transactions are seen as sequential.
|
||
|
|
||
|
Transactions are serializable, or sequentially consistent.
|
||
|
|
||
|
\item \textbf{\underline{D}urability}: Committed transactions are never forgotten.
|
||
|
\end{itemize}
|
||
|
\vfill\centering
|
||
|
|
||
|
Reads are fast, writes are slow.
|
||
|
|
||
|
\vfill\raggedright
|
||
|
|
||
|
Example: relational databases.
|
||
|
\end{frame}
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}[fragile]{Concurrent writes in ACID}{Consistency vs. Availability}
|
||
|
|
||
|
|
||
|
\begin{columns}
|
||
|
\column{.5\columnwidth}
|
||
|
\begin{block}{}
|
||
|
\begin{lstlisting}
|
||
|
transaction AcqDoses(y):
|
||
|
x <- SELECT #vaccines;
|
||
|
UPDATE #vaccines = (x + y);
|
||
|
\end{lstlisting}
|
||
|
\end{block}
|
||
|
\vspace{5ex}
|
||
|
|
||
|
Supports compound operations.
|
||
|
\column{.5\columnwidth}
|
||
|
\centering
|
||
|
\includegraphics[width=\columnwidth]{figures/conflict_acid.pdf}
|
||
|
\end{columns}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{Availability-first: the BASE model}{Consistency vs. Availability}
|
||
|
|
||
|
|
||
|
Some apps prefer availability, e.g.\ Amazon product reviews.
|
||
|
\vfill
|
||
|
|
||
|
The BASE model trades Consistency \& Isolation for Availability.
|
||
|
|
||
|
|
||
|
%Some applications do not care about strong consistency and prefer being highly available (e.g. Amazon's product reviews).
|
||
|
|
||
|
%In order to achieve higher availability, the BASE model relaxes consistency constraints of the ACID model: "eventual consistency".
|
||
|
\vfill
|
||
|
|
||
|
\begin{itemize}
|
||
|
\item \textbf{\underline{B}asic \underline{A}vailability}:
|
||
|
The data store strives to be available.
|
||
|
|
||
|
\item \textbf{\underline{S}oft-state}:
|
||
|
Replicas can disagree on the valid state.
|
||
|
|
||
|
\item \textbf{\underline{E}ventual consistency}:
|
||
|
In the absence of write queries,
|
||
|
the data store will eventually converge to a single valid state.
|
||
|
\end{itemize}
|
||
|
\vfill\centering
|
||
|
|
||
|
Writes are fast, reads are slow.
|
||
|
|
||
|
\vfill\raggedright
|
||
|
|
||
|
Examples: key-value \& object stores.
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{Concurrent writes in BASE}{Consistency vs. Availability}
|
||
|
|
||
|
\begin{columns}
|
||
|
\column{.5\columnwidth}
|
||
|
\begin{block}{Object}
|
||
|
\begin{itemize}
|
||
|
\item Unique key
|
||
|
\item Arbitrary value
|
||
|
\item Metadata
|
||
|
\end{itemize}
|
||
|
\end{block}
|
||
|
\vspace{5ex}
|
||
|
|
||
|
Conflict resolution = client's job!
|
||
|
\vspace{5ex}
|
||
|
|
||
|
No compound operations.
|
||
|
\column{.5\columnwidth}
|
||
|
\centering
|
||
|
\includegraphics[width=\columnwidth]{figures/conflict_base.pdf}
|
||
|
\end{columns}
|
||
|
|
||
|
% KV storage is another example, distinction is minor here
|
||
|
|
||
|
% Object = unique key, arbitrary value, metadata.
|
||
|
|
||
|
% Object storage only provides semantics to investigate causal order of queries *for individual objects*. No compound operations, no transactions.
|
||
|
|
||
|
% Much easier to distribute, and "scale-out".
|
||
|
|
||
|
% Write is fast, read is slow (gotta collect all object versions).
|
||
|
|
||
|
% \todo{vaccines example with BASE model}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{Strong Eventual Consistency w/ CRDTs}{Consistency vs. Availability}
|
||
|
|
||
|
\centering\small
|
||
|
|
||
|
\fullcite{defago_conflict-free_2011}
|
||
|
|
||
|
\vfill\raggedright\normalsize
|
||
|
|
||
|
\begin{block}{Strong Eventual Consistency (SEC)}
|
||
|
\begin{itemize}
|
||
|
\item CRDTs specify distributed operations
|
||
|
\item Conflicts will be resolved according to specification
|
||
|
\item Proven \& bounded eventual convergence
|
||
|
\end{itemize}
|
||
|
\end{block}
|
||
|
|
||
|
\vfill\centering
|
||
|
\includegraphics[width=.5\columnwidth]{figures/crdt.pdf}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}[fragile]{Concurrent writes with CRDTs}{Consistency vs. Availability}
|
||
|
|
||
|
\begin{columns}
|
||
|
\column{.5\columnwidth}
|
||
|
\begin{block}{}
|
||
|
\begin{lstlisting}
|
||
|
CRDT Counter(x0):
|
||
|
history = {}
|
||
|
op. incr(y):
|
||
|
history U= {(UUID(), y)}
|
||
|
op. decr(y):
|
||
|
history U= {(UUID(), -y)}
|
||
|
op. read():
|
||
|
x = x0
|
||
|
for (_, y) in history:
|
||
|
x += y
|
||
|
return x
|
||
|
\end{lstlisting}
|
||
|
\end{block}
|
||
|
\vspace{2ex}
|
||
|
|
||
|
Operations commute?
|
||
|
|
||
|
$\implies$ screw total order!
|
||
|
\column{.5\columnwidth}
|
||
|
\centering
|
||
|
\includegraphics[width=\columnwidth]{figures/conflict_crdt.pdf}
|
||
|
\end{columns}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{A complex CRDT: the DAG}{Consistency vs. Availability}
|
||
|
|
||
|
\centering
|
||
|
\only<1>{\includegraphics[height=\textheight]{figures/dag_crdt.png}}%
|
||
|
\only<2>{
|
||
|
Just to say I swept a lot under the rug.
|
||
|
\vfill
|
||
|
|
||
|
For details, go read:
|
||
|
|
||
|
\fullcite{defago_conflict-free_2011}
|
||
|
\vfill
|
||
|
|
||
|
For an implementation, check \textbf{AntidoteDB}.
|
||
|
}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
\begin{frame}{State of the practice}{Path dependency to the ``cloud''}
|
||
|
|
||
|
\begin{block}{The BASE model is fashionable because}
|
||
|
\centering
|
||
|
|
||
|
``\emph{High-performance} object storage for \emph{AI analytics} with PBs of \emph{IoT data streams} at the \emph{edge}, using \emph{5G}.''
|
||
|
% \begin{itemize}
|
||
|
% \item Highest performance
|
||
|
% \item IoT data streams are inherently distributed
|
||
|
% \end{itemize}
|
||
|
\end{block}
|
||
|
|
||
|
\vfill\centering
|
||
|
|
||
|
\includegraphics[width=.9\columnwidth]{figures/minio_edge.png}
|
||
|
|
||
|
\vfill\raggedright
|
||
|
|
||
|
|
||
|
%\begin{block}{}
|
||
|
\begin{itemize}
|
||
|
\item Always backed by cloud: high-performance network links.
|
||
|
\item Edge nodes always seen as clients or data sources, not peers.
|
||
|
\end{itemize}
|
||
|
%\end{block}
|
||
|
|
||
|
% There is \textbf{always a central cloud cluster} in these use-cases.
|
||
|
|
||
|
% Hidden constraint: \textbf{high performance inter-node connectivity}.
|
||
|
|
||
|
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
|
||
|
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||
|
% \begin{frame}{A brief history of storage}
|
||
|
|
||
|
% We keep it short because we'll follow chronological order in the next section too.
|
||
|
|
||
|
% \end{frame}
|
||
|
|
||
|
|
||
|
% \begin{frame}{In the beginning, there were \emph{monoliths}}
|
||
|
|
||
|
% \includegraphics[width=.5\columnwidth]{figures/stonehenge.jpg}
|
||
|
|
||
|
% Web applications used to be monolithic:
|
||
|
|
||
|
% \begin{itemize}
|
||
|
% \item One or two servers;
|
||
|
% \item Availability was not an obsession;
|
||
|
% \item Latency was acceptable.
|
||
|
% \end{itemize}
|
||
|
|
||
|
% Relational databases were queens.
|
||
|
|
||
|
% \end{frame}
|
||
|
|
||
|
|
||
|
% \begin{frame}{Then came \emph{expectations}}
|
||
|
% Then, the whole world went online, and suddenly: expectations!
|
||
|
|
||
|
% \begin{itemize}
|
||
|
% \item ``Milliseconds matter.'' (Algolia slogan)
|
||
|
% \item Critical networked services (healthcare, logistics) need 100\% availability
|
||
|
% \end{itemize}
|
||
|
|
||
|
% $\implies$ Microservices \& horizontal scalability.
|
||
|
|
||
|
% \todo{Develop on the `herd not sheep' paradigm a bit.}
|
||
|
|
||
|
% \end{frame}
|
||
|
|
||
|
|
||
|
% \begin{frame}{Distributing state/storage: the remaining unknown}
|
||
|
|
||
|
% The microservices orchestration game works well for \emph{stateless} services.
|
||
|
|
||
|
% However, any application requires \emph{state}, persistent data.
|
||
|
|
||
|
% And this is tough. As we will now see.
|
||
|
|
||
|
% (Not that it's not well studied: distributed storage has always been fashionable.)
|
||
|
|
||
|
% \end{frame}
|