From 3b49dd9e639a0647268dd74156df69242d7e5ad5 Mon Sep 17 00:00:00 2001
From: Alex Auvolat
Date: Tue, 11 Mar 2025 09:19:20 +0100
Subject: [PATCH] admin api: small refactor + add comments to layout-related
 calls

---
 doc/api/garage-admin-v2.json    | 156 +++++++++++++++++++-------------
 src/api/admin/api.rs            | 106 +++++++++++++++++++---
 src/api/admin/cluster.rs        |  28 +++---
 src/garage/cli/remote/layout.rs |  20 +---
 4 files changed, 203 insertions(+), 107 deletions(-)

diff --git a/doc/api/garage-admin-v2.json b/doc/api/garage-admin-v2.json
index 921d8d4c..97de3a71 100644
--- a/doc/api/garage-admin-v2.json
+++ b/doc/api/garage-admin-v2.json
@@ -1412,6 +1412,7 @@
         "version": {
           "type": "integer",
           "format": "int64",
+          "description": "As a safety measure, the new version number of the layout must\nbe specified here",
           "minimum": 0
         }
       }
@@ -1424,13 +1425,15 @@
       ],
       "properties": {
         "layout": {
-          "$ref": "#/components/schemas/GetClusterLayoutResponse"
+          "$ref": "#/components/schemas/GetClusterLayoutResponse",
+          "description": "Details about the new cluster layout"
         },
         "message": {
          "type": "array",
          "items": {
            "type": "string"
-          }
+          },
+          "description": "Plain-text information about the layout computation\n(do not try to parse this)"
        }
      }
    },
@@ -1666,11 +1669,13 @@
      ],
      "properties": {
        "allowMissingData": {
-          "type": "boolean"
+          "type": "boolean",
+          "description": "Allow the skip even if a quorum of nodes could not be found for\nthe data among the remaining nodes"
        },
        "version": {
          "type": "integer",
          "format": "int64",
+          "description": "Version number of the layout to assume is currently up-to-date.\nThis will generally be the current layout version.",
          "minimum": 0
        }
      }
@@ -1686,13 +1691,15 @@
          "type": "array",
          "items": {
            "type": "string"
-          }
+          },
+          "description": "Nodes for which the ACK update tracker has been updated to `version`"
        },
        "syncUpdated": {
          "type": "array",
          "items": {
            "type": "string"
-          }
+          },
+          "description": "If `allow_missing_data` is set,\nnodes for which the SYNC update tracker has been updated to `version`"
        }
      }
    },
@@ -1708,19 +1715,23 @@
        "gatewayNodes": {
          "type": "integer",
          "format": "int64",
+          "description": "Number of nodes with a gateway role in this layout version",
          "minimum": 0
        },
        "status": {
-          "$ref": "#/components/schemas/ClusterLayoutVersionStatus"
+          "$ref": "#/components/schemas/ClusterLayoutVersionStatus",
+          "description": "Status of this layout version"
        },
        "storageNodes": {
          "type": "integer",
          "format": "int64",
+          "description": "Number of nodes with an assigned storage capacity in this layout version",
          "minimum": 0
        },
        "version": {
          "type": "integer",
          "format": "int64",
+          "description": "Version number of this layout version",
          "minimum": 0
        }
      }
@@ -1836,11 +1847,13 @@
        "available": {
          "type": "integer",
          "format": "int64",
+          "description": "Number of bytes available",
          "minimum": 0
        },
        "total": {
          "type": "integer",
          "format": "int64",
+          "description": "Total number of bytes",
          "minimum": 0
        }
      }
@@ -2038,11 +2051,13 @@
        "currentVersion": {
          "type": "integer",
          "format": "int64",
+          "description": "The current version number of the cluster layout",
          "minimum": 0
        },
        "minAck": {
          "type": "integer",
          "format": "int64",
+          "description": "All nodes in the cluster are aware of layout versions up to\nthis version number (at least)",
          "minimum": 0
        },
        "updateTrackers": {
@@ -2050,6 +2065,7 @@
            "object",
            "null"
          ],
+          "description": "Detailed update trackers for nodes (see\n`https://garagehq.deuxfleurs.fr/blog/2023-12-preserving-read-after-write-consistency/`)",
          "additionalProperties": {
            "$ref": "#/components/schemas/NodeUpdateTrackers"
          },
@@ -2061,7 +2077,8 @@
          "type": "array",
          "items": {
            "$ref": "#/components/schemas/ClusterLayoutVersion"
-          }
+          },
+          "description": "Layout version history"
        }
      }
    },
@@ -2070,24 +2087,27 @@
      "required": [
        "version",
        "roles",
-        "partitionSize",
        "parameters",
+        "partitionSize",
        "stagedRoleChanges"
      ],
      "properties": {
        "parameters": {
-          "$ref": "#/components/schemas/LayoutParameters"
+          "$ref": "#/components/schemas/LayoutParameters",
+          "description": "Layout parameters used when the current layout was computed"
        },
        "partitionSize": {
          "type": "integer",
          "format": "int64",
+          "description": "The size, in bytes, of one Garage partition (= a shard)",
          "minimum": 0
        },
        "roles": {
          "type": "array",
          "items": {
            "$ref": "#/components/schemas/LayoutNodeRole"
-          }
+          },
+          "description": "List of nodes that currently have a role in the cluster layout"
        },
        "stagedParameters": {
          "oneOf": [
@@ -2095,7 +2115,8 @@
              "type": "null"
            },
            {
-              "$ref": "#/components/schemas/LayoutParameters"
+              "$ref": "#/components/schemas/LayoutParameters",
+              "description": "Layout parameters to use when computing the next version of\nthe cluster layout"
            }
          ]
        },
@@ -2103,11 +2124,13 @@
          "type": "array",
          "items": {
            "$ref": "#/components/schemas/NodeRoleChange"
-          }
+          },
+          "description": "List of nodes that will have a new role or whose role will be\nremoved in the next version of the cluster layout"
        },
        "version": {
          "type": "integer",
          "format": "int64",
+          "description": "The current version number of the cluster layout",
          "minimum": 0
        }
      }
@@ -2133,13 +2156,15 @@
        "layoutVersion": {
          "type": "integer",
          "format": "int64",
+          "description": "Current version number of the cluster layout",
          "minimum": 0
        },
        "nodes": {
          "type": "array",
          "items": {
            "$ref": "#/components/schemas/NodeResp"
-          }
+          },
+          "description": "List of nodes that are either currently connected, part of the\ncurrent cluster layout, or part of an older cluster layout that\nis still active in the cluster (being drained)."
        }
      }
    },
@@ -2250,16 +2275,28 @@
            "null"
          ],
          "format": "int64",
+          "description": "Capacity (in bytes) assigned by the cluster administrator,\nabsent for gateway nodes",
          "minimum": 0
        },
        "id": {
-          "type": "string"
+          "type": "string",
+          "description": "Identifier of the node"
+        },
+        "storedPartitions": {
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "int64",
+          "description": "Number of partitions stored on this node\n(a result of the layout computation)",
+          "minimum": 0
        },
        "tags": {
          "type": "array",
          "items": {
            "type": "string"
-          }
+          },
+          "description": "List of tags assigned by the cluster administrator"
        },
        "usableCapacity": {
          "type": [
@@ -2267,10 +2304,12 @@
            "null"
          ],
          "format": "int64",
+          "description": "Capacity (in bytes) that is actually usable on this node in the current\nlayout, which is equal to `stored_partitions` × `partition_size`",
          "minimum": 0
        },
        "zone": {
-          "type": "string"
+          "type": "string",
+          "description": "Zone name assigned by the cluster administrator"
        }
      }
    },
@@ -2281,7 +2320,8 @@
      ],
      "properties": {
        "zoneRedundancy": {
-          "$ref": "#/components/schemas/ZoneRedundancy"
+          "$ref": "#/components/schemas/ZoneRedundancy",
+          "description": "Minimum number of zones in which a data partition must be replicated"
        }
      }
    },
@@ -3071,7 +3111,6 @@
    "NodeAssignedRole": {
      "type": "object",
      "required": [
-        "id",
        "zone",
        "tags"
      ],
@@ -3082,19 +3121,19 @@
            "null"
          ],
          "format": "int64",
+          "description": "Capacity (in bytes) assigned by the cluster administrator,\nabsent for gateway nodes",
          "minimum": 0
        },
-        "id": {
-          "type": "string"
-        },
        "tags": {
          "type": "array",
          "items": {
            "type": "string"
-          }
+          },
+          "description": "List of tags assigned by the cluster administrator"
        },
        "zone": {
-          "type": "string"
+          "type": "string",
+          "description": "Zone name assigned by the cluster administrator"
        }
      }
    },
@@ -3110,7 +3149,8 @@
          "type": [
            "string",
            "null"
-          ]
+          ],
+          "description": "Socket address used by other nodes to connect to this node for RPC"
        },
        "dataPartition": {
          "oneOf": [
@@ -3118,24 +3158,29 @@
              "type": "null"
            },
            {
-              "$ref": "#/components/schemas/FreeSpaceResp"
+              "$ref": "#/components/schemas/FreeSpaceResp",
+              "description": "Total and available space on the disk partition(s) containing the data\ndirectory(ies)"
            }
          ]
        },
        "draining": {
-          "type": "boolean"
+          "type": "boolean",
+          "description": "Whether this node is part of an older layout version and is draining data."
}, "hostname": { "type": [ "string", "null" - ] + ], + "description": "Hostname of the node" }, "id": { - "type": "string" + "type": "string", + "description": "Full-length node identifier" }, "isUp": { - "type": "boolean" + "type": "boolean", + "description": "Whether this node is connected in the cluster" }, "lastSeenSecsAgo": { "type": [ @@ -3143,6 +3188,7 @@ "null" ], "format": "int64", + "description": "For disconnected nodes, the number of seconds since last contact,\nor `null` if no contact was established since Garage restarted.", "minimum": 0 }, "metadataPartition": { @@ -3151,7 +3197,8 @@ "type": "null" }, { - "$ref": "#/components/schemas/FreeSpaceResp" + "$ref": "#/components/schemas/FreeSpaceResp", + "description": "Total and available space on the disk partition containing the\nmetadata directory" } ] }, @@ -3161,7 +3208,8 @@ "type": "null" }, { - "$ref": "#/components/schemas/NodeAssignedRole" + "$ref": "#/components/schemas/NodeAssignedRole", + "description": "Role assigned to this node in the current cluster layout" } ] } @@ -3201,33 +3249,7 @@ } }, { - "type": "object", - "required": [ - "zone", - "tags" - ], - "properties": { - "capacity": { - "type": [ - "integer", - "null" - ], - "format": "int64", - "description": "New capacity (in bytes) of the node", - "minimum": 0 - }, - "tags": { - "type": "array", - "items": { - "type": "string" - }, - "description": "New tags of the node" - }, - "zone": { - "type": "string", - "description": "New zone of the node" - } - } + "$ref": "#/components/schemas/NodeAssignedRole" } ] }, @@ -3265,7 +3287,8 @@ ], "properties": { "error": { - "type": "string" + "type": "string", + "description": "Error message indicating that the layout could not be computed\nwith the provided configuration" } } }, @@ -3280,10 +3303,12 @@ "type": "array", "items": { "type": "string" - } + }, + "description": "Plain-text information about the layout computation\n(do not try to parse this)" }, "newLayout": { - "$ref": "#/components/schemas/GetClusterLayoutResponse" + "$ref": "#/components/schemas/GetClusterLayoutResponse", + "description": "Details about the new cluster layout" } } } @@ -3439,7 +3464,8 @@ "type": "null" }, { - "$ref": "#/components/schemas/LayoutParameters" + "$ref": "#/components/schemas/LayoutParameters", + "description": "New layout computation parameters to use" } ] }, @@ -3447,7 +3473,8 @@ "type": "array", "items": { "$ref": "#/components/schemas/NodeRoleChange" - } + }, + "description": "New node roles to assign or remove in the cluster layout" } } }, @@ -3631,18 +3658,21 @@ "oneOf": [ { "type": "object", + "description": "Partitions must be replicated in at least this number of\ndistinct zones.", "required": [ "atLeast" ], "properties": { "atLeast": { "type": "integer", + "description": "Partitions must be replicated in at least this number of\ndistinct zones.", "minimum": 0 } } }, { "type": "string", + "description": "Partitions must be replicated in as many zones as possible:\nas many zones as there are replicas, if there are enough distinct\nzones, or at least one in each zone otherwise.", "enum": [ "maximum" ] diff --git a/src/api/admin/api.rs b/src/api/admin/api.rs index ec0a9e3c..78706ce3 100644 --- a/src/api/admin/api.rs +++ b/src/api/admin/api.rs @@ -168,23 +168,39 @@ pub struct GetClusterStatusRequest; #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct GetClusterStatusResponse { + /// Current version number of the cluster layout pub layout_version: u64, + /// List of nodes 
that are either currently connected, part of the + /// current cluster layout, or part of an older cluster layout that + /// is still active in the cluster (being drained). pub nodes: Vec, } #[derive(Debug, Clone, Serialize, Deserialize, Default, ToSchema)] #[serde(rename_all = "camelCase")] pub struct NodeResp { + /// Full-length node identifier pub id: String, + /// Role assigned to this node in the current cluster layout pub role: Option, - #[schema(value_type = Option )] + /// Socket address used by other nodes to connect to this node for RPC + #[schema(value_type = Option)] pub addr: Option, + /// Hostname of the node pub hostname: Option, + /// Whether this node is connected in the cluster pub is_up: bool, + /// For disconnected nodes, the number of seconds since last contact, + /// or `null` if no contact was established since Garage restarted. pub last_seen_secs_ago: Option, + /// Whether this node is part of an older layout version and is draining data. pub draining: bool, + /// Total and available space on the disk partition(s) containing the data + /// directory(ies) #[serde(default, skip_serializing_if = "Option::is_none")] pub data_partition: Option, + /// Total and available space on the disk partition containing the + /// metadata directory #[serde(default, skip_serializing_if = "Option::is_none")] pub metadata_partition: Option, } @@ -192,16 +208,21 @@ pub struct NodeResp { #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct NodeAssignedRole { - pub id: String, + /// Zone name assigned by the cluster administrator pub zone: String, - pub capacity: Option, + /// List of tags assigned by the cluster administrator pub tags: Vec, + /// Capacity (in bytes) assigned by the cluster administrator, + /// absent for gateway nodes + pub capacity: Option, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct FreeSpaceResp { + /// Number of bytes available pub available: u64, + /// Total number of bytes pub total: u64, } @@ -273,22 +294,40 @@ pub struct GetClusterLayoutRequest; #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct GetClusterLayoutResponse { + /// The current version number of the cluster layout pub version: u64, + /// List of nodes that currently have a role in the cluster layout pub roles: Vec, - pub partition_size: u64, + /// Layout parameters used when the current layout was computed pub parameters: LayoutParameters, + /// The size, in bytes, of one Garage partition (= a shard) + pub partition_size: u64, + /// List of nodes that will have a new role or whose role will be + /// removed in the next version of the cluster layout pub staged_role_changes: Vec, + /// Layout parameters to use when computing the next version of + /// the cluster layout pub staged_parameters: Option, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct LayoutNodeRole { + /// Identifier of the node pub id: String, + /// Zone name assigned by the cluster administrator pub zone: String, - pub capacity: Option, - pub usable_capacity: Option, + /// List of tags assigned by the cluster administrator pub tags: Vec, + /// Capacity (in bytes) assigned by the cluster administrator, + /// absent for gateway nodes + pub capacity: Option, + /// Number of partitions stored on this node + /// (a result of the layout computation) + pub stored_partitions: Option, + /// Capacity (in bytes) that is actually 
usable on this node in the current + /// layout, which is equal to `stored_partitions` × `partition_size` + pub usable_capacity: Option, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] @@ -309,26 +348,25 @@ pub enum NodeRoleChangeEnum { remove: bool, }, #[serde(rename_all = "camelCase")] - Update { - /// New zone of the node - zone: String, - /// New capacity (in bytes) of the node - capacity: Option, - /// New tags of the node - tags: Vec, - }, + Update(NodeAssignedRole), } #[derive(Copy, Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct LayoutParameters { + /// Minimum number of zones in which a data partition must be replicated pub zone_redundancy: ZoneRedundancy, } #[derive(Copy, Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub enum ZoneRedundancy { + /// Partitions must be replicated in at least this number of + /// distinct zones. AtLeast(usize), + /// Partitions must be replicated in as many zones as possible: + /// as many zones as there are replicas, if there are enough distinct + /// zones, or at least one in each zone otherwise. Maximum, } @@ -340,25 +378,42 @@ pub struct GetClusterLayoutHistoryRequest; #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct GetClusterLayoutHistoryResponse { + /// The current version number of the cluster layout pub current_version: u64, + /// All nodes in the cluster are aware of layout versions up to + /// this version number (at least) pub min_ack: u64, + /// Layout version history pub versions: Vec, + /// Detailed update trackers for nodes (see + /// `https://garagehq.deuxfleurs.fr/blog/2023-12-preserving-read-after-write-consistency/`) pub update_trackers: Option>, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] pub struct ClusterLayoutVersion { + /// Version number of this layout version pub version: u64, + /// Status of this layout version pub status: ClusterLayoutVersionStatus, + /// Number of nodes with an assigned storage capacity in this layout version pub storage_nodes: u64, + /// Number of nodes with a gateway role in this layout version pub gateway_nodes: u64, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)] pub enum ClusterLayoutVersionStatus { + /// This is the most up-to-date layout version Current, + /// This version is still active in the cluster because metadata + /// is being rebalanced or migrated from old nodes Draining, + /// This version is no longer active in the cluster for metadata + /// reads and writes. Note that there is still the possibility + /// that data blocks are being migrated away from nodes in this + /// layout version. 
 	Historical,
 }
 
@@ -374,8 +429,10 @@ pub struct NodeUpdateTrackers {
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 pub struct UpdateClusterLayoutRequest {
+	/// New node roles to assign or remove in the cluster layout
 	#[serde(default)]
 	pub roles: Vec<NodeRoleChange>,
+	/// New layout computation parameters to use
 	#[serde(default)]
 	pub parameters: Option<LayoutParameters>,
 }
 
@@ -392,10 +449,17 @@ pub struct PreviewClusterLayoutChangesRequest;
 #[serde(untagged)]
 pub enum PreviewClusterLayoutChangesResponse {
 	#[serde(rename_all = "camelCase")]
-	Error { error: String },
+	Error {
+		/// Error message indicating that the layout could not be computed
+		/// with the provided configuration
+		error: String,
+	},
 	#[serde(rename_all = "camelCase")]
 	Success {
+		/// Plain-text information about the layout computation
+		/// (do not try to parse this)
 		message: Vec<String>,
+		/// Details about the new cluster layout
 		new_layout: GetClusterLayoutResponse,
 	},
 }
@@ -405,13 +469,18 @@
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct ApplyClusterLayoutRequest {
+	/// As a safety measure, the new version number of the layout must
+	/// be specified here
 	pub version: u64,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct ApplyClusterLayoutResponse {
+	/// Plain-text information about the layout computation
+	/// (do not try to parse this)
 	pub message: Vec<String>,
+	/// Details about the new cluster layout
 	pub layout: GetClusterLayoutResponse,
 }
 
@@ -428,14 +497,21 @@ pub struct RevertClusterLayoutResponse(pub GetClusterLayoutResponse);
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct ClusterLayoutSkipDeadNodesRequest {
+	/// Version number of the layout to assume is currently up-to-date.
+	/// This will generally be the current layout version.
 	pub version: u64,
+	/// Allow the skip even if a quorum of nodes could not be found for
+	/// the data among the remaining nodes
 	pub allow_missing_data: bool,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
 #[serde(rename_all = "camelCase")]
 pub struct ClusterLayoutSkipDeadNodesResponse {
+	/// Nodes for which the ACK update tracker has been updated to `version`
 	pub ack_updated: Vec<String>,
+	/// If `allow_missing_data` is set,
+	/// nodes for which the SYNC update tracker has been updated to `version`
 	pub sync_updated: Vec<String>,
 }
 
diff --git a/src/api/admin/cluster.rs b/src/api/admin/cluster.rs
index 8171aa98..c86b3237 100644
--- a/src/api/admin/cluster.rs
+++ b/src/api/admin/cluster.rs
@@ -56,7 +56,6 @@ impl RequestHandler for GetClusterStatusRequest {
 		for (id, _, role) in layout.current().roles.items().iter() {
 			if let layout::NodeRoleV(Some(r)) = role {
 				let role = NodeAssignedRole {
-					id: hex::encode(id),
 					zone: r.zone.to_string(),
 					capacity: r.capacity,
 					tags: r.tags.clone(),
@@ -189,15 +188,16 @@ fn format_cluster_layout(layout: &layout::LayoutHistory) -> GetClusterLayoutResp
 		.items()
 		.iter()
 		.filter_map(|(k, _, v)| v.0.clone().map(|x| (k, x)))
-		.map(|(k, v)| LayoutNodeRole {
-			id: hex::encode(k),
-			zone: v.zone.clone(),
-			capacity: v.capacity,
-			usable_capacity: current
-				.get_node_usage(k)
-				.ok()
-				.map(|x| x as u64 * current.partition_size),
-			tags: v.tags.clone(),
+		.map(|(k, v)| {
+			let stored_partitions = current.get_node_usage(k).ok().map(|x| x as u64);
+			LayoutNodeRole {
+				id: hex::encode(k),
+				zone: v.zone.clone(),
+				capacity: v.capacity,
+				stored_partitions,
+				usable_capacity: stored_partitions.map(|x| x * current.partition_size),
+				tags: v.tags.clone(),
+			}
 		})
 		.collect::<Vec<_>>();
 
@@ -215,11 +215,11 @@
 			},
 			Some(r) => NodeRoleChange {
 				id: hex::encode(k),
-				action: NodeRoleChangeEnum::Update {
+				action: NodeRoleChangeEnum::Update(NodeAssignedRole {
 					zone: r.zone.clone(),
 					capacity: r.capacity,
 					tags: r.tags.clone(),
-				},
+				}),
 			},
 		})
 		.collect::<Vec<_>>();
@@ -346,11 +346,11 @@ impl RequestHandler for UpdateClusterLayoutRequest {
 
 		let new_role = match change.action {
 			NodeRoleChangeEnum::Remove { remove: true } => None,
-			NodeRoleChangeEnum::Update {
+			NodeRoleChangeEnum::Update(NodeAssignedRole {
 				zone,
 				capacity,
 				tags,
-			} => {
+			}) => {
 				if matches!(capacity, Some(cap) if cap < 1024) {
 					return Err(Error::bad_request("Capacity should be at least 1K (1024)"));
 				}
diff --git a/src/garage/cli/remote/layout.rs b/src/garage/cli/remote/layout.rs
index cd8f99f4..201dbcf7 100644
--- a/src/garage/cli/remote/layout.rs
+++ b/src/garage/cli/remote/layout.rs
@@ -120,11 +120,11 @@ impl Cli {
 
 			actions.push(NodeRoleChange {
 				id,
-				action: NodeRoleChangeEnum::Update {
+				action: NodeRoleChangeEnum::Update(NodeAssignedRole {
 					zone,
 					capacity,
 					tags,
-				},
+				}),
 			});
 		}
 
@@ -340,16 +340,7 @@ pub fn get_staged_or_current_role(
 		if node.id == id {
 			return match &node.action {
 				NodeRoleChangeEnum::Remove { .. } => None,
-				NodeRoleChangeEnum::Update {
-					zone,
-					capacity,
-					tags,
-				} => Some(NodeAssignedRole {
-					id: id.to_string(),
-					zone: zone.to_string(),
-					capacity: *capacity,
-					tags: tags.clone(),
-				}),
+				NodeRoleChangeEnum::Update(role) => Some(role.clone()),
 			};
 		}
 	}
@@ -357,7 +348,6 @@
 	for node in layout.roles.iter() {
 		if node.id == id {
 			return Some(NodeAssignedRole {
-				id: node.id.clone(),
 				zone: node.zone.clone(),
 				capacity: node.capacity,
 				tags: node.tags.clone(),
@@ -437,11 +427,11 @@ pub fn print_staging_role_changes(layout: &GetClusterLayoutResponse) -> bool {
 	let mut table = vec!["ID\tTags\tZone\tCapacity".to_string()];
 	for change in layout.staged_role_changes.iter() {
 		match &change.action {
-			NodeRoleChangeEnum::Update {
+			NodeRoleChangeEnum::Update(NodeAssignedRole {
 				tags,
 				zone,
 				capacity,
-			} => {
+			}) => {
 				let tags = tags.join(",");
 				table.push(format!(
 					"{:.16}\t{}\t{}\t{}",
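
--
Two small sanity sketches follow, placed below the signature separator so they
are not part of the applied patch. Both assume `serde_json` is available and
that the types from `src/api/admin/api.rs` are in scope; all identifiers and
values are hypothetical.

First, the switch from the inline `Update { zone, capacity, tags }` variant to
the newtype `Update(NodeAssignedRole)` should leave the JSON wire format
unchanged: `id` is removed from `NodeAssignedRole` in this same patch, and
(assuming `NodeRoleChangeEnum` keeps its `#[serde(untagged)]` attribute and
`NodeRoleChange.action` keeps `#[serde(flatten)]`, neither of which is touched
here) the newtype variant serializes as the inner struct's map:

	// Hypothetical node ID and role values, for illustration only.
	let change = NodeRoleChange {
		id: "6a8e08af2aab1083".to_string(),
		action: NodeRoleChangeEnum::Update(NodeAssignedRole {
			zone: "dc1".to_string(),
			tags: vec!["fast-disks".to_string()],
			capacity: Some(100_000_000_000),
		}),
	};
	// Same flattened shape as before the refactor:
	assert_eq!(
		serde_json::to_value(&change).unwrap(),
		serde_json::json!({
			"id": "6a8e08af2aab1083",
			"zone": "dc1",
			"tags": ["fast-disks"],
			"capacity": 100_000_000_000u64
		})
	);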
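
Second, the two descriptions added to the `ZoneRedundancy` schema mirror the
two serde encodings of the enum as declared in the Rust diff (externally
tagged, with `rename_all = "camelCase"`): `AtLeast(n)` encodes as an object
and `Maximum` as a bare string, matching the `oneOf` in the schema above:

	assert_eq!(
		serde_json::to_value(ZoneRedundancy::AtLeast(3)).unwrap(),
		serde_json::json!({ "atLeast": 3 })
	);
	assert_eq!(
		serde_json::to_value(ZoneRedundancy::Maximum).unwrap(),
		serde_json::json!("maximum")
	);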