-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[clickhouse] Clickana monitoring dashboard tool (#7207)
### Overview As part of Stage 1 of [RFD468](https://rfd.shared.oxide.computer/rfd/0468) we'll be observing how a ClickHouse cluster behaves in comparison with a single node server. This commit introduces a basic tool that lets us visualize internal ClickHouse metric information. As a starting point, Clickana only has 4 charts, and the user may not choose what these are. Additionally, it is only capable of rendering data by making API calls. I'd like to make the tool more flexible; other capabilities will be added in follow up PRs. ### Usage ```console clickana --help Usage: clickana [OPTIONS] --clickhouse-addr <CLICKHOUSE_ADDR> Options: -l, --log-path <LOG_PATH> Path to the log file [env: CLICKANA_LOG_PATH=] [default: /tmp/clickana.log] -a, --clickhouse-addr <CLICKHOUSE_ADDR> Address where a clickhouse admin server is listening on -s, --sampling-interval <SAMPLING_INTERVAL> The interval to collect monitoring data in seconds [default: 60] -t, --time-range <TIME_RANGE> Range of time to collect monitoring data in seconds [default: 3600] -r, --refresh-interval <REFRESH_INTERVAL> The interval at which the dashboards will refresh [default: 60] -h, --help Print help ``` ### Manual Testing ``` root@oxz_clickhouse_015f9c34:~# /opt/oxide/clickana/bin/clickana -a [fd00:1122:3344:101::e]:8888 ``` <img width="1208" alt="Screenshot 2024-12-12 at 4 11 15 PM" src="https://github.com/user-attachments/assets/53658b02-3729-4b29-ac28-0a387c3143ac" /> ### Next Steps - Let the user set which metrics they would like to visualise in each chart. This may be nice to do through a TOML file or something. We could let them choose which unit to represent them in as well perhaps. - Have more metrics available. - It'd be nice to have the ability to take the timeseries as JSON instead of calling the API as well. This could be useful in the future to have some insight into our customer's racks for debugging purposes. 
We could include ClickHouse internal metric timeseries as part of the support bundles, and they could then be visualised via Clickana. WDYT @smklein? Related: #6953
- Loading branch information
Showing
12 changed files
with
1,198 additions
and
42 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
[package] | ||
name = "clickana" | ||
version = "0.1.0" | ||
edition = "2021" | ||
license = "MPL-2.0" | ||
|
||
[dependencies] | ||
anyhow.workspace = true | ||
camino.workspace = true | ||
chrono.workspace = true | ||
clap.workspace = true | ||
clickhouse-admin-types.workspace = true | ||
clickhouse-admin-server-client.workspace = true | ||
dropshot.workspace = true | ||
futures.workspace = true | ||
omicron-common.workspace = true | ||
ratatui.workspace = true | ||
schemars.workspace = true | ||
slog.workspace = true | ||
slog-async.workspace = true | ||
slog-dtrace.workspace = true | ||
slog-error-chain.workspace = true | ||
slog-term.workspace = true | ||
serde_json.workspace = true | ||
tokio.workspace = true | ||
tokio-postgres.workspace = true | ||
|
||
omicron-workspace-hack.workspace = true | ||
|
||
[lints] | ||
workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// This Source Code Form is subject to the terms of the Mozilla Public | ||
// License, v. 2.0. If a copy of the MPL was not distributed with this | ||
// file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
use anyhow::Result; | ||
use camino::Utf8PathBuf; | ||
use clap::Parser; | ||
use clickana::Clickana; | ||
use std::net::SocketAddr; | ||
|
||
const CLICKANA_LOG_FILE: &str = "/tmp/clickana.log"; | ||
|
||
#[tokio::main] | ||
async fn main() -> Result<()> { | ||
let args = Cli::parse(); | ||
|
||
let terminal = ratatui::init(); | ||
let result = Clickana::new( | ||
args.clickhouse_addr, | ||
args.log_path, | ||
args.sampling_interval, | ||
args.time_range, | ||
args.refresh_interval, | ||
) | ||
.run(terminal) | ||
.await; | ||
ratatui::restore(); | ||
result | ||
} | ||
|
||
#[derive(Debug, Parser)] | ||
struct Cli { | ||
/// Path to the log file | ||
#[arg( | ||
long, | ||
short, | ||
env = "CLICKANA_LOG_PATH", | ||
default_value = CLICKANA_LOG_FILE, | ||
)] | ||
log_path: Utf8PathBuf, | ||
|
||
/// Address where a clickhouse admin server is listening on | ||
#[arg(long, short = 'a')] | ||
clickhouse_addr: SocketAddr, | ||
|
||
/// The interval to collect monitoring data in seconds | ||
#[arg(long, short, default_value_t = 60)] | ||
sampling_interval: u64, | ||
|
||
/// Range of time to collect monitoring data in seconds | ||
#[arg(long, short, default_value_t = 3600)] | ||
time_range: u64, | ||
|
||
/// The interval at which the dashboards will refresh | ||
#[arg(long, short, default_value_t = 60)] | ||
refresh_interval: u64, | ||
} |
Oops, something went wrong.