From 09cdc5662fb588acbf755501c76c618e05dd01c0 Mon Sep 17 00:00:00 2001 From: dorota <114921900+wojcik-dorota@users.noreply.github.com> Date: Wed, 8 Jan 2025 16:38:13 +0100 Subject: [PATCH] add(clickhouse): local on-disk cache for remote files (#609) --- .../concepts/clickhouse-tiered-storage.md | 14 ++++- .../howto/local-cache-tiered-storage.md | 62 +++++++++++++++++++ sidebars.ts | 1 + 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 docs/products/clickhouse/howto/local-cache-tiered-storage.md diff --git a/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md b/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md index c42af4ee..384a66d6 100644 --- a/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md +++ b/docs/products/clickhouse/concepts/clickhouse-tiered-storage.md @@ -10,6 +10,8 @@ The tiered storage feature introduces a method of organizing and storing data in On top of this default data allocation mechanism, you can control the tier your data is stored in using custom data retention periods. +## Tiered storage architecture + The tiered storage in Aiven for ClickHouse® consists of the following two layers: @@ -18,12 +20,22 @@ layers: - Object storage - the second tier: Affordable storage device with unlimited capability, better suited for historical and more rarely queried data, relatively slower +Aiven for ClickHouse's tiered storage supports +[local on-disk cache for remote files](/docs/products/clickhouse/howto/local-cache-tiered-storage), +which is enabled by default. You can +[disable the cache](/docs/products/clickhouse/howto/local-cache-tiered-storage#disable-the-cache) +or +[drop it](/docs/products/clickhouse/howto/local-cache-tiered-storage#free-up-space) to free +up the space it occupies. 
+ +## Supported cloud platforms + On the Aiven tenant (in non-[BYOC](/docs/platform/concepts/byoc) environments), Aiven for ClickHouse tiered storage is supported on the following cloud platforms: - Microsoft Azure - Amazon Web Services (AWS) -- Google Cloud Platform (GCP) +- Google Cloud ## Why use it diff --git a/docs/products/clickhouse/howto/local-cache-tiered-storage.md b/docs/products/clickhouse/howto/local-cache-tiered-storage.md new file mode 100644 index 00000000..ac768cf8 --- /dev/null +++ b/docs/products/clickhouse/howto/local-cache-tiered-storage.md @@ -0,0 +1,62 @@ +--- +title: Local on-disk cache for remote files in Aiven for ClickHouse®'s tiered storage +sidebar_label: Local on-disk cache for remote files +--- + +Aiven for ClickHouse®'s tiered storage features a local on-disk cache for remote files to improve query performance and reduce latency. + +To manage data, Aiven for ClickHouse's tiered storage uses local storage and remote storage. +When remote storage is used, Aiven for ClickHouse leverages a local on-disk cache to avoid +repeated remote fetches. + +## How it works + +When a query requires parts of a table stored in the remote tier, Aiven for ClickHouse +fetches the required parts from the remote storage. The fetched parts are automatically +stored in a local cache directory on the disk to avoid repeated downloads for subsequent +queries. For future queries, Aiven for ClickHouse checks the local cache first: + +- If the data is found in the cache, it is read directly from the local disk. +- If the data is not found in the cache, it is fetched from the remote storage and stored + in the local cache. + +Local on-disk cache for remote files is enabled by default for Aiven for ClickHouse's +tiered storage. You can +[disable the cache](/docs/products/clickhouse/howto/local-cache-tiered-storage#disable-the-cache) +or +[drop it](/docs/products/clickhouse/howto/local-cache-tiered-storage#free-up-space) to +free up the space it occupies. 
+ +## Prerequisites + +- At least one Aiven for ClickHouse service using tiered storage +- Command line tool + ([ClickHouse client](/docs/products/clickhouse/howto/connect-with-clickhouse-cli)) + installed + +## Disable the cache + +To disable the local cache for a query, set the `enable_filesystem_cache` setting for the +query to `false`. +You can achieve this by appending `SETTINGS enable_filesystem_cache = false` to the end of +your query using an SQL client (for example, the +[ClickHouse client](/docs/products/clickhouse/howto/connect-with-clickhouse-cli)): + +```sql +SELECT 1 +SETTINGS enable_filesystem_cache = false; +``` + +## Free up space + +To drop the local cache and free up the used space, use the following cache command: + +```sql +SYSTEM DROP FILESYSTEM CACHE 'remote_cache' +``` + +## Related pages + +- [About tiered storage in Aiven for ClickHouse](/docs/products/clickhouse/concepts/clickhouse-tiered-storage) +- [Check data distribution between SSD and object storage](/docs/products/clickhouse/howto/check-data-tiered-storage) +- [Configure data retention thresholds for tiered storage](/docs/products/clickhouse/howto/configure-tiered-storage) diff --git a/sidebars.ts b/sidebars.ts index c3b910e0..c1eedf13 100644 --- a/sidebars.ts +++ b/sidebars.ts @@ -1299,6 +1299,7 @@ const sidebars: SidebarsConfig = { 'products/clickhouse/howto/configure-tiered-storage', 'products/clickhouse/howto/check-data-tiered-storage', 'products/clickhouse/howto/transfer-data-tiered-storage', + 'products/clickhouse/howto/local-cache-tiered-storage', ], }, ],