# HE4

## Hardware

- 10x HDDs, 16TB (14.6 TiB) each (Seagate Exos X18)
- 2x SSDs, 960GB (894.3 GiB) each (1x Samsung, 1x Seagate Nytro)

## Filesystem
### Diagram

```mermaid
---
config:
  theme: dark
---
graph LR

    subgraph Devices[Devices]
        direction TB
        subgraph HDDs[HDDs]
            HDD0 & HDD1 & HDD2 & HDD3 & HDD4 & HDD5 & HDD6 & HDD7 & HDD8 & HDD9
        end
        subgraph NVMEs[NVMe Drives]
            NVME0 --> nvme0n1["ns1"]
            NVME1 --> nvme1n1["ns1"] & nvme1n2["ns2"] & nvme1n3["ns3"]
        end
    end

    subgraph Partitions[Partitions]
        direction TB
        subgraph HDD_Partitions[ ]
            space_hdd_0_p & space_hdd_1_p & space_hdd_2_p & space_hdd_3_p & space_hdd_4_p & space_hdd_5_p & space_hdd_6_p & space_hdd_7_p & space_hdd_8_p & space_hdd_9_p
        end
        subgraph NVME_Partitions[ ]
            subgraph NVME0_Partitions["nvme0n1 Partitions"]
                p1["/boot"] & p2["crypted_nvme0_cache"] & p3["crypted_nvme0_special"] & p4["crypted_nvme0_log"]
            end
            nvme0n1 --> NVME0_Partitions
            subgraph NVME1_Partitions[ ]
                space_nvme1p1 & space_nvme1p2 & space_nvme1p3
            end
        end
    end

    subgraph LUKS[LUKS]
        space_boot[ ]
        crypt_hdd0 & crypt_hdd1 & crypt_hdd2 & crypt_hdd3 & crypt_hdd4 & crypt_hdd5 & crypt_hdd6 & crypt_hdd7 & crypt_hdd8 & crypt_hdd9
        subgraph NVME0_LUKS[ ]
            crypt_nvme0_cache & crypt_nvme0_special & crypt_nvme0_log
        end
        subgraph NVME1_LUKS[ ]
            crypt_nvme1_cache & crypt_nvme1_special & crypt_nvme1_log
        end
    end

    subgraph ZFS_Pools[ZFS Pools]
        subgraph vault[vault]
            zdata["data (raidz2)"] & zcache["cache"] & zspecial["special (mirror)"] & zlog["log (mirror)"]
        end
    end

    subgraph Filesystem[Filesystem]
        fs_root["/"] & fs_docker["/docker"] & fs_home["/home"] & fs_nix["/nix"] & fs_var["/var"] & fs_boot["/boot"]
        p1 ---x space_boot --> fs_boot
        vault --> fs_root & fs_docker & fs_home & fs_nix & fs_var
    end

    HDD0 ---x space_hdd_0_p[ ] --> crypt_hdd0
    HDD1 ---x space_hdd_1_p[ ] --> crypt_hdd1
    HDD2 ---x space_hdd_2_p[ ] --> crypt_hdd2
    HDD3 ---x space_hdd_3_p[ ] --> crypt_hdd3
    HDD4 ---x space_hdd_4_p[ ] --> crypt_hdd4
    HDD5 ---x space_hdd_5_p[ ] --> crypt_hdd5
    HDD6 ---x space_hdd_6_p[ ] --> crypt_hdd6
    HDD7 ---x space_hdd_7_p[ ] --> crypt_hdd7
    HDD8 ---x space_hdd_8_p[ ] --> crypt_hdd8
    HDD9 ---x space_hdd_9_p[ ] --> crypt_hdd9

    p2 ---> crypt_nvme0_cache
    p3 ---> crypt_nvme0_special
    p4 ---> crypt_nvme0_log
    nvme1n1 --x space_nvme1p1[ ] --> crypt_nvme1_cache
    nvme1n2 --x space_nvme1p2[ ] --> crypt_nvme1_special
    nvme1n3 --x space_nvme1p3[ ] --> crypt_nvme1_log

    crypt_hdd0 & crypt_hdd1 & crypt_hdd2 & crypt_hdd3 & crypt_hdd4 & crypt_hdd5 & crypt_hdd6 & crypt_hdd7 & crypt_hdd8 & crypt_hdd9 ---o merge_data[ ] --> zdata
    crypt_nvme0_cache & crypt_nvme1_cache ---o merge_cache[ ] --> zcache
    crypt_nvme0_special & crypt_nvme1_special ---o merge_special[ ] --> zspecial
    crypt_nvme0_log & crypt_nvme1_log ---o merge_log[ ] --> zlog

    classDef header fill:#2C3E50,color:#ECF0F1,stroke:none
    classDef device fill:#34495E,color:#ECF0F1,stroke:none
    classDef nvme fill:#2980B9,color:#ECF0F1,stroke:none
    classDef partition fill:#3498DB,color:#ECF0F1,stroke:none
    classDef luks fill:#1ABC9C,color:#ECF0F1,stroke:none
    classDef zfs fill:#16A085,color:#ECF0F1
    classDef pool fill:#303030,color:#ECF0F1
    classDef filesystem fill:#2E8B57,color:#ECF0F1,stroke:none
    classDef spacer fill:none,stroke:none
    classDef space fill:none,stroke:none,height:0

    %%classDef veryhigh height:4600
    %%class zdata,fs_root veryhigh

    class vault pool
    class zdata,zcache,zspecial,zlog zfs
    class crypt_hdd0,crypt_hdd1,crypt_hdd2,crypt_hdd3,crypt_hdd4,crypt_hdd5,crypt_hdd6,crypt_hdd7,crypt_hdd8,crypt_hdd9,crypt_nvme0_cache,crypt_nvme0_special,crypt_nvme0_log,crypt_nvme1_cache,crypt_nvme1_special,crypt_nvme1_log luks
    class p1,p2,p3,p4 partition
    class nvme0n1,nvme1n1,nvme1n2,nvme1n3 nvme
    class HDD0,HDD1,HDD2,HDD3,HDD4,HDD5,HDD6,HDD7,HDD8,HDD9,NVME0,NVME1 device
    class fs_boot,fs_docker,fs_home,fs_nix,fs_root,fs_var filesystem
    class HDD_Partitions,NVME_Partitions,NVME1_Partitions space
    class space_boot,space_hdd_0_p,space_hdd_1_p,space_hdd_2_p,space_hdd_3_p,space_hdd_4_p,space_hdd_5_p,space_hdd_6_p,space_hdd_7_p,space_hdd_8_p,space_hdd_9_p,space_nvme1p1,space_nvme1p2,space_nvme1p3 space
    class merge_data,merge_cache,merge_special,merge_pool space
```
### Partitions

#### HDDs

No partitions on the HDDs; the full disks are used.
#### SSD0

| id | size    | type | use                         |
|----|---------|------|-----------------------------|
| 0  | 1GiB    | efi  | boot                        |
| 1  | 512GiB  | luks | zfs cache (L2ARC)           |
| 2  | 256GiB  | luks | zfs special                 |
| 3  | 32GiB   | luks | zfs slog (ZIL)              |
|    | 93.3GiB | free | under-provisioning for perf |

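A partitioning sketch for SSD0 matching the table above (not the exact commands used; the `sgdisk` type codes are assumptions: `ef00` = EFI system, `8309` = Linux LUKS):

``` fish
set dev /dev/nvme0n1

sgdisk --zap-all $dev                               # wipe any existing partition table
sgdisk -n 1:0:+1G   -t 1:ef00 -c 1:boot    $dev     # EFI system partition -> /boot
sgdisk -n 2:0:+512G -t 2:8309 -c 2:cache   $dev     # luks -> zfs cache (L2ARC)
sgdisk -n 3:0:+256G -t 3:8309 -c 3:special $dev     # luks -> zfs special
sgdisk -n 4:0:+32G  -t 4:8309 -c 4:log     $dev     # luks -> zfs slog (ZIL)
# the remaining ~93GiB stays unpartitioned for under-provisioning
```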
#### SSD1

Use NVMe namespaces instead of partitions, with the same sizes as the ZFS partitions on SSD0 (no boot namespace).
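A sketch of carving SSD1 into namespaces with nvme-cli (not the exact commands used). It assumes the namespaces are created with 4KiB LBAs, so block counts are size/4096; the `--flbas` index and the controller id are drive-specific, so check `nvme id-ns` and `nvme id-ctrl /dev/nvme1` first:

``` fish
set ctrl 0   # placeholder: controller id from `nvme id-ctrl /dev/nvme1 | grep cntlid`

# 512GiB cache namespace: 512 * 1024^3 / 4096 = 134217728 blocks
nvme create-ns /dev/nvme1 --nsze=134217728 --ncap=134217728 --flbas=1
nvme attach-ns /dev/nvme1 --namespace-id=1 --controllers=$ctrl

# 256GiB special namespace
nvme create-ns /dev/nvme1 --nsze=67108864 --ncap=67108864 --flbas=1
nvme attach-ns /dev/nvme1 --namespace-id=2 --controllers=$ctrl

# 32GiB slog namespace
nvme create-ns /dev/nvme1 --nsze=8388608 --ncap=8388608 --flbas=1
nvme attach-ns /dev/nvme1 --namespace-id=3 --controllers=$ctrl
```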
## Cryptsetup

``` fish
echo -n 'passphrase' > /tmp/passphrase.key

# Data disks
for disk in /dev/sd*
    echo $disk
    cryptsetup luksFormat \
        --sector-size 4096 \
        --type luks2 \
        --hash sha512 \
        --cipher aes-xts-plain64 \
        --key-size 512 \
        --key-file /tmp/passphrase.key \
        --batch-mode \
        $disk
end

# zfs parts on nvme0
for part in /dev/nvme0n1p2 /dev/nvme0n1p3 /dev/nvme0n1p4
    echo $part
    cryptsetup luksFormat \
        --sector-size 4096 \
        --type luks2 \
        --hash sha512 \
        --cipher aes-xts-plain64 \
        --key-size 512 \
        --key-file /tmp/passphrase.key \
        --batch-mode \
        $part
end

# zfs namespaces on nvme1
for part in /dev/nvme1n*
    echo $part
    cryptsetup luksFormat \
        --sector-size 4096 \
        --type luks2 \
        --hash sha512 \
        --cipher aes-xts-plain64 \
        --key-size 512 \
        --key-file /tmp/passphrase.key \
        --batch-mode \
        $part
end
```
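Optional sanity check (not part of the original steps): read the headers back to confirm LUKS2 and the 4096-byte sector size took effect.

``` fish
for dev in /dev/sd* /dev/nvme0n1p2 /dev/nvme0n1p3 /dev/nvme0n1p4 /dev/nvme1n*
    echo $dev
    cryptsetup luksDump $dev | grep -iE 'version|sector'
end
```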
## LuksOpen

``` fish
# SSDs:
for tuple in \
        "/dev/nvme0n1p2 crypt_ssd0_cache" \
        "/dev/nvme0n1p3 crypt_ssd0_special" \
        "/dev/nvme0n1p4 crypt_ssd0_log" \
        "/dev/nvme1n1 crypt_ssd1_cache" \
        "/dev/nvme1n2 crypt_ssd1_special" \
        "/dev/nvme1n3 crypt_ssd1_log"
    set split (string split " " $tuple)
    set disk $split[1]
    set name $split[2]
    echo luksOpen $disk @ $name
    cryptsetup luksOpen --key-file=/tmp/passphrase.key --perf-no_read_workqueue --perf-no_write_workqueue --allow-discards --persistent $disk $name
end

# HDDs:
for index in (seq 0 9)
    set disk /dev/sd(echo $index | tr "0-9" "a-j")
    set name crypt_hdd$index
    echo luksOpen $disk @ $name
    cryptsetup luksOpen --key-file=/tmp/passphrase.key --persistent $disk $name
end
```
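A quick way to confirm all sixteen mappings are up (names as defined in the loops above); `cryptsetup status` should also report the cipher, sector size and, where set, the discard/workqueue flags:

``` fish
ls /dev/mapper/crypt_*
cryptsetup status crypt_ssd0_cache
```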
## Undo

Close the LUKS mappings:

``` fish
for path in /dev/mapper/crypt*
    cryptsetup close $path
end
```
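If the pool has already been created and imported, export it first so the `crypt_*` mappings are no longer in use; otherwise `cryptsetup close` refuses.

``` fish
zpool export vault
```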
## ZFS Mod params

Param list:

``` toml
l2arc_exclude_special=1        # special vdev and L2ARC sit on the same SSDs, so caching special data in L2ARC is useless in our case
zfs_dirty_data_max=17179869184 # 16GiB; the default is 10% of RAM, which here would only be 12.8GiB
zfs_txg_timeout=60             # default is 5s; a longer txg interval means less fragmentation
l2arc_write_boost=67108864     # 64MiB
l2arc_write_max=16777216       # 16MiB (default 8MiB)
```

Nix config format for the mod params:

``` nix
# boot.zfs.enableUnstable = true;
boot.extraModprobeConfig = ''
  options zfs param=value ...
'';
```
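Filled in with the values from the list above, that would look roughly like this (untested sketch):

``` nix
boot.extraModprobeConfig = ''
  options zfs l2arc_exclude_special=1 zfs_dirty_data_max=17179869184 zfs_txg_timeout=60 l2arc_write_boost=67108864 l2arc_write_max=16777216
'';
```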
## Zpool

``` fish
# -n: dry run, remove it after checking the layout
# -f: needed because zfs doesn't like mirrors and raidz in the same pool
# ashift=12: force 4k sectors, as 11/12 drives use them
# autoreplace=on: let hetzner swap the drive, everything else happens automagically
# autotrim=on: TRIM; needs --allow-discards on the luks mappings too
# feature@lz4_compress: faster compression
# feature@large_blocks: allow a bigger recordsize
# recordsize=1M: bigger records (default 128k); should be lowered for the docker database
#   datasets (16k for mysql, 8k for postgres, though 16k seems to increase perf for sequential scans)
# compression=on: follow the default algorithm, which updates when better algos become the default
# atime=on + relatime=on: access times are useful, but full atime is slow, so use relatime
zpool create -n \
    -f \
    -o ashift=12 \
    -o autoreplace=on \
    -o autotrim=on \
    -o feature@lz4_compress=enabled \
    -o feature@large_blocks=enabled \
    -O recordsize=1M \
    -O compression=on \
    -O atime=on \
    -O relatime=on \
    vault \
    raidz2 \
        /dev/mapper/crypt_hdd0 \
        /dev/mapper/crypt_hdd1 \
        /dev/mapper/crypt_hdd2 \
        /dev/mapper/crypt_hdd3 \
        /dev/mapper/crypt_hdd4 \
        /dev/mapper/crypt_hdd5 \
        /dev/mapper/crypt_hdd6 \
        /dev/mapper/crypt_hdd7 \
        /dev/mapper/crypt_hdd8 \
        /dev/mapper/crypt_hdd9 \
    cache \
        /dev/mapper/crypt_ssd0_cache \
        /dev/mapper/crypt_ssd1_cache \
    special \
    mirror \
        /dev/mapper/crypt_ssd0_special \
        /dev/mapper/crypt_ssd1_special \
    log \
    mirror \
        /dev/mapper/crypt_ssd0_log \
        /dev/mapper/crypt_ssd1_log
```
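The datasets behind the mountpoints in the diagram are not covered by these notes; a rough sketch of what they could look like (dataset names and per-dataset properties are assumptions, not the actual commands used):

``` fish
zfs create -o mountpoint=legacy vault/root
zfs create -o mountpoint=legacy vault/nix
zfs create -o mountpoint=legacy vault/var
zfs create -o mountpoint=legacy vault/home
zfs create -o mountpoint=legacy -o recordsize=16k vault/docker   # smaller records for databases
zfs list -r -o name,mountpoint,recordsize vault
```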