18 Commits

Author SHA1 Message Date
f14675935d Renamed folders. 2025-02-22 16:57:38 +01:00
801c504757 Good old forgotten commit from 8 months ago. 2024-07-02 23:36:48 +02:00
b23f4087b3 Added a secondary NFS provisioner (slow/hdd) 2024-01-13 05:25:56 +01:00
e0d3141580 Added a secondary NFS provisioner (slow/hdd) 2024-01-13 05:23:32 +01:00
e3acd566c8 Minor typo correction. 2024-01-04 05:02:51 +01:00
3053904aa2 Couple lines added. 2023-12-21 04:55:45 +01:00
b5508eab97 Update 2023-12-16 08:15:41 +01:00
5f3c3b0e91 Notes update 2023-12-15 04:42:23 +01:00
b367990028 Notes update 2023-12-14 17:48:00 +01:00
4606dd3cf5 Progressed substantially on the migration. 2023-12-14 02:41:45 +01:00
96fd561257 Updated the README.md with the current deployment organization 2023-10-20 21:53:28 +02:00
950137040f Update README.md
Fixed amount of RAM
2023-10-20 20:48:19 +02:00
57a8288769 Part 8 and 9 documented (even tho part 9 was just a link to the repo used ... ) 2023-10-20 20:30:53 +02:00
efcb916c9f Part 8 and 9 documented (even tho part 9 was just a link to the repo used ... ) 2023-10-20 20:30:25 +02:00
cd7fdbdd16 Fixing header formatting. 2023-08-02 17:27:11 +02:00
40010b8c2a Added slave01 to the clsuter.
Part 7 documented.
2023-08-02 17:21:16 +02:00
19d8748741 The "new" cluster got all the configurations set on the "old" cluster.
Certificates already provisioned.

Part 6 complete.
2023-08-02 16:09:37 +02:00
c206bb1e5b IDK when but I deleted this file unintentionally. 2023-08-02 15:54:50 +02:00
65 changed files with 2660 additions and 177 deletions

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: external
labels:
istio-injection: "enabled"

View File

@@ -3,6 +3,8 @@ kind: DestinationRule
metadata:
name: filebrowser
namespace: external
labels:
app: filebrowser
spec:
host: filebrowser.external.svc.cluster.local
trafficPolicy:

View File

@@ -3,6 +3,8 @@ kind: ServiceEntry
metadata:
name: filebrowser-se
namespace: external
labels:
app: filebrowser
spec:
hosts:
- filebrowser.external.svc.cluster.local

View File

@@ -3,6 +3,8 @@ kind: VirtualService
metadata:
name: filebrowser-vs
namespace: external
labels:
app: filebrowser
spec:
hosts:
- "filebrowser.filter.home"

View File

@@ -3,6 +3,8 @@ kind: DestinationRule
metadata:
name: gitea
namespace: external
labels:
app: gitea
spec:
host: gitea.external.svc.cluster.local
trafficPolicy:

View File

@@ -3,6 +3,8 @@ kind: ServiceEntry
metadata:
name: gitea-se
namespace: external
labels:
app: gitea
spec:
hosts:
- gitea.external.svc.cluster.local

View File

@@ -3,6 +3,8 @@ kind: VirtualService
metadata:
name: gitea-vs
namespace: external
labels:
app: gitea
spec:
hosts:
- "gitea.filter.home"

View File

@@ -3,6 +3,8 @@ kind: VirtualService
metadata:
name: jelly-vs
namespace: external
labels:
app: jellyfin
spec:
hosts:
- "jelly.filter.home"

View File

@@ -3,6 +3,8 @@ kind: DestinationRule
metadata:
name: tube
namespace: external
labels:
app: tube
spec:
host: tube.external.svc.cluster.local
trafficPolicy:

View File

@@ -3,6 +3,8 @@ kind: ServiceEntry
metadata:
name: tube-se
namespace: external
labels:
app: tube
spec:
hosts:
- tube.external.svc.cluster.local

View File

@@ -3,6 +3,8 @@ kind: VirtualService
metadata:
name: tube-vs
namespace: external
labels:
app: tube
spec:
hosts:
- "tube.filter.home"

View File

@@ -0,0 +1,14 @@
# Base Istio installation: "minimal" profile plus mesh-wide settings.
apiVersion: install.istio.io/v1alpha1
kind: IstioOperator
metadata:
  namespace: istio-system
  name: istio-config
  labels:
    # Quoted on purpose: an unquoted ISO date may be loaded as a date/timestamp
    # by YAML parsers, while Kubernetes label values must be strings.
    last-update: "2023-07-16"
spec:
  profile: minimal
  meshConfig:
    # Send Envoy access logs to the container's stdout.
    accessLogFile: /dev/stdout
    enableTracing: true
    # Point the mesh at the public ingress gateway deployed by the
    # "public-ingress" IstioOperator (service name / `istio` label value).
    ingressService: istio-public-ingress
    ingressSelector: public-ingress

View File

@@ -0,0 +1,21 @@
# Egress gateway only ("empty" profile + a single egress gateway component).
apiVersion: install.istio.io/v1alpha1
kind: IstioOperator
metadata:
  namespace: istio-system
  name: egress
  labels:
    # Quoted on purpose: an unquoted ISO date may be loaded as a date/timestamp
    # by YAML parsers, while Kubernetes label values must be strings.
    last-update: "2023-07-16"
spec:
  profile: empty
  components:
    egressGateways:
      - namespace: istio-system
        name: egress-gw
        enabled: true
        label:
          istio: egress-gw
          app: istio-egress-gw
        k8s:
          service:
            type: LoadBalancer
            # Fixed address taken from the manually-assigned MetalLB range
            # (192.168.1.20-192.168.1.39).
            loadBalancerIP: 192.168.1.39

View File

@@ -0,0 +1,21 @@
# Ingress gateway for local (LAN) traffic ("empty" profile + one ingress gateway).
apiVersion: install.istio.io/v1alpha1
kind: IstioOperator
metadata:
  namespace: istio-system
  name: local-ingress
  labels:
    # Quoted on purpose: an unquoted ISO date may be loaded as a date/timestamp
    # by YAML parsers, while Kubernetes label values must be strings.
    last-update: "2023-07-16"
spec:
  profile: empty
  components:
    ingressGateways:
      - namespace: istio-system
        name: istio-local-ingress
        enabled: true
        label:
          # Gateways select this workload through `istio: local-ingress`.
          istio: local-ingress
          app: istio-local-ingress
        k8s:
          service:
            type: LoadBalancer
            # Fixed address taken from the manually-assigned MetalLB range
            # (192.168.1.20-192.168.1.39).
            loadBalancerIP: 192.168.1.21

View File

@@ -0,0 +1,21 @@
# Ingress gateway for public traffic ("empty" profile + one ingress gateway).
apiVersion: install.istio.io/v1alpha1
kind: IstioOperator
metadata:
  namespace: istio-system
  name: public-ingress
  labels:
    # Quoted on purpose: an unquoted ISO date may be loaded as a date/timestamp
    # by YAML parsers, while Kubernetes label values must be strings.
    last-update: "2023-07-16"
spec:
  profile: empty
  components:
    ingressGateways:
      - namespace: istio-system
        name: istio-public-ingress
        enabled: true
        label:
          # Gateways select this workload through `istio: public-ingress`.
          istio: public-ingress
          app: istio-public-ingress
        k8s:
          service:
            type: LoadBalancer
            # Fixed address taken from the manually-assigned MetalLB range
            # (192.168.1.20-192.168.1.39).
            loadBalancerIP: 192.168.1.20

View File

@@ -0,0 +1,29 @@
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
name: cherrypick
namespace: metallb-system
spec:
addresses:
- 192.168.1.20-192.168.1.39
autoAssign: false
---
# Pool used for dynamic assignment: any LoadBalancer Service without an
# explicit address may receive an IP from this range (autoAssign: true).
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
  name: flex
  namespace: metallb-system
spec:
  addresses:
    # Written as "start-end" without spaces, matching the `cherrypick` pool.
    - 192.168.1.41-192.168.1.60
  autoAssign: true
---
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
name: l2-advert
namespace: metallb-system
spec:
ipAddressPools:
- cherrypick
- flex

View File

@@ -0,0 +1,64 @@
# Let's Encrypt issuer using the HTTP-01 challenge through the Istio ingress.
# ClusterIssuer is cluster-scoped, so no `metadata.namespace` is set.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-public
spec:
  acme:
    # The ACME server URL
    # server: https://acme-staging-v02.api.letsencrypt.org/directory # Testing
    server: https://acme-v02.api.letsencrypt.org/directory # Prod
    # Email address used for ACME registration
    email: filter.oriol@gmail.com
    # Name of a secret used to store the ACME account private key
    privateKeySecretRef:
      name: letsencrypt-public
    # Enable the HTTP-01 challenge provider
    solvers:
      - http01:
          ingress:
            class: istio
            podTemplate:
              metadata:
                annotations:
                  # Request sidecar injection on the solver pod.
                  sidecar.istio.io/inject: "true"
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: filterhome-domain-cert-public
namespace: istio-system
spec:
secretName: filterhome-domain-cert-public
duration: 720h # 30d
renewBefore: 24h # 1d
# duration: 2160h # 90d
# renewBefore: 360h # 15d
isCA: false
privateKey:
algorithm: RSA
encoding: PKCS1
size: 4096
rotationPolicy: Always
usages:
- server auth
- client auth
dnsNames:
## - "*.filterhome.xyz"
# Gitea
- "gitea.filterhome.xyz"
# Jellyfin
- "jelly.filterhome.xyz"
# Filebrowser
- "filebrowser.filterhome.xyz"
# Tube
- "tube.filterhome.xyz"
issuerRef:
name: letsencrypt-public
kind: ClusterIssuer
group: cert-manager.io

View File

@@ -0,0 +1,6 @@
apiVersion: v1
kind: Namespace
metadata:
name: external
labels:
istio-injection: "enabled"

View File

@@ -0,0 +1,15 @@
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
name: filebrowser
namespace: external
labels:
app: filebrowser
spec:
host: filebrowser.external.svc.cluster.local
trafficPolicy:
tls:
mode: SIMPLE
connectionPool:
http:
h2UpgradePolicy: UPGRADE

View File

@@ -0,0 +1,19 @@
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
name: filebrowser-se
namespace: external
labels:
app: filebrowser
spec:
hosts:
- filebrowser.external.svc.cluster.local
location: MESH_INTERNAL
ports:
- number: 443
name: https
protocol: HTTPS
resolution: NONE
workloadSelector:
labels:
host: srv

View File

@@ -0,0 +1,21 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: filebrowser-vs
namespace: external
labels:
app: filebrowser
spec:
hosts:
- "filebrowser.filter.home"
- "filebrowser.filterhome.xyz"
- "filebrowser.filterhome.duckdns.org"
gateways:
- default/public-gateway
- default/local-gateway
http:
- route:
- destination:
host: filebrowser.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,40 @@
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: http-to-https-public
namespace: default
spec:
selector:
istio: public-ingress
servers:
- port:
number: 80
name: http2
protocol: HTTP2
hosts:
- "*"
tls:
httpsRedirect: true
---
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: public-gateway
namespace: default
spec:
selector:
istio: public-ingress
servers:
- port:
number: 443
name: https
protocol: HTTPS
hosts:
- "*.filterhome.xyz"
- "filterhome.xyz"
# - "filterhome.duckdns.org"
# - "*.filterhome.duckdns.org"
tls:
mode: SIMPLE
credentialName: filterhome-domain-cert-public

View File

@@ -0,0 +1,15 @@
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
name: gitea
namespace: external
labels:
app: gitea
spec:
host: gitea.external.svc.cluster.local
trafficPolicy:
tls:
mode: SIMPLE
connectionPool:
http:
h2UpgradePolicy: UPGRADE

View File

@@ -0,0 +1,19 @@
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
name: gitea-se
namespace: external
labels:
app: gitea
spec:
hosts:
- gitea.external.svc.cluster.local
location: MESH_INTERNAL
ports:
- number: 443
name: https
protocol: HTTPS
resolution: NONE
workloadSelector:
labels:
host: srv

View File

@@ -0,0 +1,21 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: gitea-vs
namespace: external
labels:
app: gitea
spec:
hosts:
- "gitea.filter.home"
- "gitea.filterhome.xyz"
- "gitea.filterhome.duckdns.org"
gateways:
- default/public-gateway
- default/local-gateway
http:
- route:
- destination:
host: gitea.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,9 @@
apiVersion: networking.istio.io/v1alpha3
kind: WorkloadEntry
metadata:
name: srv-host
namespace: external
spec:
address: 192.168.1.3
labels:
host: srv

View File

@@ -0,0 +1,16 @@
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
name: jelly
namespace: external
labels:
app: jellyfin
spec:
host: jelly.external.svc.cluster.local
trafficPolicy:
tls:
mode: SIMPLE
connectionPool:
http:
h2UpgradePolicy: DO_NOT_UPGRADE
# h2UpgradePolicy: UPGRADE

View File

@@ -0,0 +1,19 @@
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
name: jelly-se
namespace: external
labels:
app: jellyfin
spec:
hosts:
- jelly.external.svc.cluster.local
location: MESH_INTERNAL
ports:
- number: 443
name: https
protocol: HTTPS
resolution: NONE
workloadSelector:
labels:
host: srv

View File

@@ -0,0 +1,21 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: jelly-vs
namespace: external
labels:
app: jellyfin
spec:
hosts:
- "jelly.filter.home"
- "jelly.filterhome.xyz"
- "jelly.filterhome.duckdns.org"
gateways:
- default/public-gateway
- default/local-gateway
http:
- route:
- destination:
host: jelly.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,15 @@
apiVersion: networking.istio.io/v1alpha3
kind: DestinationRule
metadata:
name: tube
namespace: external
labels:
app: tube
spec:
host: tube.external.svc.cluster.local
trafficPolicy:
tls:
mode: SIMPLE
connectionPool:
http:
h2UpgradePolicy: UPGRADE

View File

@@ -0,0 +1,19 @@
apiVersion: networking.istio.io/v1alpha3
kind: ServiceEntry
metadata:
name: tube-se
namespace: external
labels:
app: tube
spec:
hosts:
- tube.external.svc.cluster.local
location: MESH_INTERNAL
ports:
- number: 443
name: https
protocol: HTTPS
resolution: NONE
workloadSelector:
labels:
host: srv

View File

@@ -0,0 +1,21 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: tube-vs
namespace: external
labels:
app: tube
spec:
hosts:
- "tube.filter.home"
- "tube.filterhome.xyz"
- "tube.filterhome.duckdns.org"
gateways:
- default/public-gateway
- default/local-gateway
http:
- route:
- destination:
host: tube.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,42 @@
# Issuer backed by the local CA key pair stored in the `local-ca` Secret.
# ClusterIssuer is cluster-scoped, so no `metadata.namespace` is set.
# NOTE(review): the `local-ca` Secret is presumably looked up in cert-manager's
# cluster resource namespace (`cert-manager` by default) - confirm.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: ca-issuer
spec:
  ca:
    secretName: local-ca
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: local-wildcard-certificate
namespace: istio-system
spec:
secretName: domain-cert-local
privateKey:
rotationPolicy: Always
algorithm: RSA
encoding: PKCS1
size: 4096
duration: 720h # 30d
renewBefore: 24h # 1d
subject:
organizations:
- FilterHome
commonName: filterhome
isCA: false
usages:
- server auth
- client auth
dnsNames:
# - demoapi.default
# - demoapi.default.svc
# - demoapi.default.svc.cluster
# - demoapi.default.svc.cluster.local
- "filter.home"
- "*.filter.home"
# - jelly.filter.home
issuerRef:
name: ca-issuer
kind: ClusterIssuer

View File

@@ -0,0 +1,8 @@
apiVersion: v1
kind: Secret
metadata:
name: local-ca
namespace: cert-manager
data:
tls.crt:
tls.key:

View File

@@ -0,0 +1,37 @@
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: http-to-https-local
namespace: default
spec:
selector:
istio: local-ingress
servers:
- port:
number: 80
name: http
protocol: HTTP
hosts:
- "*"
tls:
httpsRedirect: true
---
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: local-gateway
namespace: default
spec:
selector:
istio: local-ingress
servers:
- port:
number: 443
name: https
protocol: HTTPS
hosts:
- "filter.home"
- "*.filter.home"
tls:
mode: SIMPLE
credentialName: domain-cert-local

View File

@@ -0,0 +1,45 @@
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-fihome
namespace: istio-system
spec:
acme:
# ACME Server
# prod : https://acme-v02.api.letsencrypt.org/directory
# staging : https://acme-staging-v02.api.letsencrypt.org/directory
server: https://acme-v02.api.letsencrypt.org/directory
# ACME Email address
email: <redacted>
privateKeySecretRef:
name: letsencrypt-fihome # staging or production
solvers:
- selector:
dnsNames:
- '*.fihome.xyz'
dns01:
webhook:
config:
apiKeySecretRef:
name: fihome-godaddy-api-key
key: key
secret: secret
production: true
ttl: 600
groupName: acme.fihome.xyz
solverName: godaddy
---
# Wildcard certificate for *.fihome.xyz, issued by the `letsencrypt-fihome`
# ClusterIssuer and stored in the `fihome-domain-cert-public` Secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: fihome-domain-cert-public
  namespace: istio-system
spec:
  secretName: fihome-domain-cert-public
  duration: 720h # 30d
  renewBefore: 168h # 7d
  dnsNames:
    - '*.fihome.xyz'
  issuerRef:
    name: letsencrypt-fihome
    kind: ClusterIssuer

View File

@@ -0,0 +1,9 @@
apiVersion: v1
kind: Secret
metadata:
name: fihome-godaddy-api-key
namespace: cert-manager
type: Opaque
data:
key: <redacted>
secret: <redacted>

View File

@@ -0,0 +1,23 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: filebrowser-vs
namespace: external
labels:
app: filebrowser
spec:
hosts:
- "filebrowser.filter.home"
- "filebrowser.filterhome.xyz"
- "filebrowser.fihome.xyz"
- "filebrowser.filterhome.duckdns.org"
gateways:
- default/public-gateway
- default/local-gateway
- default/fihome-gateway
http:
- route:
- destination:
host: filebrowser.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,20 @@
# HTTPS gateway for the fihome.xyz domain, served by the public ingress.
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
  name: fihome-gateway
  namespace: default
spec:
  selector:
    istio: public-ingress
  servers:
    - port:
        number: 443
        name: https
        protocol: HTTPS
      hosts:
        - "*.fihome.xyz"
        # NOTE(review): a `*.fihome.xyz` wildcard certificate does not cover
        # the apex `fihome.xyz` - confirm the certificate's dnsNames.
        - "fihome.xyz"
      tls:
        mode: SIMPLE
        # Must equal the `secretName` of the `fihome-domain-cert-public`
        # Certificate (namespace istio-system); the previous value
        # `fihome-cert` matched no Secret produced by these manifests.
        credentialName: fihome-domain-cert-public

View File

@@ -0,0 +1,23 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: gitea-vs
namespace: external
labels:
app: gitea
spec:
hosts:
- "gitea.filter.home"
- "gitea.filterhome.xyz"
- "gitea.fihome.xyz"
- "gitea.filterhome.duckdns.org"
gateways:
- default/public-gateway
- default/local-gateway
- default/fihome-gateway
http:
- route:
- destination:
host: gitea.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,23 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: jelly-vs
namespace: external
labels:
app: jellyfin
spec:
hosts:
- "jelly.filter.home"
- "jelly.filterhome.xyz"
- "jelly.filterhome.duckdns.org"
- "jelly.fihome.xyz"
gateways:
- default/public-gateway
- default/local-gateway
- default/fihome-gateway
http:
- route:
- destination:
host: jelly.external.svc.cluster.local
port:
number: 443

View File

@@ -0,0 +1,23 @@
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
name: tube-vs
namespace: external
labels:
app: tube
spec:
hosts:
- "tube.filter.home"
- "tube.filterhome.xyz"
- "tube.filterhome.duckdns.org"
- "tube.fihome.xyz"
gateways:
- default/public-gateway
- default/local-gateway
- default/fihome-gateway
http:
- route:
- destination:
host: tube.external.svc.cluster.local
port:
number: 443

View File

@@ -141,7 +141,7 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
- [x] Deploy an Ingress LB for local thingies.
> **Note:**\
> **Note:**
> - https://istio.io/latest/docs/tasks/traffic-management/egress/
> - https://istio.io/latest/docs/tasks/traffic-management/egress/egress-kubernetes-services/
> - https://istio.io/latest/docs/reference/config/istio.operator.v1alpha1/
@@ -167,7 +167,7 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
- [x] Deploy configurations to route some services through the Istio `istio-local-ingress` Load Balancer deployed.
> **Note**:\
> Regarding Let's Encrypt certificate provisioning, for testing purposes the `staging` environment should be used, nevertheless on my scenario I am running directly on the production environment, why?\
> Regarding Let's Encrypt certificate provisioning, for testing purposes the `staging` environment should be used, nevertheless on my scenario I am running directly on the production environment, why?
> - `Staging` and `Production` behave different, therefore one can get the certificates verified on `Staging` and not on `Production`.
> - I ran into some issues regarding the sentence from above, so there was some back and forth, this topic is mentioned [here at the end.](#2x1-able-to-get-staging-le-certs-but-not-the-production-one-when-using-custom-istio-selector-and-only-able-to-get-production-le-certs-when-using-the-default-istio-ingressgateway-selector)
> - Since there was "back and forth" I sort of cheated and set this as it is.
@@ -182,7 +182,7 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
### Part 4
> Completed 27/July/2023\
> Completed 27/July/2023
- [x] Deploy locally a Certificate Authorization Service (on the SRV host.)
@@ -208,6 +208,8 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
### Part 5
> Completed 01/August/2023
- [x] Explore Pi4 Storage options.
- [x] Consider Storage options for the OrangePi5.
@@ -218,8 +220,10 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
### Part 6
> Completed 02/August/2023
- [x] ~~Wipe~~ (**don't wipe** just use a different drive) and recreate the current `Kluster`, this time using the Pi4 as a _master_, and the 2 Orange Pi5 as _slaves_ (this will require updating the DNS/DHCP local services).
- [ ] Deploy all the services from the previous Kubernetes cluster to the new one.
- [x] Deploy all the services from the previous Kubernetes cluster to the new one.
> **Note**:\
> I can make a new cluster on the Pi4 and remove the taint that prevents pods from being scheduled on that node. Deploy everything inside (as well as a LB with the exact same IP as the current one, and proceed to stop the Orange Pi 5s), then "reformat" the OPi5s with a new distro, install stuff etc., and join them to the cluster running on the Pi4.
@@ -230,18 +234,27 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
### Part 7
- [ ] Remove the last host from the old kubernetes cluster and join it to the new cluster.
> Completed 02/August/2023
- [x] Remove the last host from the old kubernetes cluster and join it to the new cluster.
### Part 8
- [ ] Deploy NFS service on the `media SRV` host.
- [x] Set wildcards certificates through `ACME DNS01` challenge.
### Part 9
- [ ] Deploy Istio security.
- [x] Deploy NFS service(s) on the `media SRV` host.
### Part 10
- [ ] Deploy Istio security.
> **Note:**\
> Since there are barely any workloads, I don't think it's wise to deploy restrictive security settings for the moment.
### Part 11
- [ ] Update the `Current Setup` documentation with the new container and architecture rearrangement.
- [ ] Migrate some lightweight/not data heavy services from the `media SRV` to the `Kluster`.
@@ -258,17 +271,14 @@ Current Issue? For X and y, I need to wait for a while for the DNS provider to r
- Run the old migrated services back on the `media SRV` host.
### Part 11
### TMP Notes
- Set wildcards certificates through `ACME DNS01` challenge.
### Extras?
#### Horizontal Pod Autoscaling for the Istio LBs.
- https://github.com/joohoi/acme-dns
# Execution
## Part 1
### Transfer local network dependencies services from Pi4 to SRV.
#### Install ZIP on `Pi4`
@@ -1160,7 +1170,7 @@ I updated the Local DNS to point towards the new-architecture/new-resources.
First, let's generate a certificate, and it's key.
```shell
openssl req -x509 -newkey rsa:4096 -sha256 -days 5 -nodes \
openssl req -x509 -newkey rsa:4096 -sha256 -days 365 -nodes \
-keyout ca.filter.home.key -out ca.filter.home.cer \
-subj /C=ES/ST=BAR/O=FilterHome/CN=ca.filter.home \
-extensions ext \
@@ -1291,7 +1301,7 @@ x-envoy-upstream-service-time: 2
> It's extremely possible that I set HTTP to HTTPS redirect also locally, still need to decide if there is any reason for which I would like to maintain the local HTTP traffic.
### Part 5
## Part 5
I decided to use a SSD with the Pi4 through a USB3.0 connector.
@@ -1299,7 +1309,7 @@ On the Orange Pi5 I intend to use an m.2 that I bought for it, yet need to try i
Therefore, I will remove 1 node from the current Kubernetes cluster and try there the m.2 that I got.
#### Remove 1 OrangePI Node
### Remove 1 OrangePI Node
https://stackoverflow.com/questions/35757620/how-to-gracefully-remove-a-node-from-kubernetes
@@ -1353,7 +1363,7 @@ error when evicting pods/"istio-public-ingress-5bb994c8b7-j9hgr" -n "istio-syste
Well, it's on it, so let's open a new shell.
#### Cannot evict pod as it would violate the pod's disruption budget.
### Cannot evict pod as it would violate the pod's disruption budget.
Let's delete the "remaining" pods manually.
@@ -1430,7 +1440,7 @@ kubectl delete node slave01.filter.home
node "slave01.filter.home" deleted
```
### Part 6
## Part 6
Currently, the resources are as follows:
@@ -1449,17 +1459,17 @@ I will change it to
```
#### Prepare NVME images etc.
### Prepare NVME images etc.
Did the needful.
#### Set up cluster
### Set up cluster
Used the following ansible script to set things up.
https://gitea.filterhome.xyz/ofilter/ansible_kubernetes_cluster
##### run.sh
#### run.sh
<pre><span style="color:#FF7F7F"><b></b></span> ./run.sh
@@ -1755,19 +1765,8 @@ PLAY RECAP *********************************************************************
<span style="color:#CC3980">slave02.filter.home</span> : <span style="color:#7F3FBF">ok=12 </span> <span style="color:#CC3980">changed=23 </span> unreachable=0 failed=0 <span style="color:#7f7fff">skipped=12 </span> rescued=0 ignored=0
</pre>
#### Check cluster status
### Check cluster status
First I will copy the kubeconfig file to a place of my own.
```shell
cp ksetup/Exported/kubeconfig.conf ~/kubeconfig.conf -v
```
```text
'ksetup/Exported/kubeconfig.conf' -> '/home/savagebidoof/kubeconfig.conf'
```
Pods are deployed correctly
```shell
kubectl get pods --kubeconfig ~/kubeconfig.conf -A -owide
@@ -1791,19 +1790,718 @@ metallb-system speaker-5zptn 1/1 Running 2
metallb-system speaker-whw4n 1/1 Running 2 (22m ago) 26m 192.168.1.11 slave02.filter.home <none> <none>
```
### Kubeconfig
#### Backup `Kubeconfig` file
I will back up `kubeconfig.conf` file to a directory of my own.
```shell
cp ksetup/Exported/kubeconfig.conf ~/kubeconfig.conf -v
```
```text
'ksetup/Exported/kubeconfig.conf' -> '/home/savagebidoof/kubeconfig.conf'
```
Pods are deployed correctly
#### Configure new `Kubeconfig` location
```shell
export KUBECONFIG="/home/savagebidoof/kubeconfig.conf"
```
#### Confirm `Kubeconfig` is selected properly
```shell
kubectl get nodes
```
```text
NAME STATUS ROLES AGE VERSION
pi4.filter.home Ready control-plane 18h v1.27.4
slave02.filter.home Ready <none> 17h v1.27.4
```
### Move workloads
Well it's time to move everything over.
During the couple of days I spent waiting for the NVMe I ordered, I considered changing the IPs of the deployed LBs.
Instead of using:
```yaml
.80 -> public LB
.81 -> local LB
.90 -> egress LB
```
I will be using
```yaml
.20 -> public LB
.21 -> local LB
.39 -> egress LB
```
#### Deploy lacking CRDs
I already have `MetalLB` and `Calico` installed.
I lack Cert-manager CRDs.
```shell
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.12.0/cert-manager.yaml
```
```text
namespace/cert-manager created
customresourcedefinition.apiextensions.k8s.io/certificaterequests.cert-manager.io created
customresourcedefinition.apiextensions.k8s.io/certificates.cert-manager.io created
customresourcedefinition.apiextensions.k8s.io/challenges.acme.cert-manager.io created
customresourcedefinition.apiextensions.k8s.io/clusterissuers.cert-manager.io created
...
```
#### MetalLB Config
I am setting up 2 Address Pools.
`cherrypick` for the things that I want to give IPs with my finger.
```yaml
kind: IPAddressPool
...
name: cherrypick
- 192.168.1.20-192.168.1.39
...
autoAssign: false
```
And `flex` to assign IP dynamically.
```yaml
kind: IPAddressPool
...
name: flex
- 192.168.1.41-192.168.1.60
...
autoAssign: true
```
##### Deploy MetalLB configuration
```shell
kubectl create -f P6_Redeployment/MetalLB.yaml
```
# I am here <----
```text
ipaddresspool.metallb.io/cherrypick created
ipaddresspool.metallb.io/flex created
l2advertisement.metallb.io/l2-advert created
```
## Should add labels to the SE resources etc.
#### Deploy Istio Config and Load Balancers
##### IstioOperator_IstioConfig.yaml
```shell
istioctl install -y -f P6_Redeployment/Istio_Config/IstioOperator_IstioConfig.yaml
```
<pre>This will install the Istio 1.18.2 minimal profile with [&quot;Istio core&quot; &quot;Istiod&quot;] components into the cluster. Proceed? (y/N) y
<span style="color:#7F3FBF"></span> Istio core installed
<span style="color:#7F3FBF"></span> Istiod installed
<span style="color:#7F3FBF"></span> Installation complete
Making this installation the default for injection and validation.</pre>
##### IstioOperator_IstioEgress.yaml
```shell
istioctl install -y -f P6_Redeployment/Istio_Config/IstioOperator_IstioEgress.yaml
```
<pre><span style="color:#7F3FBF"></span> Egress gateways installed
<span style="color:#7F3FBF"></span> Installation complete </pre>
> **Note**:\
> The egress resource doesn't work "right off the bat", requires some configurations regarding this matter, don't think this will be done on this "walkthrough".
##### IstioOperator_LocalIngress.yaml
```shell
istioctl install -y -f P6_Redeployment/Istio_Config/IstioOperator_LocalIngress.yaml
```
<pre><span style="color:#7F3FBF"></span> Ingress gateways installed
<span style="color:#7F3FBF"></span> Installation complete</pre>
##### IstioOperator_PublicIngress.yaml
```shell
istioctl install -y -f P6_Redeployment/Istio_Config/IstioOperator_PublicIngress.yaml
```
<pre><span style="color:#7F3FBF"></span> Ingress gateways installed
<span style="color:#7F3FBF"></span> Installation complete </pre>
##### Check Service IP provisioning
```shell
kubectl get svc -n istio-system | grep LoadBalancer
```
<pre>egress-gw <span style="color:#FF7F7F"><b>LoadBalancer</b></span> 10.106.41.20 192.168.1.39 80:31322/TCP,443:30559/TCP 138m
istio-local-ingress <span style="color:#FF7F7F"><b>LoadBalancer</b></span> 10.97.14.59 192.168.1.21 15021:30005/TCP,80:30168/TCP,443:32103/TCP 50m
istio-public-ingress <span style="color:#FF7F7F"><b>LoadBalancer</b></span> 10.100.53.247 192.168.1.20 15021:31249/TCP,80:30427/TCP,443:30411/TCP 50m</pre>
### Prepare Secrets
#### Placeholder folder for God knows what
```shell
mkdir tmp
```
#### Local Certs
```shell
openssl req -x509 -newkey rsa:4096 -sha512 -days 365 -nodes \
-keyout tmp/ca.filter.home.key -out tmp/ca.filter.home.cer \
-subj /C=ES/ST=BAR/O=FilterHome/CN=ca.filter.home \
-extensions ext \
-config <(cat <<EOF
[req]
distinguished_name=req
[ext]
keyUsage=critical,keyCertSign,cRLSign
basicConstraints=critical,CA:true,pathlen:1
subjectAltName=DNS:ca.filter.home
EOF
)
```
```shell
cmdsubst heredoc> [req]
cmdsubst heredoc> distinguished_name=req
cmdsubst heredoc> [ext]
cmdsubst heredoc> keyUsage=critical,keyCertSign,cRLSign
cmdsubst heredoc> basicConstraints=critical,CA:true,pathlen:1
cmdsubst heredoc> subjectAltName=DNS:ca.filter.home
cmdsubst heredoc> EOF
cmdsubst> )
```
Export `.key` and `.cer` base64
```shell
cat tmp/ca.filter.home.cer | base64 | tr -d '\n'
```
```shell
cat tmp/ca.filter.home.key | base64 | tr -d '\n'
```
Add the base64 outputs to the secrets file `P6_Redeployment/non_Istio_Config/Local_Certs/Secret.yaml`
```shell
nano P6_Redeployment/non_Istio_Config/Local_Certs/Secret.yaml
```
#### Modify Public Certificate duration
Changed the duration to something more "reasonable".
```shell
nano P6_Redeployment/non_Istio_Config/Certificate_Manager/Issuer.yaml
```
#### Modify Local Certificate duration
Changed the duration to something more "reasonable".
```shell
nano P6_Redeployment/non_Istio_Config/Local_CA/Issuer.yaml
```
##### Set up HTTP to HTTPS in the local gateway.
Added the following Gateway and modified the previously existing one to remove `HTTP` access through port 80.
```yaml
apiVersion: networking.istio.io/v1alpha3
kind: Gateway
metadata:
name: http-to-https-local
namespace: default
spec:
selector:
istio: local-ingress
servers:
- port:
number: 80
name: http
protocol: HTTP
hosts:
- "*"
tls:
httpsRedirect: true
```
#### Deploy EVERYTHING not IstioOperator
Well, it's time to press the create button. This will print some text; just make sure everything is reported as "created".
```shell
kubectl create -f P6_Redeployment/non_Istio_Config -R
```
```text
clusterissuer.cert-manager.io/letsencrypt-public created
certificate.cert-manager.io/filterhome-domain-cert-public created
namespace/external created
destinationrule.networking.istio.io/filebrowser created
serviceentry.networking.istio.io/filebrowser-se created
virtualservice.networking.istio.io/filebrowser-vs created
gateway.networking.istio.io/http-to-https-public created
gateway.networking.istio.io/public-gateway created
destinationrule.networking.istio.io/gitea created
serviceentry.networking.istio.io/gitea-se created
virtualservice.networking.istio.io/gitea-vs created
workloadentry.networking.istio.io/srv-host created
destinationrule.networking.istio.io/jelly created
serviceentry.networking.istio.io/jelly-se created
virtualservice.networking.istio.io/jelly-vs created
destinationrule.networking.istio.io/tube created
serviceentry.networking.istio.io/tube-se created
virtualservice.networking.istio.io/tube-vs created
clusterissuer.cert-manager.io/ca-issuer created
certificate.cert-manager.io/local-wildcard-certificate created
secret/local-ca created
gateway.networking.istio.io/http-to-https-local created
gateway.networking.istio.io/local-gateway created
```
#### Remove ./tmp folder
We no longer need the folder ./tmp, therefore we can delete it.
```shell
rm ./tmp/* -v
```
```text
zsh: sure you want to delete all 4 files in /home/savagebidoof/IdeaProjects/home_shit/Migrations/Forget_Traefik_2023/./tmp [yn]? y
removed './tmp/ca.filter.home.cer'
removed './tmp/ca.filter.home.key'
```
#### Update the Router to point towards the "new" Ingress Load Balancer
Changed from `192.168.1.80` to `192.168.1.20`.
#### Update Local DNS
I did the needful.
#### Monitor Public Cert Provisioning
```shell
kubectl get events -n istio-system --field-selector involvedObject.name=filterhome-domain-cert-public,involvedObject.kind=Certificate --sort-by=.metadata.creationTimestamp --watch
```
```text
LAST SEEN TYPE REASON OBJECT MESSAGE
13m Normal Issuing certificate/filterhome-domain-cert-public Issuing certificate as Secret does not exist
12m Normal Generated certificate/filterhome-domain-cert-public Stored new private key in temporary Secret resource "filterhome-domain-cert-public-2vdxk"
12m Normal Requested certificate/filterhome-domain-cert-public Created new CertificateRequest resource "filterhome-domain-cert-public-js69j"
8m46s Normal Issuing certificate/filterhome-domain-cert-public Issuing certificate as Secret does not exist
8m35s Normal Generated certificate/filterhome-domain-cert-public Stored new private key in temporary Secret resource "filterhome-domain-cert-public-n8w8s"
8m35s Normal Requested certificate/filterhome-domain-cert-public Created new CertificateRequest resource "filterhome-domain-cert-public-cb8ws"
103s Normal Issuing certificate/filterhome-domain-cert-public The certificate has been successfully issued
```
## Part 7
I did set up the NVME with the OS etc.
### Set up cluster
Used the following Ansible script to join the host to the Kubernetes cluster; all I had to do was populate the `Exported/kubeadm-join.command` file.
https://gitea.filterhome.xyz/ofilter/ansible_kubernetes_cluster
## Difficulties
<pre><span style="color:#FF7F7F"><b></b></span> ./run.sh
PLAY [Pre Setup] *************************************************************************************************************************************************************************************************************************
TASK [apt update] ************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [apt upgrade] ***********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Install sudo] **********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [update facts] **********************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Make sure we have a &apos;wheel&apos; group] *************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Allow &apos;wheel&apos; group to have passwordless sudo] *************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Add user new_user_name] ************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">[DEPRECATION WARNING]: Encryption using the Python crypt module is deprecated. The Python crypt module is deprecated and will be removed from Python 3.13. Install the passlib library for continued encryption functionality. This </span>
<span style="color:#7F3FBF">feature will be removed in version 2.17. Deprecation warnings can be disabled by setting deprecation_warnings=False in ansible.cfg.</span>
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [adding user &apos;klussy&apos; to group wheel] ***********************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Select new user] *******************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Change root default password] ******************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">[DEPRECATION WARNING]: Encryption using the Python crypt module is deprecated. The Python crypt module is deprecated and will be removed from Python 3.13. Install the passlib library for continued encryption functionality. This </span>
<span style="color:#7F3FBF">feature will be removed in version 2.17. Deprecation warnings can be disabled by setting deprecation_warnings=False in ansible.cfg.</span>
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [PermitRootLogin = no] **************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [PermitEmptyPasswords = no] *********************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Ensure localisation files for &apos;en_US.UTF-8&apos; are available] *************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Ensure localisation files for &apos;en_US.UTF-8&apos; are available] *************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Get current locale and language configuration] *************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Configure locale to &apos;en_US.UTF-8&apos; and language to &apos;en_US.UTF-8&apos;] *******************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [reboot] ****************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
PLAY RECAP *******************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">slave01.filter.home</span> : <span style="color:#7F3FBF">ok=10 </span> <span style="color:#CC3980">changed=11 </span> unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 </pre>
<pre>PLAY [Preparethings] *********************************************************************************************************************************************************************************************************************
TASK [Gathering Facts] *******************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [debug] *****************************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home] =&gt; {</span>
<span style="color:#7F3FBF"> &quot;set_hostname&quot;: &quot;slave01.filter.home&quot;</span>
<span style="color:#7F3FBF">}</span>
TASK [debug] *****************************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home] =&gt; {</span>
<span style="color:#7F3FBF"> &quot;is_master&quot;: false</span>
<span style="color:#7F3FBF">}</span>
TASK [Set a hostname] ********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Swapoff] ***************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Disable ram on boot (orangepi)] ****************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Disable ram on boot (armbian)] *****************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [apt prune containerd] **************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [apt update] ************************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [apt upgrade] ***********************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [apt install gnupg] *****************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Creating a new directory] **********************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Add Docker GPG key] ****************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Add Docker APT repository] *********************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Add Kubernetes GPG key] ************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Add Kubernetes APT repository] *****************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [apt update] ************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Install Kubelet Kubeadm Kubectl] ***************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Hold kubeadm] **********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Hold kubelet] **********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Hold kubectl] **********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Install Container Runtime] *********************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Containerd set default config] *****************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [SystemdCgroup = true] **************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Iptables thingies (not touching specific firewall rules.)] *************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Iptables thingies] *****************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Add the overlay module] ************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Add the br_netfilter module] *******************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Apply changes (might need to use sysctl module with the reload flag, will try eventually)] *****************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Enable kubelet] ********************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Enable containerd] *****************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [Enable kubelet] ********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Restart containerd] ****************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Set /etc/hosts file content (template/base)] ***************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Init cluster] **********************************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Export remote kubeconfig file] *****************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Remove Taint (allows deployment in control plane node)] ****************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Calico] ****************************************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [MetalLB] ***************************************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Generate join token] ***************************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [set_fact] **************************************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [debug] *****************************************************************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Store join command in &quot;./Exported/kubeadm-join.command&quot;] ***************************************************************************************************************************************************************************
<span style="color:#7F7FFF">skipping: [slave01.filter.home]</span>
TASK [Populate] **************************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home]</span>
TASK [debug] *****************************************************************************************************************************************************************************************************************************
<span style="color:#7F3FBF">ok: [slave01.filter.home] =&gt; {</span>
<span style="color:#7F3FBF"> &quot;_kubeadm_join_command&quot;: &quot;kubeadm join 192.168.1.9:6443 --token fjw9iy.0v1vim764ls5mpyp --discovery-token-ca-cert-hash sha256:028116b6076078c15bd4212b8a96ecc159507a07df11d7f9859b2593533616c3&quot;</span>
<span style="color:#7F3FBF">}</span>
TASK [Join kubeadm] **********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [Delete user] ***********************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
TASK [reboot] ****************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">changed: [slave01.filter.home]</span>
PLAY RECAP *******************************************************************************************************************************************************************************************************************************
<span style="color:#CC3980">slave01.filter.home</span> : <span style="color:#7F3FBF">ok=38 </span> <span style="color:#CC3980">changed=25 </span> unreachable=0 failed=0 <span style="color:#7F7FFF">skipped=10 </span> rescued=0 ignored=0</pre>
#### Check Cluster node list
The node was added correctly.
```shell
kubectl get nodes
```
```text
NAME STATUS ROLES AGE VERSION
pi4.filter.home Ready control-plane 22h v1.27.4
slave01.filter.home Ready <none> 78s v1.27.4
slave02.filter.home Ready <none> 21h v1.27.4
```
## Part 8
I bought the domain `fihome.xyz`.
### Install helm
I followed the process documented [here](https://helm.sh/docs/intro/install/).
Also, I didn't save the output, but the process is fairly simple, so ... GL!
### Update the other reverse proxies to accept ingress from the domain `fihome.xyz`
In my scenario I had to update the reverse proxy on the host `srv` to allow traffic from that domain.
I also had to modify the services that I intend to serve publicly.
### Install webhook
The repository I will be using is:
https://github.com/snowdrop/godaddy-webhook
Following the installation steps listed in [here](https://github.com/snowdrop/godaddy-webhook#the-godaddy-webhook).
```shell
helm repo add godaddy-webhook https://fred78290.github.io/cert-manager-webhook-godaddy/
```
```text
"godaddy-webhook" has been added to your repositories
```
```shell
helm repo update
```
```text
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "godaddy-webhook" chart repository
...Successfully got an update from the "nfs-subdir-external-provisioner" chart repository
Update Complete. ⎈Happy Helming!⎈
```
```shell
helm upgrade -i godaddy-webhook godaddy-webhook/godaddy-webhook \
--set groupName=acme.fihome.xyz \
--set image.tag=v1.27.2 \
--set image.pullPolicy=Always \
--namespace cert-manager
```
### Update VirtualServices and deploy an Istio Gateway config
This includes adding the `fihome.xyz` domain to the VirtualService resources, and adding a TLS entry for the respective `Istio Gateway` configuration.
```shell
kubectl apply -f P8_Ingress -R
```
```text
gateway.networking.istio.io/fihome-gateway created
virtualservice.networking.istio.io/filebrowser-vs configured
virtualservice.networking.istio.io/gitea-vs configured
virtualservice.networking.istio.io/jelly-vs configured
virtualservice.networking.istio.io/tube-vs configured
```
### Deploy cert-manager config for the secondary domain
```shell
kubectl apply -f P8_Fihome_Cert -R
```
```text
clusterissuer.cert-manager.io/letsencrypt-fihome created
certificate.cert-manager.io/fihome-domain-cert-public created
secret/fihome-godaddy-api-key created
```
### Monitor the certificate provisioning
#### Check Certificate Issuer status
Well, it was very fast in my scenario, but since we are doing a DNS challenge, it could take some time, especially if we are required to wait for the DNS propagation, which, as a rule of thumb, can take up to 72h.
```shell
kubectl get -n istio-system certificate fihome-domain-cert-public -o jsonpath='{.metadata.name}{"\t"}{.status.conditions[].reason}{"\t"}{.status.conditions[].message}{"\n"}'
```
```text
fihome-domain-cert-public Ready Certificate is up to date and has not expired
```
#### Certificate logs
```shell
kubectl get events -n istio-system --field-selector involvedObject.name=fihome-domain-cert-public,involvedObject.kind=Certificate --sort-by=.metadata.creationTimestamp --watch
```
```text
LAST SEEN TYPE REASON OBJECT MESSAGE
64s Normal Issuing certificate/fihome-domain-cert-public Issuing certificate as Secret does not exist
64s Normal Generated certificate/fihome-domain-cert-public Stored new private key in temporary Secret resource "fihome-domain-cert-public-wz9hv"
64s Normal Requested certificate/fihome-domain-cert-public Created new CertificateRequest resource "fihome-domain-cert-public-1"
61s Normal Issuing certificate/fihome-domain-cert-public The certificate has been successfully issued
```
## Part 9
Well, I did not document this either, yet I did deploy the following NFS provider from this repo:
- https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner
So far it's been working correctly. I can't say I have tested it much, but the services I am running are working correctly, so: so far so good.
## Part 10
# Difficulties
The Certificate Manager must be located in the same namespace as the `istiod` service, regardless of where the LB is located.
## no healthy upstream
During the "testing of configurations", I created the `DestinationRule` for Jelly in the namespace `default`, instead of `external`.
@@ -1936,7 +2634,7 @@ Let's check the "access logs" differences when using one selector or another.
#### Custom
```shell
kubectl get logs -l istio=public-ingress -n istio-system
kubectl logs -l istio=public-ingress -n istio-system
```
```text

View File

@@ -0,0 +1,12 @@
# PersistentVolumeClaim `prometheus-storage` in the `observability` namespace.
# Requests 1Gi from the `slow-nfs-01` storage class with ReadWriteMany access.
# NOTE(review): `slow-nfs-01` is presumably the secondary (slow/hdd) NFS
# provisioner mentioned in the commit history - confirm before relying on it.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: prometheus-storage
  namespace: observability
spec:
  storageClassName: slow-nfs-01
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Gi

View File

@@ -0,0 +1,690 @@
- This time I won't be doing a "walkthrough" from the process, but instead a progress list.
The plan is to replace the `srv` server that is currently used as standalone docker/NFS server, with a Proxmox instance as it would allow some more flexibility.
My current requirements are:
- I need a NFS server (Proxmox can do that)
- Jenkins agent
## NFS
While I configure the NFS entries, the Kubernetes services will be down.
## Jenkins
The idea is to replace Jenkins with ArgoCD eventually, so for the moment it will be a 🤷
## Core Services
They will be moved to the Kubernetes cluster.
### Jellyfin
Will need to wait until:
- NFS are set up
- Kubernetes worker node is set up / set up to only ARM64 arch.
### Home DHCP
I'm so good that I already was building an image with DHCP both for `amd64` and `arm64`.
### Registry
- Wait until NFS is set up
### Tube
- Wait until NFS is set up
- Kubernetes worker node is set up / set up to only ARM64 arch.
### QBitTorrent
- Wait until NFS is set up
### CoreDNS
- Will be deleted.
### Gitea
- Wait until NFS is set up
## Extra notes
Could create a new NFS pool for media-related data, especially since some data could be stored on an HDD and other data could be stored on an SSD.
# Steps
## Make the DHCP server work in/from the Kubernetes cluster
- [x] Done
## Confirm how can I create a NFS server in Proxmox
https://www.reddit.com/r/Proxmox/comments/nnkt52/proxmox_host_as_nfs_server_or_guest_container_as/
https://forum.level1techs.com/t/how-to-create-a-nas-using-zfs-and-proxmox-with-pictures/117375
## Reorganize the local Network distribution/update the DHCP server
- [x] Done
## Update the DHCP server with the new arrangement
- [x] Ready
- [x] Done
## Update the DNS server with the new arrangement
- [x] Ready
- [x] Done
## Delete External service points for the Klussy deployments
- [x] Done
## Install Proxmox
- [x] Done
## Install NFS service on the Proxmox host
- [x] Done
## Configure NFS mount vols on the NFS server
- [x] Done
## Move directory from old NFS to new NFS server
- [x] Done
## Configure NFS mount vols on the klussy cluster to match the new NFS server
- [x] Done
## Deploy "old" external services (if possible) + their NFS mounts
- [x] Gitea
- [x] Tube (older version)
- [x] Registry # Maybe replace Registry for Harbor in the future
https://ruzickap.github.io/k8s-harbor/part-04/#install-harbor-using-helm
## Deploy new slave node on the Proxmox server
- [x] Done
## Update Cluster to latest version cause it's about time.
Made this Ansible script:
- https://gitea.filterhome.xyz/ofilter/ansible_update_cluster
- [x] Done
## Deploy remaining services + their NFS mounts
- [x] Jellyfin
- [x] QBitTorrent
- [x] Filebrowser
## [EXTRA] Deploy new slave node on the Proxmox server (slave04)
Decided to add ANOTHER VM as a slave to allow some flexibility between x64 nodes.
- [x] Created the VM and installed the OS
- [x] Set up GPU pass through for the newly created VM
- [x] Created a Kubernetes Node
- [x] Done
## Set up the GPU available in the Kubernetes Node
Very much what the title says. Steps below.
- [x] Done
### Install nvidia drivers
> **Note:**
> - Steps were performed in the VM Instance (Slave04). \
> - Snapshots were performed on the Proxmox node, taking a snapshot of the affected VM. \
> - `Kubectl` command(s) were performed on a computer of mine external to the Kubernetes Cluster/Nodes to interact with the Kubernetes Cluster.
#### Take snapshot
- [x] Done
#### Repo thingies
Enable `non-free` repo for debian.
aka. idk you do that
`non-free` and `non-free-firmware` are different things, so if `non-free-firmware` is already listed, but `non-free` not, slap that bitch in + `contrib`.
```md
FROM:
deb http://ftp.au.debian.org/debian/ buster main
TO:
deb http://ftp.au.debian.org/debian/ buster main non-free contrib
```
In my case that was enabled during the installation.
Once repos set up, use:
```shell
apt update && apt install nvidia-detect -y
```
##### [Error] Unable to locate package nvidia-detect
Ensure both `non-free` and `contrib` are in the repo file.
(File /etc/apt/sources.list)
#### Detect the GPU
```shell
nvidia-detect
```
```text
Detected NVIDIA GPUs:
00:10.0 VGA compatible controller [0300]: NVIDIA Corporation GM206 [GeForce GTX 960] [10de:1401] (rev a1)
Checking card: NVIDIA Corporation GM206 [GeForce GTX 960] (rev a1)
Your card is supported by all driver versions.
Your card is also supported by the Tesla drivers series.
Your card is also supported by the Tesla 470 drivers series.
It is recommended to install the
nvidia-driver
package.
```
### Install nvidia driver
```shell
apt install nvidia-driver
```
We might receive a complaint regarding "conflicting modules".
Just restart the VM.
#### Reboot VM
```shell
reboot
```
#### nvidia-smi
VM has access to the Nvidia drivers/GPU
```shell
nvidia-smi
```
```text
Fri Dec 15 00:00:36 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.147.05 Driver Version: 525.147.05 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... On | 00000000:00:10.0 Off | N/A |
| 0% 38C P8 11W / 160W | 1MiB / 4096MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
### Install Nvidia Container Runtime
#### Take snapshot
- [x] Done
#### Install curl
```shell
apt-get install curl
```
#### Add repo
https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt
```shell
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
```
```shell
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
```
### Update Containerd config
#### Select nvidia-container-runtime as new runtime for Containerd
> No clue if this is a requirement, as afterward I also made more changes to the configuration.
```shell
sudo sed -i 's/runtime = "runc"/runtime = "nvidia-container-runtime"/g' /etc/containerd/config.toml
```
#### Restart Containerd service
```shell
sudo systemctl restart containerd
```
#### Check status from Containerd
Check if Containerd has initialized correctly after restarting the service.
```shell
sudo systemctl status containerd
```
### Test nvidia runtime
#### Pull nvidia cuda image
I used the Ubuntu based container since I didn't find one specific for Debian.
```shell
sudo ctr images pull docker.io/nvidia/cuda:12.3.1-base-ubuntu20.04
```
```text
docker.io/nvidia/cuda:12.3.1-base-ubuntu20.04: resolved |++++++++++++++++++++++++++++++++++++++|
index-sha256:0654b44e2515f03b811496d0e2d67e9e2b81ca1f6ed225361bb3e3bb67d22e18: done |++++++++++++++++++++++++++++++++++++++|
manifest-sha256:7d8fdd2a5e96ec57bc511cda1fc749f63a70e207614b3485197fd734359937e7: done |++++++++++++++++++++++++++++++++++++++|
layer-sha256:25ad149ed3cff49ddb57ceb4418377f63c897198de1f9de7a24506397822de3e: done |++++++++++++++++++++++++++++++++++++++|
layer-sha256:1698c67699a3eee2a8fc185093664034bb69ab67c545ab6d976399d5500b2f44: done |++++++++++++++++++++++++++++++++++++++|
config-sha256:d13839a3c4fbd332f324c135a279e14c432e90c8a03a9cedc43ddf3858f882a7: done |++++++++++++++++++++++++++++++++++++++|
layer-sha256:ba7b66a9df40b8a1c1a41d58d7c3beaf33a50dc842190cd6a2b66e6f44c3b57b: done |++++++++++++++++++++++++++++++++++++++|
layer-sha256:c5f2ffd06d8b1667c198d4f9a780b55c86065341328ab4f59d60dc996ccd5817: done |++++++++++++++++++++++++++++++++++++++|
layer-sha256:520797292d9250932259d95f471bef1f97712030c1d364f3f297260e5fee1de8: done |++++++++++++++++++++++++++++++++++++++|
elapsed: 4.2 s
```
#### Start container
Containerd already has access to the nvidia gpu/drivers
```shell
sudo ctr run --rm --gpus 0 docker.io/nvidia/cuda:12.3.1-base-ubuntu20.04 nvidia-smi nvidia-smi
```
```text
Thu Dec 14 23:18:55 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.147.05 Driver Version: 525.147.05 CUDA Version: 12.3 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... On | 00000000:00:10.0 Off | N/A |
| 0% 41C P8 11W / 160W | 1MiB / 4096MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
```
### Set the GPU available in the Kubernetes Node
We `still` don't have the GPU added/available in the Node.
```shell
kubectl describe nodes | tr -d '\000' | sed -n -e '/^Name/,/Roles/p' -e '/^Capacity/,/Allocatable/p' -e '/^Allocated resources/,/Events/p' | grep -e Name -e nvidia.com | perl -pe 's/\n//' | perl -pe 's/Name:/\n/g' | sed 's/nvidia.com\/gpu:\?//g' | sed '1s/^/Node Available(GPUs) Used(GPUs)/' | sed 's/$/ 0 0 0/' | awk '{print $1, $2, $3}' | column -t
```
```text
Node Available(GPUs) Used(GPUs)
pi4.filter.home 0 0
slave01.filter.home 0 0
slave02.filter.home 0 0
slave03.filter.home 0 0
slave04.filter.home 0 0
```
#### Update
Set Containerd config with the following settings.
Obv do a backup of the config before proceeding to modify the file.
```toml
# /etc/containerd/config.toml
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
```
#### Restart containerd (again)
```shell
sudo systemctl restart containerd
```
#### Check status from Containerd
Check if Containerd has initialized correctly after restarting the service.
```shell
sudo systemctl status containerd
```
#### Set some labels to avoid spread
We will deploy Nvidia CRDs so will tag the Kubernetes nodes that **won't** have a GPU available to avoid running GPU related stuff on them.
```shell
kubectl label nodes slave0{1..3}.filter.home nvidia.com/gpu.deploy.operands=false
```
#### Deploy nvidia operators
"Why these `--set` flags?"
- Cause that's what worked out for me. Don't like it? Want to explore? Just try which combination works for you idk.
```shell
helm install --wait --generate-name \
nvidia/gpu-operator \
--set operator.defaultRuntime="containerd" \
-n gpu-operator \
--set driver.enabled=false \
--set toolkit.enabled=false
```
### Check running pods
Check all the pods are running (or have completed)
```shell
kubectl get pods -n gpu-operator -owide
```
```text
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
gpu-feature-discovery-4nctr 1/1 Running 0 9m34s 172.16.241.67 slave04.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-gc-79d6bb94h6fht 1/1 Running 0 9m57s 172.16.176.63 slave03.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-master-64c5nwww4 1/1 Running 0 9m57s 172.16.86.110 pi4.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-worker-72wqk 1/1 Running 0 9m57s 172.16.106.5 slave02.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-worker-7snt4 1/1 Running 0 9m57s 172.16.86.111 pi4.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-worker-9ngnw 1/1 Running 0 9m56s 172.16.176.5 slave03.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-worker-csnfq 1/1 Running 0 9m56s 172.16.241.123 slave04.filter.home <none> <none>
gpu-operator-1702608759-node-feature-discovery-worker-k6dxf 1/1 Running 0 9m57s 172.16.247.8 slave01.filter.home <none> <none>
gpu-operator-fcbd9bbd7-fv5kb 1/1 Running 0 9m57s 172.16.86.116 pi4.filter.home <none> <none>
nvidia-cuda-validator-xjfkr 0/1 Completed 0 5m37s 172.16.241.126 slave04.filter.home <none> <none>
nvidia-dcgm-exporter-q8kk4 1/1 Running 0 9m35s 172.16.241.125 slave04.filter.home <none> <none>
nvidia-device-plugin-daemonset-vvz4c 1/1 Running 0 9m35s 172.16.241.127 slave04.filter.home <none> <none>
nvidia-operator-validator-8899m 1/1 Running 0 9m35s 172.16.241.124 slave04.filter.home <none> <none>
```
### Done!
```shell
kubectl describe nodes | tr -d '\000' | sed -n -e '/^Name/,/Roles/p' -e '/^Capacity/,/Allocatable/p' -e '/^Allocated resources/,/Events/p' | grep -e Name -e nvidia.com | perl -pe 's/\n//' | perl -pe 's/Name:/\n/g' | sed 's/nvidia.com\/gpu:\?//g' | sed '1s/^/Node Available(GPUs) Used(GPUs)/' | sed 's/$/ 0 0 0/' | awk '{print $1, $2, $3}' | column -t
```
```text
Node Available(GPUs) Used(GPUs)
pi4.filter.home 0 0
slave01.filter.home 0 0
slave02.filter.home 0 0
slave03.filter.home 0 0
slave04.filter.home 1 0
```
### vGPU
I could use vGPU and split my GPU among multiple VMs, but, it would also mean that the GPU no longer posts to the Physical Monitor attached to the Proxmox PC/Server, which I would like to avoid.
While it's certainly not a requirement (and I only use the monitor in emergencies/whenever I need to touch the BIOS/install a new OS), I **still** don't own a Serial connector, therefore I will consider making the change to use vGPU **in the future** (whenever I receive the package from Aliexpress, and I confirm it works).
[//]: # (```shell)
[//]: # (kubectl events pods --field-selector status.phase!=Running -n gpu-operator)
[//]: # (```)
[//]: # ()
[//]: # (```shell)
[//]: # (kubectl get pods --field-selector status.phase!=Running -n gpu-operator | awk '{print $1}' | tail -n +2 | xargs kubectl events -n gpu-operator pods)
[//]: # (```)
## Jellyfin GPU Acceleration
- [x] Configured Jellyfin with GPU acceleration
## Make Cluster HA
- [ ] Done
- [x] Aborted
Since it would mostly require to recreate the cluster, I would like to have the DNS/DHCP service externalized to the cluster, or a Load Balancer external to the cluster, etc etc.
So, I'd rather have a cluster with 2 points of failure:
- Single control plane
- No HA NFS/NAS
Than having an Ouroboros for a cluster.
I also just thought on having a DNS failover
But it's not the current case, as
## Update rest of the stuff/configs as required to match the new Network distribution
Which stuff?
IDK. It's an OS in case I'm forgetting something
- [x] Done Aka. everything seems to be running correctly
## Migrate Jenkins
https://devopscube.com/jenkins-build-agents-kubernetes/
https://www.jenkins.io/doc/book/installing/kubernetes/
- [x] Done
## Skaffold
- Learned to use Skaffold, yet requires manual execution.
- It's great tho
https://skaffold.dev/docs/references/yaml/
https://skaffold.dev/docs/builders/cross-platform/
## CI/CD Container creation
I have decided to dump my old Jenkins architecture and rely on Skaffold, it's great.
I will work on integrating it with Jenkins.
# EXTRA EXTRA
## Secondary NFS provisioner
I will add a **secondary NFS Provisioner** as a new storage class.
This storage class will be targeting a **"slow"/HDD** directory/drive.
Mainly intended for storing a bunch of logs, files, videos, or whatever.
Looking at you Prometheus 👀👀.
NFS server: nfs.filter.home
Target directory: **/resources/slow_nfs_provisioner** (this is made up, I don't want to share it.)
## NFS Server
### Create the directory
- [x] Done
### Update NFS service config to allow such directory to be used.
- [x] Done
## Deploy new NFS provisioner
```shell
NFS_SERVER=nfs.filter.home
NFS_EXPORT_PATH=/resources/slow_nfs_provisioner
```
```shell
helm -n nfs-provisioner install slow-nfs-01 nfs-subdir-external-provisioner/nfs-subdir-external-provisioner \
--set nfs.server=${NFS_SERVER} \
--set nfs.path=${NFS_EXPORT_PATH} \
--set storageClass.defaultClass=true \
--set replicaCount=2 \
--set storageClass.name=slow-nfs-01 \
--set storageClass.provisionerName=slow-nfs-01
```
```text
NAME: slow-nfs-provisioner-01
LAST DEPLOYED: Fri Jan 12 23:32:25 2024
NAMESPACE: nfs-provisioner
STATUS: deployed
REVISION: 1
TEST SUITE: None
```
## Migrate some volumes to new dir
### Prometheus
(because he's the one filling my SSD.)
Copy files from (maintaining permissions):
**/resources/slow_nfs_provisioner/prometheus_generated_vol** to **/resources/slow_nfs_provisioner/prometheus_tmp**
This is mainly to "have them" already on the destination drive, folder name can be whatever.
### Create/Provision new PV
Since `path` value is immutable after creation, it will require to create a new volume, move the contents to the new volume, update the configs to match the new volume, recreate the workloads, then delete the old one.
Since this is my homelab, and I'm not bothered by some minutes of lost logs, I will instead, delete the old volume, delete the used deployment, create a new volume, then rename the folder `prometheus_tmp` I created on the previous step to replace the volume created (since the new volume is empty).
Then restart the Kubernetes deployment.
## Delete PVC
```shell
kubectl delete pvc -n observability prometheus-storage --force
```
This can take a bit since there are like 40GB of logs + it's still being used by the deployment.
```shell
kubectl get pvc -n observability prometheus-storage
```
```text
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
prometheus-storage Terminating pvc-698cf837-14a3-43ee-990a-5a34e1a396de 1Gi RWX nfs-01 94d
```
### Delete Deployment
```shell
kubectl delete deployment -n observability prometheus
```
```text
deployment.apps "prometheus" deleted
```
### Delete PV
```shell
kubectl delete pv pvc-698cf837-14a3-43ee-990a-5a34e1a396de
```
```text
persistentvolume "pvc-698cf837-14a3-43ee-990a-5a34e1a396de" deleted
```
### Create new volume.
```shell
kubectl create -f PrometheusVolume.yaml
```
```text
persistentvolumeclaim/prometheus-storage created
```
I later did some cleanup from the existent data cause 41GB was kind of too much for the usage I do (aka noticed that the container `prometheus-server` was taking forever to parse all the data).
Later will change the configurations to reduce the retention + data stored.
### Redeployed Prometheus
It's been a while since I did the deployment.
```bash
kubectl get deployment -n observability prometheus
```
```text
NAME READY UP-TO-DATE AVAILABLE AGE
prometheus 1/1 1 1 3h24m
```
# Interesting
https://kubernetes.io/docs/concepts/storage/persistent-volumes/#cross-namespace-data-sources

View File

@@ -0,0 +1,25 @@
# Initial notes
```
.1 Gateway
.2/3 DHCP-DNS
6-9 Kubernetes masters.
10-15 Kubernetes slaves.
20 Public Ingress
21 Local Ingress
22-38 Kubernetes LBs/Deployments/Services
39 Egress gateway
50-60 Standalone Hosts
61-70 Proxmox
100-120 VMs
140-149 Handpicked client hosts
150-200 DHCP range
250-255 Wifi and stuff
```

View File

@@ -0,0 +1,392 @@
# Description
Very much what the title says.
0. Search.
1. Create Proxmox VM and install OS on it.
2. Install cluster thingies to the VM.
3. Backup Cluster/Master Node
4. Stop Old Master Node
5. Restore Cluster on New Master Node
6. Update New Master Node IP to Use the Old Master Node IP
7. Rejoin All Nodes to the "New Cluster"
# Notes
## Possible issues?
- Master node name might present some discrepancies, will need to test.
- When the cluster is restored in the New Master Node, grant access to the client in that NFS server.
## Virtual Master Hardware
- 2 CPU Cores
- 8 GB of RAM
# Procedure
- [x] VM Created
- [x] OS (Debian) Installed
- [x] Edit Cluster Setup installer Ansible script into allowing not proceeding further after installing the packages/stuff necessary.
- [x] Install guest agent in all the VMs (I did kinda forget about that)
- [x] Backup VM
- [x] Follow the guide from below
- [ ] Perform another backup to the control plane VM
# Links
I'm going to be following this:
https://serverfault.com/questions/1031093/migration-of-kubernetes-master-node-from-1-server-to-another-server
[//]: # ()
[//]: # (# Backup ETCD Kubernetes)
[//]: # ()
[//]: # (https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/)
[//]: # ()
[//]: # ()
# Verify your etcd data directory
SSH into the masterk node.
```shell
kubectl get pods -n kube-system etcd-pi4.filter.home -oyaml | less
```
```yaml
...
volumeMounts:
- mountPath: /var/lib/etcd
name: etcd-data
- mountPath: /etc/kubernetes/pki/etcd
name: etcd-certs
...
volumes:
- hostPath:
path: /etc/kubernetes/pki/etcd
type: DirectoryOrCreate
name: etcd-certs
- hostPath:
path: /var/lib/etcd
type: DirectoryOrCreate
name: etcd-data
```
# Copy from old_master to new_master
> Why **bakup** instead of ba**ck**up? Because I want to use the K as Kubernetes.
## On new_master
```shell
mkdir bakup
```
## on OLD_master
```shell
sudo scp -r /etc/kubernetes/pki master2@192.168.1.173:~/bakup/
```
```console
healthcheck-client.key 100% 1679 577.0KB/s 00:00
server.crt 100% 1216 1.1MB/s 00:00
server.key 100% 1679 1.1MB/s 00:00
peer.crt 100% 1216 440.5KB/s 00:00
ca.crt 100% 1094 461.5KB/s 00:00
healthcheck-client.crt 100% 1159 417.8KB/s 00:00
ca.key 100% 1679 630.8KB/s 00:00
peer.key 100% 1679 576.4KB/s 00:00
front-proxy-client.crt 100% 1119 859.7KB/s 00:00
front-proxy-ca.key 100% 1679 672.4KB/s 00:00
ca.crt 100% 1107 386.8KB/s 00:00
sa.pub 100% 451 180.7KB/s 00:00
front-proxy-client.key 100% 1679 1.4MB/s 00:00
apiserver-etcd-client.key 100% 1675 1.3MB/s 00:00
apiserver.crt 100% 1294 819.1KB/s 00:00
ca.key 100% 1679 1.3MB/s 00:00
sa.key 100% 1679 1.5MB/s 00:00
apiserver-kubelet-client.crt 100% 1164 908.2KB/s 00:00
apiserver-kubelet-client.key 100% 1679 1.2MB/s 00:00
apiserver-etcd-client.crt 100% 1155 927.9KB/s 00:00
apiserver.key 100% 1675 1.4MB/s 00:00
front-proxy-ca.crt 100% 1123 939.7KB/s 00:00
```
## Remove "OLD" certs from the backup created
### on new_master
```shell
rm ~/bakup/pki/{apiserver.*,etcd/peer.*}
```
```console
removed '~/bakup/pki/apiserver.crt'
removed '~/bakup/pki/apiserver.key'
removed '~/bakup/pki/etcd/peer.crt'
removed '~/bakup/pki/etcd/peer.key'
```
## Move backup Kubernetes to the kubernetes directory (new_master)
```shell
cp -r ~/bakup/pki /etc/kubernetes/
```
```console
'~/bakup/pki' -> '/etc/kubernetes/pki'
'~/bakup/pki/etcd' -> '/etc/kubernetes/pki/etcd'
'~/bakup/pki/etcd/healthcheck-client.key' -> '/etc/kubernetes/pki/etcd/healthcheck-client.key'
'~/bakup/pki/etcd/server.crt' -> '/etc/kubernetes/pki/etcd/server.crt'
'~/bakup/pki/etcd/server.key' -> '/etc/kubernetes/pki/etcd/server.key'
'~/bakup/pki/etcd/ca.crt' -> '/etc/kubernetes/pki/etcd/ca.crt'
'~/bakup/pki/etcd/healthcheck-client.crt' -> '/etc/kubernetes/pki/etcd/healthcheck-client.crt'
'~/bakup/pki/etcd/ca.key' -> '/etc/kubernetes/pki/etcd/ca.key'
'~/bakup/pki/front-proxy-client.crt' -> '/etc/kubernetes/pki/front-proxy-client.crt'
'~/bakup/pki/front-proxy-ca.key' -> '/etc/kubernetes/pki/front-proxy-ca.key'
'~/bakup/pki/ca.crt' -> '/etc/kubernetes/pki/ca.crt'
'~/bakup/pki/sa.pub' -> '/etc/kubernetes/pki/sa.pub'
'~/bakup/pki/front-proxy-client.key' -> '/etc/kubernetes/pki/front-proxy-client.key'
'~/bakup/pki/apiserver-etcd-client.key' -> '/etc/kubernetes/pki/apiserver-etcd-client.key'
'~/bakup/pki/ca.key' -> '/etc/kubernetes/pki/ca.key'
'~/bakup/pki/sa.key' -> '/etc/kubernetes/pki/sa.key'
'~/bakup/pki/apiserver-kubelet-client.crt' -> '/etc/kubernetes/pki/apiserver-kubelet-client.crt'
'~/bakup/pki/apiserver-kubelet-client.key' -> '/etc/kubernetes/pki/apiserver-kubelet-client.key'
'~/bakup/pki/apiserver-etcd-client.crt' -> '/etc/kubernetes/pki/apiserver-etcd-client.crt'
'~/bakup/pki/front-proxy-ca.crt' -> '/etc/kubernetes/pki/front-proxy-ca.crt'
```
## ETCD snapshot on OLD_master
### from Kubectl
Check etcd api version.
```shell
kubectl exec -it etcd-pi4.filter.home -n kube-system -- etcdctl version
```
```console
etcdctl version: 3.5.10
API version: 3.5
```
### Create snapshot through etcd pod
```shell
kubectl exec -it etcd-pi4.filter.home -n kube-system -- etcdctl --endpoints https://127.0.0.1:2379 --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/server.crt --key /etc/kubernetes/pki/etcd/server.key snapshot save /var/lib/etcd/snapshot1.db
```
```console
{"level":"info","ts":"2024-03-10T04:38:23.909625Z","caller":"snapshot/v3_snapshot.go:65","msg":"created temporary db file","path":"/var/lib/etcd/snapshot1.db.part"}
{"level":"info","ts":"2024-03-10T04:38:23.942816Z","logger":"client","caller":"v3@v3.5.10/maintenance.go:212","msg":"opened snapshot stream; downloading"}
{"level":"info","ts":"2024-03-10T04:38:23.942946Z","caller":"snapshot/v3_snapshot.go:73","msg":"fetching snapshot","endpoint":"https://127.0.0.1:2379"}
{"level":"info","ts":"2024-03-10T04:38:24.830242Z","logger":"client","caller":"v3@v3.5.10/maintenance.go:220","msg":"completed snapshot read; closing"}
{"level":"info","ts":"2024-03-10T04:38:25.395294Z","caller":"snapshot/v3_snapshot.go:88","msg":"fetched snapshot","endpoint":"https://127.0.0.1:2379","size":"19 MB","took":"1 second ago"}
{"level":"info","ts":"2024-03-10T04:38:25.395687Z","caller":"snapshot/v3_snapshot.go:97","msg":"saved","path":"/var/lib/etcd/snapshot1.db"}
Snapshot saved at /var/lib/etcd/snapshot1.db
```
### Transfer snapshot to the new_master node
### on the OLD_master
```shell
scp /var/lib/etcd/snapshot1.db master2@192.168.1.173:~/bakup
```
```text
snapshot1.db 100% 19MB 44.0MB/s 00:00
```
### Update kubeadm.config
### on the OLD_master
```shell
kubectl get cm -n kube-system kubeadm-config -oyaml
```
```text
apiVersion: v1
data:
ClusterConfiguration: |
apiServer:
extraArgs:
authorization-mode: Node,RBAC
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.k8s.io
kind: ClusterConfiguration
kubernetesVersion: v1.28.7
networking:
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
scheduler: {}
kind: ConfigMap
metadata:
creationTimestamp: "2024-02-22T21:45:42Z"
name: kubeadm-config
namespace: kube-system
resourceVersion: "234"
uid: c56b87b1-691d-4277-b66c-ab6035cead6a
```
### on the new_master
#### Create kubeadm-config.yaml
```shell
touch kubeadm-config.yaml
```
I have used the information from the previously displayed cm to create the following file (basically filling the default kubeadm-config file):
Note that the token used differs.
```yaml
apiVersion: kubeadm.k8s.io/v1beta3
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.abcdef0123456789
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 192.168.1.9
bindPort: 6443
nodeRegistration:
criSocket: unix:///var/run/containerd/containerd.sock
imagePullPolicy: IfNotPresent
name: masterk
taints: null
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta3
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.k8s.io
kind: ClusterConfiguration
kubernetesVersion: 1.29.0
networking:
dnsDomain: cluster.local
serviceSubnet: 10.96.0.0/12
scheduler: {}
```
### Install etcdctl
https://github.com/etcd-io/etcd/releases/tag/v3.5.12
### Restore from snapshot into new_master
This time I will be using the `etcdctl` cli tool.
```shell
mkdir /var/lib/etcd
```
```shell
ETCDCTL_API=3 /tmp/etcd-download-test/etcdctl --endpoints https://127.0.0.1:2379 snapshot restore './bakup/snapshot1.db' && mv ./default.etcd/member/ /var/lib/etcd/
```
```console
Deprecated: Use `etcdutl snapshot restore` instead.
2024-03-10T06:09:17+01:00 info snapshot/v3_snapshot.go:260 restoring snapshot {"path": "./bakup/snapshot1.db", "wal-dir": "default.etcd/member/wal", "data-dir": "default.etcd", "snap-dir": "default.etcd/member/snap"}
2024-03-10T06:09:17+01:00 info membership/store.go:141 Trimming membership information from the backend...
2024-03-10T06:09:18+01:00 info membership/cluster.go:421 added member {"cluster-id": "cdf818194e3a8c32", "local-member-id": "0", "added-peer-id": "8e9e05c52164694d", "added-peer-peer-urls": ["http://localhost:2380"]}
2024-03-10T06:09:18+01:00 info snapshot/v3_snapshot.go:287 restored snapshot {"path": "./bakup/snapshot1.db", "wal-dir": "default.etcd/member/wal", "data-dir": "default.etcd", "snap-dir": "default.etcd/member/snap"}
```
### Do shenanigans to replace the OLD_node by the new_node
Aka replace the IP maneuvers.
### Start new node
```shell
kubeadm init --ignore-preflight-errors=DirAvailable--var-lib-etcd --config kubeadm-config.yaml
```
```console
kubeadm init --ignore-preflight-errors=DirAvailable--var-lib-etcd --config kubeadm-config.yaml
[init] Using Kubernetes version: v1.29.0
[preflight] Running pre-flight checks
[WARNING DirAvailable--var-lib-etcd]: /var/lib/etcd is not empty
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
W0310 06:42:10.268972 1600 checks.go:835] detected that the sandbox image "registry.k8s.io/pause:3.6" of the container runtime is inconsistent with that used by kubeadm. It is recommended that using "registry.k8s.io/pause:3.9" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
```
## Join "old nodes" into the "new masterk"
To my surprise I didn't need to rejoin nodes, only remove the old control plane.
```shell
kubectl get nodes
```
```console
NAME STATUS ROLES AGE VERSION
masterk.filter.home Ready control-plane 4m59s v1.29.2
pi4.filter.home NotReady control-plane 16d v1.29.2
slave01.filter.home Ready <none> 10d v1.29.2
slave02.filter.home Ready <none> 16d v1.29.2
slave03.filter.home Ready <none> 16d v1.29.2
slave04.filter.home Ready <none> 16d v1.29.2
```
```shell
kubectl delete node pi4.filter.home
```
```console
node "pi4.filter.home" deleted
```
```shell
kubectl get nodes
```
```console
NAME STATUS ROLES AGE VERSION
masterk.filter.home Ready control-plane 5m20s v1.29.2
slave01.filter.home Ready <none> 10d v1.29.2
slave02.filter.home Ready <none> 16d v1.29.2
slave03.filter.home Ready <none> 16d v1.29.2
slave04.filter.home Ready <none> 16d v1.29.2
```
So very much done, since I didn't need to rejoin I will be paying extra attention to the nodes for a while.

177
README.md
View File

@@ -3,7 +3,21 @@ gitea: none
include_toc: true
---
## Older patch notes/version
Select different tags.
## TLDR Changelog
- Replaced the old standalone Docker/NFS server for a Proxmox/NFS instance.
- Added 2 VMs as worker nodes to the cluster, they will be used/are intended for x64 bit images.
- One of the new added worker VMs receives a GPU through Proxmox PCI pass through.
- Some services might have been removed or added.
# Devices
## List of current devices:
@@ -11,122 +25,91 @@ include_toc: true
```yaml
Gateway: 192.168.1.1
Pi4: 192.168.1.2
Srv: 192.168.1.3
Proxmox/NFS: somewhere.
```
### Kluster
> Kubernetes Cluster
A set of Orange PI 5, so far all of them are the 4GB of RAM version.
- Pi 4 with 4GB running as a Master. (Masterk/Pi4)
- A pair of Orange PI 5, so far all of them are the 8GB of RAM version. (Slave01-2)
- Proxmox VMs, both with 3 CPU cores and 8GB of RAM (Slave03-4)
- `Slave04` contains a GPU through Proxmox PCI passthrough.
```yaml
Masterk: 192.168.1.10
Slave01: 192.168.1.11
Masterk: 192.168.1.9
Slave01: 192.168.1.10
Slave02: 192.168.1.11
Slave03: 192.168.1.12
Slave04: 192.168.1.13
```
## Which services are running where.
```yaml
Node Available(GPUs) Used(GPUs)
pi4.filter.home 0 0
slave01.filter.home 0 0
slave02.filter.home 0 0
slave03.filter.home 0 0
slave04.filter.home 1 0
```
> **Note**:
> `Deprecated` doesn't mean that the service has been obliterated, but that the service is no longer being run on that specific node/instance.
## Which services I'm hosting
### Pi4 (main reverse proxy)
### Home Network
> Initially the Pi4 would only contain lightweight services, performing "core" functions on the network, as well as providing access to some very specific web services that wouldn't incur much load (such as DNS, DHCP, Gitea, DuckDNS IP updater and `Tube` + `Traefik` as a main reverse proxy for the network).
- CoreDNS
- DHCPd
Services run on `docker` / `docker-compose`.
### Discord Bots
#### Containers
- Traefik
- Gitea
- Portainer
- Registry
- containrrr/watchtower
- https://gitea.filterhome.xyz/ofilter/Steam_Invite_Discord (both Master and Dev branches)
- Shlink + ShlinkUI (deployed as it has functionality with the Steam Discord Bot from above)
##### Monitoring
### Public DNS
- grafana
- prometheus
- alert manager
- zcube/cadvisor
##### Home Network
- Coredns
- dhcpd
- Godaddy
- Duckdns
##### Misc
### CRDs
- DuckDNS
- emulatorjs
- [Steam_Invite_Discord](https://gitea.filterhome.xyz/ofilter/Steam_Invite_Discord)
##### Deprecated
- bind9 DNS
- [Internet speedtest metrics](https://github.com/nickmaccarthy/internet-speed-test-metrics)
- kanboard
- mantis
- minecraft server + [Minecraft Discord Bot](https://gitea.filterhome.xyz/ofilter/Minecraft_Discord_Bot)
- [FGO Tools](https://github.com/OriolFilter/FGO_tools)
- muximix
- openvpn
- Plex
- Portainer
- mantis
- [speedtest_container](https://gitea.filterhome.xyz/ofilter/speedtest_contiainer)
- splunk
- vaultwarden
### Srv (main media server)
> Initially the server would contain media services and some with higher load, like Minecraft and Factorio servers. Right now this server is the designated media server provider, and it also contains other more generalized services, as I am currently planning a migration to reorganize the infrastructure.
Services run on `docker` / `docker-compose`.
#### Containers
- Traefik
- Portainer
- Jenkins
- containrrr/watchtower
- zcube/cadvisor
##### Media
- kizaing/kavita
- prologic/tube
- gotson/komga
- lscr.io/linuxserver/qbittorrent
- grafana
- lscr.io/linuxserver/jellyfin
- difegue/lanraragi
- filebrowser/filebrowser
##### Misc
- chesscorp/chess-club
##### Deprecated
##### Notes
Traefik generates public certificates automatically
> https://doc.traefik.io/traefik/https/acme/
#### Kluster
> Idk I can run whatever I want.\
> So far it has been an Istio playground for me to create [an Istio documentation](https://gitea.filterhome.xyz/ofilter/Istio_Examples).
- Cilium
- Istio Service Mesh
- Cert Manager
- Istio
- NVIDIA GPU Operator
- NFS Volume Provisioner
- MetalLB
### Observability
- Grafana
- Prometheus
- Kiali
- Jaeger
### CI/CD
- Jenkins master + dynamic agent(s)
- Docker Registry
- Skaffold (Client/User side, not running on the Kubernetes cluster, yet relies on it to create multiarch docker images)
### Git servers
- Gitea
### Media related
- Tube
- Fireshare
- Filebrowser
- Jellyfin
- qBitTorrent
## Downsides of my current setup
- Only 1 Kubernetes master node, therefore no full High Availability
- Only 1 NFS server / no HA NFS server; therefore, if the NFS server is down, most of the services on the Kubernetes cluster will also be down, as they depend on that NFS server.
##### Services
-

View File

@@ -1,42 +0,0 @@
https://github.com/mikeroyal/Self-Hosting-Guide#backups
https://github.com/mikeroyal/Self-Hosting-Guide#snapshots-managementsystem-recovery
https://github.com/mikeroyal/Self-Hosting-Guide#file-systems
https://github.com/mikeroyal/Self-Hosting-Guide#storage
https://goteleport.com/
---
Volumes
https://github.com/seaweedfs/seaweedfs
---
DNS
https://github.com/awesome-selfhosted/awesome-selfhosted#dns
https://github.com/awesome-foss/awesome-sysadmin#dns---control-panels--domain-management
---
#3dp
https://github.com/Floppy/van_dam
---
? https://goteleport.com/
---
Gitea thingies
https://docs.gitea.com/awesome?_highlight=content#sdk