Skip to content

Commit

Permalink
Basics
Browse files Browse the repository at this point in the history
  • Loading branch information
helmut-hoffer-von-ankershoffen committed Jul 14, 2019
0 parents commit 185912c
Show file tree
Hide file tree
Showing 60 changed files with 8,951 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
.idea
*~
.DS_Store
._.DS_Store
.com.apple.timemachine.supported
.docker-sync
.gitattributes

workflow/provision/image/*
!workflow/provision/image/.gitkeep

.ipynb_checkpoints
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Helmut Hoffer von Ankershoffen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
159 changes: 159 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
.DEFAULT_GOAL := help
SHELL := /bin/bash

# "help" runs a command and must never be shadowed by a file of the same name.
.PHONY: help

# Self-documenting help: every target carries a trailing "## description",
# which is extracted and printed as a two-column table.
help: ## This help panel.
	@printf "%-30s %s\n" "DevOps console for Project Jetson"
	@printf "%-30s %s\n" "=================================="
	@printf "%-30s %s\n" ""
	@printf "%-30s %s\n" "Target" "Help"
	@printf "%-30s %s\n" "------" "----"
	@grep -hE '^[a-zA-Z0-9_-]+:.*##' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*## "} {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

# Catch-all no-op: extra words on the command line (forwarded as a remote
# command by nano-one-exec via MAKECMDGOALS) are silently ignored instead of
# failing as unknown targets.
%: # thanks to chakrit
	@: # thanks to William Pursell


# All targets below run commands rather than produce files — mark them phony.
.PHONY: bootstrap-environment requirements requirements-bootstrap requirements-docker requirements-hosts requirements-packages requirements-ansible

# Full workstation setup: install everything, then print the follow-up
# instructions for finishing the Docker installation.
bootstrap-environment: requirements bootstrap-environment-message ## Bootstrap development environment!

requirements: requirements-bootstrap ## Install requirements on workstation

requirements-bootstrap: ## Prepare basic packages on workstation
	workflow/requirements/macOS/bootstrap
# NOTE(review): "2.2." looks like a truncated Ruby version (e.g. "2.2.10") —
# confirm the intended version; as written rbenv will likely reject it.
	source ~/.bash_profile && rbenv install --skip-existing 2.2.
	source ~/.bash_profile && ansible-galaxy install -r workflow/requirements/macOS/ansible/requirements.yml
	ansible-playbook -i "localhost," workflow/requirements/generic/ansible/playbook.yml --tags "hosts" --ask-become-pass
	source ~/.bash_profile && ansible-playbook -i "localhost," workflow/requirements/macOS/ansible/playbook.yml --ask-become-pass
	source ~/.bash_profile && $(SHELL) -c 'cd workflow/requirements/macOS/docker; . ./daemon_check.sh'

requirements-docker: ## Prepare Docker on workstation
	source ~/.bash_profile && $(SHELL) -c 'cd workflow/requirements/macOS/docker; . ./daemon_check.sh'

requirements-hosts: ## Prepare /etc/hosts on workstation
	ansible-playbook -i "localhost," workflow/requirements/generic/ansible/playbook.yml --tags "hosts" --ask-become-pass

requirements-packages: ## Install packages on workstation
	ansible-playbook -i "localhost," workflow/requirements/macOS/ansible/playbook.yml --ask-become-pass

requirements-ansible: ## Install ansible requirements on workstation for provisioning jetson
	ansible-galaxy install -r workflow/provision/requirements.yml

.PHONY: bootstrap-environment-message

bootstrap-environment-message: ## Echo a message that the app installation is happening now
	@echo ""
	@echo ""
	@echo "Welcome!"
	@echo ""
# Inner quotes around "the whale" must be escaped — unescaped they close the
# string and leave a bare ")" that is a bash syntax error.
	@echo "1) Please follow the instructions to fully install and start Docker - Docker started up when its Icon (\"the whale\") is no longer moving."
	@echo ""
	@echo "2) Click on the Docker icon, goto Preferences / Advanced, set Memory to at least 4GiB and click Apply & Restart."
	@echo ""
	@echo ""


.PHONY: image-download setup-access-secure

# Fetches the Jetpack SD-card image and unpacks it in place; the zip is
# removed afterwards (workflow/provision/image/* is gitignored).
image-download: ## Download Nvidia Jetpack into workflow/provision/image
	cd workflow/provision/image && wget -N -O jetson-nano-sd.zip https://developer.nvidia.com/embedded/dlc/jetson-nano-dev-kit-sd-card-image && unzip -o *.zip && rm -f jetson-nano-sd.zip

# NOTE(review): the "[email protected]" address below looks scrubbed by an
# email-protection filter — presumably admin@nano-one.local; verify.
setup-access-secure: ## Allow passwordless ssh and sudo, disallow ssh with password
	ssh-copy-id -i ~/.ssh/id_rsa [email protected]
	cd workflow/provision && ansible-playbook main.yml --tags "access_secure" -b -K


# Each target runs the provisioning playbook restricted to one Ansible tag.
.PHONY: provision provision-base provision-kernel provision-firewall provision-lxde provision-vnc provision-xrdp provision-k8s provision-build provision-swap provision-performance-mode

provision: ## Provision the Nvidia Jetson Nano
	cd workflow/provision && ansible-playbook main.yml --tags "provision"

provision-base: ## Provision base
	cd workflow/provision && ansible-playbook main.yml --tags "base"

provision-kernel: ## Compile custom kernel for docker - takes ca. 60 minutes
	cd workflow/provision && ansible-playbook main.yml --tags "kernel"

provision-firewall: ## Provision firewall
	cd workflow/provision && ansible-playbook main.yml --tags "firewall"

provision-lxde: ## Provision LXDE
	cd workflow/provision && ansible-playbook main.yml --tags "lxde"

provision-vnc: ## Provision VNC
	cd workflow/provision && ansible-playbook main.yml --tags "vnc"

provision-xrdp: ## Provision XRDP
	cd workflow/provision && ansible-playbook main.yml --tags "xrdp"

provision-k8s: ## Provision Kubernetes
	cd workflow/provision && ansible-playbook main.yml --tags "k8s"

provision-build: ## Provision build environment
	cd workflow/provision && ansible-playbook main.yml --tags "build"

provision-swap: ## Provision swap
	cd workflow/provision && ansible-playbook main.yml --tags "swap"

provision-performance-mode: ## Set performance mode
	cd workflow/provision && ansible-playbook main.yml --tags "performance_mode"

.PHONY: nano-one-ssh nano-one-ssh-build nano-one-exec

# NOTE(review): the "[email protected]" addresses below look scrubbed by an
# email-protection filter — presumably admin@/build@nano-one.local; verify.
nano-one-ssh: ## ssh to nano-one as user admin
	ssh [email protected]

nano-one-ssh-build: ## ssh to nano-one as user build
	ssh [email protected]

# Forwards any extra make goals as the remote command line; the "%:" no-op
# catch-all at the top of this file swallows those extra goals so make does
# not treat them as unknown targets.
nano-one-exec: ## exec command on nano-one - you must pass in arguments e.g. tegrastats
	ssh [email protected] $(filter-out $@,$(MAKECMDGOALS))


.PHONY: k8s-proxy k8s-dashboard-bearer-token-show k8s-dashboard-open

# Blocks the terminal; dashboard access below goes through this proxy.
k8s-proxy: ## Open proxy
	kubectl proxy

k8s-dashboard-bearer-token-show: ## Show dashboard bearer token
	workflow/k8s/dashboard-bearer-token-show

k8s-dashboard-open: ## Open Dashboard
	python -mwebbrowser http://localhost:8001/api/v1/namespaces/kube-system/services/https:kubernetes-dashboard:/proxy/#!/overview?namespace=default


.PHONY: device-query-deploy device-query-log-show device-query-delete device-query-dev

# NOTE(review): device-query-build-and-push has no explicit rule in this file —
# it resolves to the "%:" no-op catch-all; the actual build is performed by the
# skaffold custom builder. Confirm this is intended.
device-query-deploy: device-query-build-and-push ## Build and deploy device query
	kubectl create namespace jetson-device-query || true
	cd workflow/deploy/device-query && skaffold run

device-query-log-show: ## Show log of pod
	cd workflow/deploy/device-query && ./log-show

# "|| true" keeps the delete idempotent when the namespace is already gone.
device-query-delete: ## Delete device query deployment
	kubectl delete namespace jetson-device-query || true
	cd workflow/deploy/device-query && skaffold delete

device-query-dev: ## Enter build, deploy, tail, watch cycle for device query
	kubectl create namespace jetson-device-query || true
	cd workflow/deploy/device-query && skaffold dev


.PHONY: jupyter-deploy jupyter-open jupyter-log-show jupyter-delete jupyter-dev

# NOTE(review): jupyter-build-and-push has no explicit rule in this file — it
# resolves to the "%:" no-op catch-all; the actual build is done by skaffold.
jupyter-deploy: jupyter-build-and-push ## Build and deploy jupyter
	kubectl create namespace jetson-jupyter || true
	kubectl create secret generic jupyter.polarize.ai --from-file workflow/deploy/jupyter/.basic-auth --namespace=jetson-jupyter || true
	cd workflow/deploy/jupyter && skaffold run

jupyter-open: ## Open browser pointing to jupyter notebook
	python -mwebbrowser http://jupyter.nano-one.local/

jupyter-log-show: ## Show log of pod
	cd workflow/deploy/jupyter && ./log-show

# "|| true" keeps the delete idempotent when the namespace is already gone.
jupyter-delete: ## Delete jupyter deployment
	kubectl delete namespace jetson-jupyter || true
	cd workflow/deploy/jupyter && skaffold delete

jupyter-dev: ## Enter build, deploy, tail, watch cycle for jupyter
	kubectl create namespace jetson-jupyter || true
	kubectl create secret generic jupyter.polarize.ai --from-file workflow/deploy/jupyter/.basic-auth --namespace=jetson-jupyter || true
	cd workflow/deploy/jupyter && skaffold dev
93 changes: 93 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# jetson

Experimenting with Nvidia Jetson Nano, Kubernetes and ML.

Hints:
- Assumes an Nvidia Jetson Nano, TX2 or AGX Xavier as embedded device, called "nano" below for simplicity.
- Assumes a macOS device for development
- Assumes access to a bare-metal Kubernetes cluster the nano can join e.g. set up using https://github.com/helmuthva/ceil/tree/max.
- Assumes basic knowledge of Ansible, Docker and Kubernetes (k8s).


## Features

- [x] basics: Automatically provision requirements on macOS device for development
- [x] basics: Prepare hardware
- [x] basics: Manually provision os
- [x] basics: Automatically provision secure ssh access
- [x] basics: Automatically provision passwordless sudo
- [x] basics: Automatically install basic packages
- [x] basics: Automatically setup LXDE
- [x] basics: Automatically setup VNC
- [x] basics: Automatically setup RDP (optional)
- [x] basics: Automatically setup swap
- [x] basics: Automatically set performance mode
- [X] k8s: Automatically build custom kernel as required by Docker + Kubernetes + Weave networking
- [x] k8s: Automatically join Kubernetes cluster `max` as worker node labeled as `jetson` - see https://github.com/helmuthva/ceil/tree/max reg. `max`
- [x] k8s: Automatically build and deploy CUDA deviceQuery as pod in k8s cluster to validate access to GPU and correct labeling of jetson nodes
- [x] k8s: Build and deploy using Skaffold and kustomize
- [ ] basics: Update to Jetpack 4.2.1 providing support for NGC et al (waiting for release)
- [ ] security: Automatically setup firewall (waiting for iptables fix in Nvidia kernel sources)
- [x] ml: Use Archiconda - the arm flavor of Anaconda - for building Docker containers for arm64
- [x] ml: Automatically build and deploy Jupyter server with support for CUDA accelerated tensorflow and keras as pod in k8s cluster running on jetson node
- [ ] ml: Experiment with containers from NGC
- [ ] community: Author a blog post explaining how to set up ML in Kubernetes on Jetson devices
- [ ] ml: Scale out with Xaviers and deploy Polarize AI core (separate project)


## Bootstrap

1) Execute `make bootstrap-environment` to install requirements on your macOS device and setup hostnames such as `nano-one.local` in your `/etc/hosts`


## Provision

### Manually flash base os, create `admin` account and establish secure access

1) Execute `make image-download` to download and unzip the Nvidia Jetpack image into `workflow/provision/image/`
2) Start the `balenaEtcher` application and flash your micro sd card with the downloaded image
3) Insert the designated micro sd card in your Nvidia Jetson nano and power up
4) Create account with username `admin` and "Administrator" rights via the UI
5) Execute `make setup-access-secure` and enter the password you set for the `admin` user in the step above - passwordless ssh access and sudo will be set up

Hints:
* The `balenaEtcher` application was installed as part of bootstrap on your macOS device

### Automatically provision services, kernel, k8s

1) Execute `make provision` - services will be provisioned, the kernel will be compiled and the Kubernetes cluster will be joined

Hints:
* If you want to provision step by step execute `make help | grep "provision-"` and execute the desired make target e.g. `make provision-kernel`
* SSH into your nano using `make nano-one-ssh` - your ssh public key was uploaded during provisioning so no password is needed
* VNC into your nano by starting the VNC Viewer application which was installed as part of bootstrap and connect to `nano-one.local:5901` - the password is `secret`
* You will have to update the `kubernetes.token` in `workflow/provision/group_vars/all.yml` to a valid join token that can be created using `make k8s-token-create` in `max` cluster


## Build and deploy

1) Execute `make device-query-deploy` to build and deploy a pod into the k8s cluster that queries CUDA capabilities thus validating GPU access from k8s - execute `make device-query-log-show` to show the result after deploying
2) Execute `make jupyter-deploy` to build and deploy a Jupyter server supporting CUDA accelerated TensorFlow + Keras as a k8s pod running on nano - execute `make jupyter-open` to open a browser tab pointing to the Jupyter server

Hints:
- Remote building on nano is implemented using Skaffold and a custom builder: E.g. use `make device-query-dev` to enter a build, deploy, tail, watch cycle.
- Deployments are defined using kustomize - you can thus define overlays for deployments on other clusters easily.
- Archiconda - the arm flavor of Anaconda - is used for installation inside Docker containers, see the Dockerfile of the Jupyter deployment
- To easily inspect the cluster execute the lovely `click` which was installed as part of bootstrap.
- Execute `make help` to show other targets that can be built and deployed


## Additional references

- https://developer.nvidia.com/embedded/learn/get-started-jetson-nano-devkit (intro)
- https://developer.nvidia.com/embedded/jetpack (jetpack)
- https://blog.hackster.io/getting-started-with-the-nvidia-jetson-nano-developer-kit-43aa7c298797 (jetpack,vnc)
- https://devtalk.nvidia.com/default/topic/1051327/jetson-nano-jetpack-4-2-firewall-broken-possible-kernel-compilation-issue-missing-iptables-modules/ (jetpack,firewall,ufw,bug)
- https://devtalk.nvidia.com/default/topic/1052748/jetson-nano/egx-nvidia-docker-runtime-on-nano/ (docker,nvidia,missing)
- https://blog.hypriot.com/post/nvidia-jetson-nano-build-kernel-docker-optimized/ (docker,workaround)
- https://github.com/Technica-Corporation/Tegra-Docker (docker,workaround)
- https://medium.com/@jerry_liang/deploy-gpu-enabled-kubernetes-pod-on-nvidia-jetson-nano-ce738e3bcda9 (k8s)
- https://gist.github.com/buptliuwei/8a340cc151507cb48a071cda04e1f882 (k8s)
- https://github.com/dusty-nv/jetson-inference/ (ml)
- https://docs.nvidia.com/deeplearning/frameworks/install-tf-jetson-platform/index.html (tensorflow)
- https://devtalk.nvidia.com/default/topic/1043951/jetson-agx-xavier/docker-gpu-acceleration-on-jetson-agx-for-ubuntu-18-04-image/post/5296647/#5296647 (docker,tensorflow)
30 changes: 30 additions & 0 deletions workflow/deploy/device-query/builder
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Skaffold custom builder: sync sources to the Jetson nano, compile the CUDA
# deviceQuery sample there, bake it into a Docker image, then tag and
# optionally push each requested image name.
#
# Environment (provided by Skaffold's custom-build contract):
#   IMAGES     - space-separated fully-qualified image names to tag
#   PUSH_IMAGE - "true" when the tagged images must be pushed to the registry
#
# NOTE(review): the "[email protected]" addresses below look scrubbed by an
# email-protection filter — presumably build@nano-one.local; verify.

# Fail fast: without -e a failed remote build would still be tagged/pushed.
set -euo pipefail

echo "Building $IMAGES ..."

## Sync src to nano
rsync -rlptza --delete -P src/ [email protected]:~/device-query

## Build on nano
ssh [email protected] << EOF
echo "Building executable ..."
cd /usr/local/cuda/samples/1_Utilities/deviceQuery
sudo make clean
sudo make
cp deviceQuery ~/device-query/deviceQuery
echo "Building Docker image ..."
docker build -t device_query ~/device-query
EOF

## Tag and possibly push image
for image in $IMAGES
do
    echo "Tagging with $image ..."
    ssh [email protected] "docker tag device_query $image"
    # Explicit string test: the original "if $PUSH_IMAGE" executed the value as
    # a command and evaluated truthy (pushed!) when PUSH_IMAGE was unset/empty.
    if [ "${PUSH_IMAGE:-false}" = "true" ]
    then
        echo "Pushing $image ..."
        ssh [email protected] "docker push $image"
    fi
done
61 changes: 61 additions & 0 deletions workflow/deploy/device-query/kustomize/base/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Deployment running the CUDA deviceQuery image on a Jetson node.
# The pod is privileged and bind-mounts the Tegra GPU device nodes and driver
# libraries from the host so CUDA is usable inside the container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: device-query
  namespace: jetson-device-query
spec:
  replicas: 1
  selector:
    matchLabels:
      app: device-query
  template:
    metadata:
      name: device-query
      labels:
        app: device-query
    spec:
      hostname: device-query
      containers:
        - name: device-query
          # Fixed garbled registry host: "max-one.loc al" -> "max-one.local".
          image: max-one.local:5001/jetson/device-query
          volumeMounts:
            - mountPath: /dev/nvhost-ctrl
              name: nvhost-ctrl
            - mountPath: /dev/nvhost-ctrl-gpu
              name: nvhost-ctrl-gpu
            - mountPath: /dev/nvhost-prof-gpu
              name: nvhost-prof-gpu
            - mountPath: /dev/nvmap
              name: nvmap
            - mountPath: /dev/nvhost-gpu
              name: nvhost-gpu
            - mountPath: /dev/nvhost-as-gpu
              name: nvhost-as-gpu
            # Host Tegra driver libraries, required by the CUDA runtime.
            - mountPath: /usr/lib/aarch64-linux-gnu/tegra
              name: lib
          # Privileged is required for direct access to the GPU device nodes.
          securityContext:
            privileged: true
      volumes:
        - name: nvhost-ctrl
          hostPath:
            path: /dev/nvhost-ctrl
        - name: nvhost-ctrl-gpu
          hostPath:
            path: /dev/nvhost-ctrl-gpu
        - name: nvhost-prof-gpu
          hostPath:
            path: /dev/nvhost-prof-gpu
        - name: nvmap
          hostPath:
            path: /dev/nvmap
        - name: nvhost-gpu
          hostPath:
            path: /dev/nvhost-gpu
        - name: nvhost-as-gpu
          hostPath:
            path: /dev/nvhost-as-gpu
        - name: lib
          hostPath:
            path: /usr/lib/aarch64-linux-gnu/tegra
      # Only schedule on nodes labeled as Jetson devices.
      nodeSelector:
        jetson: "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# Kustomize base for device-query: includes the Deployment manifest.
resources:
- deployment.yaml
Loading

0 comments on commit 185912c

Please sign in to comment.