Commit 633b079f authored by Alka Nixon

Merge branch qos-scheduler:main into main

parents aff242e9 6a1c98f8
Branches main
Tags v1.1.1
Showing 1696 additions and 599 deletions
# SPDX-FileCopyrightText: 2024 Siemens AG
# SPDX-License-Identifier: Apache-2.0
*.sh text eol=lf
qos-scheduler/.devcontainer/Dockerfile eol=lf
......@@ -4,13 +4,15 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/go/.devcontainer/base.Dockerfile
# [Choice] Go version (use -bullseye variants on local arm64/Apple Silicon): 1, 1.19, 1.18, 1-bullseye, 1.19-bullseye, 1.18-bullseye, 1-buster, 1.19-buster, 1.18-buster
ARG VARIANT="1.19-bullseye"
ARG VARIANT="1.21-bullseye"
ARG GOOS="linux"
ARG GOARCH="amd64"
FROM mcr.microsoft.com/vscode/devcontainers/go:0-${VARIANT}
ARG ENVTEST_K8S_VERSION="1.26.1"
FROM mcr.microsoft.com/vscode/devcontainers/go:${VARIANT}
ENV GOOS=$GOOS
ENV GOARCH=$GOARCH
ENV ENVTEST_K8S_VERSION=$ENVTEST_K8S_VERSION
# [Choice] Node.js version: none, lts/*, 18, 16, 14
ARG NODE_VERSION="none"
......@@ -18,7 +20,7 @@ RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/
# [Optional] Uncomment this section to install additional OS packages.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
&& apt-get -y install --no-install-recommends cmake
&& apt-get -y install --no-install-recommends cmake yq
# Install kubebuilder
RUN curl -sL -o kubebuilder https://go.kubebuilder.io/dl/latest/$(go env GOOS)/$(go env GOARCH) \
......@@ -40,6 +42,14 @@ chmod 755 -R /usr/local/include
rm -f protoc.zip
EOF
# install kind
RUN <<EOF
#!/bin/bash
curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.22.0/kind-linux-$(go env GOARCH)
chmod +x ./kind
sudo mv ./kind /usr/local/bin/kind
EOF
# [Optional] Uncomment the next lines to use go get to install anything else you need
USER vscode
# RUN go get -x <your-dependency-or-tool>
......@@ -48,7 +58,18 @@ RUN go install github.com/onsi/ginkgo/v2/ginkgo@v2.9.2
RUN go install github.com/onsi/gomega/...
RUN go install github.com/golang/mock/mockgen@v1.6.0
RUN go install sigs.k8s.io/kustomize/kustomize/v4@latest
RUN go install github.com/jstemmer/go-junit-report/v2@latest
# [Optional] Uncomment this line to install global node packages.
# RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g <your-package-here>" 2>&1
# install envtest
WORKDIR /envtest
RUN v="0.18.5" ; export GOBIN=/envtest ;\
path=controller-runtime/archive/refs/tags/v${v}.tar.gz ; \
curl -sL https://github.com/kubernetes-sigs/${path} | tar -xz ;\
cd controller-runtime-${v}/tools/setup-envtest ;\
go install . ;\
cd - ; rm -rf controller-runtime-${v} ;\
strip setup-envtest ;\
./setup-envtest use ${ENVTEST_K8S_VERSION}
......@@ -8,14 +8,14 @@
// Update the VARIANT arg to pick a version of Go: 1, 1.19, 1.18
// Append -bullseye or -buster to pin to an OS version.
// Use -bullseye variants on local arm64/Apple Silicon.
"VARIANT": "1.20-bullseye",
"VARIANT": "1.22-bookworm",
// Options
"NODE_VERSION": "lts/*",
"GOOS": "linux",
"GOARCH": "arm64"
"ENVTEST_K8S_VERSION": "1.26.1",
"GOOS": "linux"
}
},
"runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined", "--network=host" ],
"runArgs": [ "--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined", "--network=host", "--privileged" ],
// Configure tool-specific properties.
"customizations": {
......@@ -27,20 +27,22 @@
"go.useLanguageServer": true,
"go.gopath": "/go",
"go.toolsEnvVars": {
"GOOS": "linux",
"GOARCH": "arm64"
"GOOS": "linux"
}
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"golang.Go"
"golang.Go",
"ms-azuretools.vscode-docker",
"ms-kubernetes-tools.vscode-kubernetes-tools",
"ms-kubernetes-tools.kind-vscode"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
"forwardPorts": [6443],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "go version",
......@@ -50,10 +52,10 @@
// activate docker host access
"features": {
"ghcr.io/devcontainers/features/docker-from-docker:1": {
"ghcr.io/devcontainers/features/docker-outside-of-docker:1": {
"version": "latest",
"moby": true,
"dockerDashComposeVersion": "v1"
"dockerDashComposeVersion": "v2"
},
"ghcr.io/devcontainers/features/kubectl-helm-minikube:1": {
"version": "latest",
......@@ -64,7 +66,8 @@
// additional mounts
"mounts": [
"source=${localEnv:HOME}${localEnv:USERPROFILE}/.kube,target=/home/vscode/.kube,type=bind,consistency=cached"
"source=${localEnv:HOME}${localEnv:USERPROFILE}/.kube,target=/home/vscode/.kube,type=bind,consistency=cached",
"source=${localEnv:HOME}${localEnv:USERPROFILE}/.ssh,target=/home/vscode/.ssh,type=bind,consistency=cached,readonly"
],
"containerEnv": {
......
# SPDX-FileCopyrightText: 2024 Siemens AG
# SPDX-License-Identifier: Apache-2.0
*.sh text eol=lf
qos-scheduler/.devcontainer/Dockerfile eol=lf
......@@ -22,9 +22,21 @@ reuse-report.txt
deliv/**
tmp/**
z???????.mk
/coverage
vscode.mk
tmp/**
deliv/**
*~
./*.mk
coverage
coverage-out
build/ci/script/iid.txt
# Intellij Idea
.idea
.run
scheduler/testdata/optrequest.json
scheduler/testdata/qosmodel.json
scheduler/testdata/qosmodel.save
scheduler/testdata/qosmodel.txt
scheduler/testdata/semiotics.json
scheduler/testdata/semiotics.save
This diff is collapsed.
......@@ -26,6 +26,7 @@ License: Apache-2.0
## Generated code for Protobuf
Files:
scheduler/assignment/optimizer*.pb.go
scheduler/api/v1/groupversion_info.go
scheduler/api/v1alpha1/groupversion_info.go
Copyright: 2023 Siemens AG
License: Apache-2.0
......@@ -33,8 +34,13 @@ License: Apache-2.0
## Files with version numbers
Files:
scheduler/.go-version
container/goenv/version
VERSION
build/ci/script/VERSION
build/ci/goenv/VERSION
build/ci/reuse/VERSION
build/ci/helm/VERSION
build/ci/manifest/VERSION
build/ci/shellcheck/VERSION
Copyright: 2023 Siemens AG
License: Apache-2.0
......
<!---
SPDX-FileCopyrightText: 2023 Siemens AG
SPDX-License-Identifier: CC-BY-SA-4.0
-->
# Quick Start
Steps for getting a demo to run on a local Linux Helm environment and a local
KiND cluster.
## Preconditions
- Docker
- kubectl
- Helm
- KinD
- make
## Ensure access to container registry
- Enable access to Docker hub
```bash
docker login (=> enter username and password or access token)
```
- Test registry access (optional)
```bash
REGISTRY_PREFIX=hecodeco/swm-
REPO_HOST=gitlab.eclipse.org
REPO_GROUP=/eclipse-research-labs/codeco-project/scheduling-and-workload-migration-swm/
REPO_URL=https://${REPO_HOST}${REPO_GROUP}
docker pull ${REGISTRY_PREFIX}controller:1.2.0
```
## Copy required scripts to local environment
- One way is to clone the SWM repository
```bash
cd <qos-scheduler-directory>
git clone ${REPO_URL}qos-scheduler.git
```
**Attention:** If this is done on Windows (using Visual Studio Code - VSC),
depending on the VSC settings, text files may be checked out with CRLF
(Carriage Return Line Feed) line endings (the "Windows way"). Shell scripts
with CRLF endings will not execute or will throw errors. To avoid this:
- Make sure the default VSC setting is to use LF instead of CRLF, or
- For each shell script, open it in VSC and change the End of Line Sequence
to "LF" (shown in the blue bar in the lower right corner of the VSC window
when the file is selected); a normalization sketch from the shell follows
below.
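If some scripts were already checked out with CRLF endings, a minimal
normalization sketch from a Linux shell (assuming `find` and `sed` are
available; `dos2unix` works just as well if installed):
```bash
# Strip the carriage return at the end of each line of every shell script.
find . -name "*.sh" -type f -exec sed -i 's/\r$//' {} +
```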
# Install K8s cluster using KinD
- In local shell:
```bash
cd <qos-scheduler-directory>
export SED="sed"
export STAT="stat"
./start_cluster.sh
```
- This will install a KinD cluster with one master and two worker nodes. The
K8s config to access the cluster will be appended to ~/.kube/config and in
case there are multiple clusters in the config file, the context (of
kubectl) will be switched to the new KinD cluster
- Check whether cluster is working and whether your K8s config is pointing to
the right cluster
```bash
kubectl get nodes
```
This should output something like this:
```text
NAME STATUS ROLES AGE VERSION
c1 Ready <none> 118s v1.23.1
c2 Ready <none> 118s v1.23.1
kind-control-plane Ready control-plane,master 2m31s v1.23.1
```
# Install QoS scheduler
## Install QoS Scheduler and Solver
- In local shell:
```bash
make chart
helm install qostest --namespace=he-codeco-swm --create-namespace tmp/helm
```
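Before continuing, it is worth checking that the release came up (a sketch;
the exact pod names depend on the chart, but the controller and optimizer
pods should appear in the namespace used above):
```bash
helm list --namespace=he-codeco-swm
kubectl get pods --namespace=he-codeco-swm
```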
- Show network topology/ network links (Custom Resources)
```bash
kubectl get networklinks -A
```
This should show you links in the network-k8s-namespace namespace.
- Show network paths (Custom Resources)
```bash
kubectl get networkpaths -A
```
This should show you paths in the same namespace.
# Deploy sample ApplicationGroup and Application
- In local shell:
```bash
kubectl apply -f config/demo/sample-topology.yaml
```
This will create a sample network topology. If you are running a topology
operator in the network-demo-namespace namespace (true if you are using the
latest Helm chart without modifications), you should see the network links
and paths in this namespace soon.
```bash
kubectl get networklinks -A
```
```bash
kubectl get networkpaths -A
```
When the network links and paths you need are there:
```bash
kubectl apply -f config/demo/applicationgroup.yaml
kubectl apply -f config/demo/app-besteffort.yaml
```
- This will create an *ApplicationGroup* that requires a minimum of one
*Application*, and the Application app-besteffort, which consists of
- Two *Workloads* w1 and w2
- each Workload has a container wbitt/network-multitool (which has a
couple of networking tools to test and demonstrate communication)
- *Channel* "ernie" from w1 to w2 (5Mbit/s, 150µs)
- *Channel* with a generated name (app-besteffort-w2-to-app-besteffort-w1)
from w2 to w1 (2Mbit/s, 200µs)
*Channel* "ernie" requests the *BESTEFFORT* network service class, which the
default Helm chart maps to the *k8s* network (which gets created
automatically).
The other channel requests the *BESTEFFORT* network service class, which the
default Helm chart implements using the *k8s* network.
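To see where the two workloads end up, watch the pods come up together with
their node assignments (a sketch; the demo manifests are assumed to create
the pods in your current namespace):
```bash
# -o wide includes the node each pod was assigned to.
kubectl get pods -o wide -w
```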
## Show optimizer call and output
- Once the *ApplicationGroup* is complete, the QoS-Scheduler controller
collects all inputs related to *Workloads* and *Channels* and calls the
WorkloadPlacementSolver (Pod name: optimizer) with this information,
together with information about the infrastructure (compute and network)
- The WorkloadPlacementSolver then calculates and returns a placement for all
Pods of the *ApplicationGroup*
- Look into the logs of the Optimizer Pod (e.g. via Lens, or with kubectl as
sketched below)
- The output of the WorkloadPlacementSolver (assignment of Pods to Nodes) is
written to a custom resource *AssignmentPlan* (Lens: Custom
Resources/qos-scheduler.siemens.com/AssignmentPlan)
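Without Lens, the same information can be inspected with kubectl (a sketch;
`assignmentplans` and the `optimizer` deployment name are assumptions based
on the resource and Pod names mentioned above):
```bash
# Placement calculated by the WorkloadPlacementSolver.
kubectl get assignmentplans -A
# Logs of the optimizer (deployment name assumed to be "optimizer").
kubectl logs --namespace=he-codeco-swm deployment/optimizer
```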
## Show Channels
- Show *Channels* (Custom Resource)
```bash
kubectl get channels
```
- or use Lens: Custom Resources/qos-scheduler.siemens.com/Channels
- One custom resource Channel is created per *Channel* that connects
*Workloads* placed on different nodes, and it corresponds to the channel
being set up
- Services (svc): Each *Channel* also gets a K8s Service with the name
"svc-\<channel-name>-\<namespace>". This can be used to address the Pods
behind this channel using a DNS name.
```bash
kubectl get svc
```
If no name was specified in the deployment file, a name is generated:
"svc-\<src-applicationname>-\<src-workloadname>\-\<tgt-applicationname>-\<tgt-workloadname>-\<namespace>"
(a connectivity check through such a Service is sketched after this list).
- Endpoint (ep): IP address behind a *Channel*. This is what the corresponding
Service resolves to.
```bash
kubectl get ep
```
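Because the workload containers are wbitt/network-multitool images, you can
verify connectivity through a channel's Service from inside one of the pods.
A sketch with illustrative names (look up the actual pod name with
`kubectl get pods` and the Service name with `kubectl get svc` first):
```bash
# Substitute the w1 pod name; svc-ernie-default follows the
# svc-<channel-name>-<namespace> pattern described above.
W1_POD=<w1-pod-name>
kubectl exec "$W1_POD" -- nslookup svc-ernie-default
kubectl exec "$W1_POD" -- ping -c 3 svc-ernie-default
```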
# Uninstall ApplicationGroup and Applications
- Either, in Lens, click on the *ApplicationGroup* -> Delete, or
```bash
kubectl delete -f config/demo/applicationgroup.yaml
```
- This will delete all *Applications* in the *ApplicationGroup* and all
dependent resources
# Uninstall QoS Scheduler and Optimizer
- In Lens:
- Helm/Releases/qostest => delete
- In a local shell
```bash
helm list --namespace=he-codeco-swm
helm uninstall --namespace=he-codeco-swm qostest
```
- Delete the QoS-Scheduler CRDs (not uninstalled by the helm chart).
_Attention:_ the command below deletes all CRDs in the cluster, including
ones not installed by this chart; see the sketch after this block for a more
targeted alternative
```bash
kubectl delete crds --all
```
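`kubectl delete crds --all` removes every CRD in the cluster, not only the
ones from this chart. A more targeted sketch, assuming all QoS-Scheduler CRDs
belong to the `qos-scheduler.siemens.com` API group referenced above:
```bash
# Delete only the CRDs in the qos-scheduler.siemens.com group.
kubectl get crds -o name | grep 'qos-scheduler.siemens.com' | xargs -r kubectl delete
```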
# Uninstall KinD cluster
- In a local shell
```bash
kind delete cluster
```
<!---
SPDX-FileCopyrightText: 2024 Siemens AG
SPDX-License-Identifier: CC-BY-SA-4.0
-->
# ADR000 - Title
Creation date: YYYY-MM-DD
## Motivation/ Problem Statement
...
## Alternatives and Evaluation
### A. Alternative A
Description
#### Advantages
- ...
#### Disadvantages
- ...
### B. Alternative B
Description
#### Advantages
- ...
#### Disadvantages
- ...
## Decision
...
## Consequences
...
<!---
SPDX-FileCopyrightText: 2024 Siemens AG
SPDX-License-Identifier: CC-BY-SA-4.0
-->
# ADR100 - Rescheduling result
Creation date: 2024-08-15
## Motivation/ Problem Statement
- Situation: The QoS-Scheduler calls the Solver/Optimizer to perform a re-scheduling, i.e. there is already an existing placement. The Solver/Optimizer calculates a new placement, which may be different from the existing one.
- Problem: Shall the Solver/Optimizer return the complete new placement or only the delta to the existing placement?
## Alternatives and Evaluation
### A. Return complete new placement
The Solver/Optimizer always returns the complete new placement.
#### Advantages
- No deviation is possible; the Solver/Optimizer response explicitly contains the intended state
#### Disadvantages
- Not compact; the complete placement must always be returned, even if there are few or no changes
### B. Return only delta of new placement to existing placement
The Solver/Optimizer only returns the changed placements:
- For workloads with changed assignment: workload, new node (empty if no longer assigned), old node
- For channels with changed assignment: channel, new path (empty if no longer assigned), old path
#### Advantages
- Compact return value (only deltas)
- The QoS-Scheduler does not need to compare the new placement with the existing placement to detect changes; it can work directly with the returned delta placement
#### Disadvantages
- Transmitting only the delta could lead to error accumulation. This is not critical, however: if there are errors, the actual state (including the error) is transmitted to the Solver/Optimizer (which is stateless) with the next placement call, so errors cannot accumulate.
## Decision
Alternative B. Return only delta
## Consequences
- Implement accordingly in Solver, Optimizer, and QoS-Scheduler
- The gRPC interface is already fit for this solution (but update the documentation/comments)
......@@ -74,9 +74,9 @@ list should look as follows:
isPhysical: true
namespace: TSN
image:
repository: networkoperator
repository: network-topology
tag: network-namespaces
command: /bin/topology-nw-operator
command: /bin/network-topology
```
Each operator specifies the network implementations it will process, and
......
......@@ -12,7 +12,7 @@ gitlab pipeline, among them:
2. `optimizer` contains the optimizer client and a basic server stub in Go.
3. `network` is base classes and sample network operator implementations
4. `controller` is the controller-manager for our custom resources.
5. `nodedaemon` gets deployed to the node daemon set, for information about compute nodes.
5. `node-daemon` gets deployed to the node daemon set, for information about compute nodes.
When the Gitlab pipeline builds a new version, it runs tests, builds images, labels them
with the name of the current branch, and publishes them to the project’s container registry.
......
......@@ -13,64 +13,59 @@ participant "\nChannelController\n" as cc
participant "\nSolver/Optimizer\n" as opt
participant "\nK8s\n" as k8s
== Waiting ==
hnote over ag #LightGreen: Waiting
?o-> ac : create Application
activate ac
note right of ac #LightBlue
Application in state Waiting
end note
hnote over ac #LightGreen: Waiting
ac -> ag : receive Application event
deactivate ac
activate ag
note right of ag #LightYellow
enough Applications Waiting?
end note
note over ag : enough Applications Waiting?
ag -> ac : update owner reference
deactivate ag
hnote over ag #LightGreen: Optimizing
== Optimizing ==
rnote over ag : Collect QoSModel \n(infrastructure and application models)
loop while returned AssignmentPlan contains retryable error
ag -> pc : create unresolved AssignmentPlan
note right of ag #LightYellow
Collect QoSModel: infrastructure and application models
end note
ag -> pc : create unresolved AssignmentPlan
pc -> opt : find placement (gRPC)
activate opt
rnote over opt : Calculate placement
return placement result (gRPC)
activate pc
pc -> opt : calculates assignments and paths
deactivate pc
rnote over pc : Record Solver/Optimizer reply in AssignmentPlan
pc -> ag : AssignmentPlan with Solver/Optimizer reply exists
activate opt
opt -> pc : record reply in an assignment plan
deactivate opt
note over ag : is the reply from Solver/Optimizer valid?
end
activate pc
pc -> ag :looks for AssignmentPlan with solver reply
deactivate pc
hnote over ag #LightGreen: Scheduling
activate ag
note right of ag #LightYellow
Will repeat if the AssignmentPlan contains a retryable error.
Will Set Application Group to status "Scheduling" on valid reply from Solver / Optimizer
end note
ag -> cc: Initialize Channels based on channels in AssignmentPlan
activate cc
ag -> ac : Set Application to status "Scheduling"
ac -> k8s : Create Pods
ac -> ac: Set Application to status "Pending"
ag -> ag : Set ApplicationGroup to status "Waiting"
deactivate ag
ag -> cc: Create Channels based on channels in AssignmentPlan
activate cc
note right of cc #LightYellow
Will trigger a cascade of events
Further specified in Sequence_Orchestrator-ChannelController.plantuml
end note
cc -> ac : Pods are running
deactivate cc
activate ac
ac -> ac: set Application status to Running
ac -> ac : create service and endpoints
ag -> ac : Set Applications to status "Scheduling"
hnote over ac #LightGreen: Scheduling
deactivate ag
ac -> k8s : Create Pods
hnote over ac #LightGreen: Pending
deactivate ag
k8s -> ac : Pods are running
hnote over ac #LightGreen: Running
ac -> k8s : create services and endpoints
ac -> ag : All Applications in status "Running"
hnote over ag #LightGreen: Waiting
deactivate cc
@enduml
......@@ -20,7 +20,7 @@ end note
note right of Scheduling
for applications that have a plan entry:
set application state to "Scheduling"
send stream requests
send channel requests
end note
note left of Optimizing
......
......@@ -5,41 +5,29 @@ SPDX-License-Identifier: CC-BY-SA-4.0
# Getting Started
Steps for getting a demo to run. This is for deploying the QoS Scheduler and the Workload Placement Solver to a K8s cluster.
Steps for getting a demo to run. This is for the TSN version of the QoS Scheduler and the corresponding environment (K8s cluster with TSN network and Harbor registry).
## Preconditions
### You need a Kubernetes cluster
- Kubernetes server version >= 1.20, preferably >= 1.23 (but currently <= 1.26)
- you must be able to obtain docker images from whichever registry you're using (Docker hub)
- you must be able to obtain docker images from whichever registry you're using
- you need to install a few things into the kube-system namespace and create one daemonset with network privileges, so you need the right access to your cluster
- some pods get installed with root privileges (e.g. node daemonset, cni plugin), your cluster access needs to make that possible.
- some pods get installed with root privileges (e.g. node daemonset), your cluster access needs to make that possible.
It does not matter which CNI plugin you start the cluster with.
#### Container registry
Docker images have to come from a container registry. You can have a local registry, or a harbor/artifactory/etc
instance running in your network, or you can get images from the internet.
instance running in your network, or you can get images from the internet. For the siemens-built images, you either
need access so you can fetch them from cr.siemens.com or you need to transfer them to a registry that you can access.
Your Kubernetes nodes must be able to reach all the registries that they need to fetch images from.
## Setup steps
### Prepare the nodes in your cluster
In order to address specific nodes for workloads using the SWM CRs, you have to use specific labels on the nodes. When using the default environment (kind cluster), the labeling of the nodes is done in the start_cluster.sh script.
If you are using another k8s cluster, you have to label the nodes as follows
```bash
# label the nodes with their Kubernetes node names for easier use with
# WorkloadPlacementSolver compatible labels
export NODE_LABEL_PREFIX="siemens.com.qosscheduler"
export NODES=$(kubectl get no -o jsonpath="{.items[*].metadata.name}")
for n in $NODES; do
kubectl label --overwrite nodes $n $NODE_LABEL_PREFIX.$n=
done
```
### How to run the Helm chart
All the files for the Helm chart are in the `helm/qos-scheduler` directory. You need to install a Helm client (v3 is good) and run Helm from the `helm/qos-scheduler` directory.
......@@ -50,26 +38,25 @@ Here are the parameters:
- `qosNamespace`: the namespace where the controllers will run.
- `image.repositoryPrefix`: where we should be getting the images for qos-scheduler, controllers etc from.
- `image.tag`: the docker image tag to use for all images except for the `solver`.
- `image.tag`: the docker image tag to use for all images except for the `optimizer`.
- `image.pullSecrets`: the chart will create image pull secrets for you using `image.credentials`. This is so the Kubernetes pods can get images from your registry.
- `image.pullPolicy`: the default Kubernetes image pull policy. `Always` is good when you're developing, otherwise `IfNotPresent` is more efficient. You can override this for each subchart.
- `image.credentials`: this is the login to the docker registry. For `password`, I recommend getting a token with registry read access. Do not put your actual password in here. These credentials will be stored in the Kubernetes cluster using secrets; they will not be encrypted.
Other things you may want to check or modify in the Helm chart:
- you may need to edit `charts/optimizer/values.yaml` and specify an image tag that works for you.,
- you probably need to edit `charts/optimizer/values.yaml` and specify an image that works for you. The system will use a grpc client, so you should run a service implementing the service proto.
- take a look at `charts/network/values.yaml`. This determines the network controllers that the Helm chart will start for you. You can add your own network sections to the values file here or you can start your own network controllers outside the chart.
- `templates/_helpers.tpl` contains the `serviceClassMap`. This is how the system decides which network to use for your channels' service classes. Each channel service class needs to have a key in the map, and the values need to be networks for which a channel operator exists.
```bash
make chart
helm install qostest --namespace=controllers-system --create-namespace tmp/helm
helm install qostest --namespace=controllers-system --create-namespace --set global.image.credentials.username=your.email@siemens.com --set global.image.credentials.password=$YOURTOKEN --set global.image.credentials.email=your.email@siemens.com --set global.image.credentials.registry=cr.siemens.com .
```
If you make changes to the helm chart and just want to apply those:
```bash
helm upgrade qostest --namespace=controllers-system tmp/helm
helm upgrade qostest --namespace=controllers-system --set global.image.credentials.username=your.email@siemens.com --set global.image.credentials.password=$YOURTOKEN --set global.image.credentials.email=your.email@siemens.com --set global.image.credentials.registry=cr.siemens.com .
```
To uninstall the release:
......@@ -202,7 +189,7 @@ Example:
25s Normal phase change applicationgroup/applicationgroup-demo changed phase from Optimizing to Failed
```
This tells you that the application group entered a failed state because it tried to
This tells me that the application group entered a failed state because it tried to
call the optimizer and the optimizer returned a non-retryable error.
Other reasons for failure include applications failing because their pods failed or because
......
......@@ -15,7 +15,7 @@ described in more detail below.
Some more complex steps have subdirectories in `container`, others
are implemented completely in the `.gitlab-ci.yml` file.
1. dev-container
1. ci-images
2. lint
3. test
4. build
......@@ -34,13 +34,13 @@ the `latest` image is broken, you may want to pin goenv_version to a different t
`K8SVERSION` is the Kubernetes version your code is tied to. Switching
this will not create code for the given version automatically; you need to run the `hack/maybe-update-deps.sh` script first, then check in the changes it makes.
## dev-container
## ci-images
This step builds the `goenv` container, which has a go compiler,
all the packages required by the code, as well as some tools for running tests.
This step is not hermetic, so it runs in a kaniko container. It
is also fairly complex, so it has its own subdirectory (`container/goenv`).
is also fairly complex, so it has its own subdirectory (`build/ci/goenv`).
The `pipeline-jobs.yml` file provides the steps before running kaniko. Mainly, this part copies `go.mod` files from the packages we
build so they can get processed in the kaniko/Docker build.
......@@ -70,7 +70,7 @@ big, and this step makes it even bigger. Of course
the image produced here is never deployed to production. The deploy
step creates a much smaller image.
Similar to the dev-container step, there is a `force-build` file here which is really just for forcing a rebuild.
Similar to the ci-images step, there is a `force-build` file here which is really just for forcing a rebuild.
The most common reason for needing to force a rebuild is using the wrong
value of `OPT_TAG`. This is the tag of the optimizer container you want to build on. This needs to be an optimizer container built on top of ubuntu:focal, not ubuntu:18.04. Currently this means you need the `build-experiments` tag. Once that has been merged, you can use the `develop` tag (or maybe, one day, the `latest` tag).
......@@ -89,8 +89,9 @@ It runs in the `goenv` container and is hermetic.
## build
This builds all go code. It runs in the `goenv` container and is hermetic.
It produces the `qosScheduler` (custom scheduler), `manager` (controller-manager service),
and `networkoperator` binaries and exports them to CI as artifacts.
It produces the `qosScheduler` (custom scheduler), `controller`
(controller-manager service), and binaries `network-k8s`, `network-l2sm`, `network-tsn`
and `network-topology` and exports them to CI as artifacts.
## collect-configs
......@@ -115,7 +116,9 @@ Here is the list of docker images you should have at the end:
1. goenv (environment for building and testing go code)
2. controller (the controller-manager binary)
3. custom-scheduler (the custom scheduler binary)
4. networkoperator (maintains network information)
5. configs (the package with all the yaml files and scripts)
4. network-k8s (maintains Kubernetes network information)
5. network-l2sm (maintains L2S-M network information)
6. network-topology (maintains network information)
7. configs (the package with all the yaml files and scripts)
Not all pipeline runs will rebuild all images. There are gitlab ci/cd settings in the yaml files that specify which file changes trigger which step.
......@@ -5,7 +5,8 @@ SPDX-License-Identifier: CC-BY-SA-4.0
# Quick Start
Steps for getting a demo to run on a local Linux Helm environment and a local KiND cluster.
Steps for getting a demo to run on a local Linux Helm environment and a local
KiND cluster.
## Preconditions
......@@ -42,13 +43,18 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
git clone ${REPO_URL}qos-scheduler.git
```
- **Attention:** If this is done in Windows (using Visual Studio Code - VSC), depending on the settings of VSC, it may happen that text files use a CRLF (Carriage Return Line Feed) as line separation (the "Windows way"). Scripts will not execute with this or throw errors. The solution to this problem:
**Attention:** If this is done in Windows (using Visual Studio Code - VSC),
depending on the settings of VSC, it may happen that text files use a CRLF
(Carriage Return Line Feed) as line separation (the "Windows way"). Scripts
will not execute with this or throw errors. The solution to this problem:
- Make sure the standard setting of VSC is to use LF instead of CRLF, or
- For each shell script, open it in VSC and adjust the End of Line Sequence to "LF" (this can be seen and changed in the blue bar on the lower right corner of the VSC window, when the file is selected)
- For each shell script, open it in VSC and adjust the End of Line Sequence
to "LF" (this can be seen and changed in the blue bar on the lower right
corner of the VSC window, when the file is selected)
# Install K8s cluster using KinD
- In local shell (Linux):
- In local shell:
```bash
cd <qos-scheduler-directory>
......@@ -57,9 +63,13 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
./start_cluster.sh
```
- This will install a KinD cluster with one master and two worker nodes
- The K8s config to access the cluster will be appended to ~/.kube/config and in case there are multiple clusters in the config file, the context (of kubectl) will be switched to the new KinD cluster
- Check whether cluster is working and whether your K8s config is pointing to the right cluster
- This will install a KinD cluster with one master and two worker nodes. The
K8s config to access the cluster will be appended to ~/.kube/config and in
case there are multiple clusters in the config file, the context (of
kubectl) will be switched to the new KinD cluster.
- Check whether cluster is working and whether your K8s config is pointing to
the right cluster
```bash
kubectl get nodes
......@@ -92,6 +102,7 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
```
This should show you links in the network-k8s-namespace namespace.
- Show network paths (Custom Resources)
```bash
......@@ -105,11 +116,13 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
- In local shell:
```bash
kubectl apply -f config/demo/sample-topology.yaml
kubectl apply -f config/demo/sample-topology-vlan.yaml
```
This will create a sample network topology. If you are running a topology operator in the
network-demo-namespace namespace (true if you are using the latest Helm chart without modifications), you should see the network links and paths in this namespace soon.
This will create a sample network topology. If you are running a topology
operator in the network-demo-namespace namespace (true if you are using the
latest Helm chart without modifications), you should see the network links
and paths in this namespace soon.
```bash
kubectl get networklinks -A
......@@ -126,24 +139,36 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
kubectl apply -f config/demo/app-besteffort.yaml
```
- This will create an *ApplicationGroup* with minimum 1 Applications and the Application app-besteffort, which consists of
- This will create an *ApplicationGroup* that requires a minimum of one
*Application*, and the Application app-besteffort, which consists of
- Two *Workloads* w1 and w2
- each Workload has a container wbitt/network-multitool (which has a couple of networking tools to test and demonstrate communication)
- each Workload has a container wbitt/network-multitool (which has a
couple of networking tools to test and demonstrate communication)
- *Channel* "ernie" from w1 to w2 (5Mbit/s, 150µs)
- *Channel* with a generated name (app-besteffort-w2-to-app-besteffort-w1) from w2 to w1 (2Mbit/s, 200µs)
- *Channel* with a generated name (app-besteffort-w2-to-app-besteffort-w1)
from w2 to w1 (2Mbit/s, 200µs)
*Channel* "ernie" requests the *BESTEFFORT* network service class, which the default Helm chart
maps to the *k8s* network (which gets created automatically).
*Channel* "ernie" requests the *BESTEFFORT* network service class, which the
default Helm chart maps to the *k8s* network (which gets created
automatically).
The other channel requests the *BESTEFFORT* network service class, which the default Helm chart
implements using the *k8s* network.
The other channel requests the *BESTEFFORT* network service class, which the
default Helm chart implements using the *k8s* network.
## Show optimizer call and output
- First thing that happens is that the QoS-Scheduler Controller, once the *ApplicationGroup* is complete, collects all inputs related to *Workloads* and *Channels*, and calls the WorkloadPlacementSolver (Pod name: solver) with this information, together with information related to the infrastructure (compute and network)
- The WorkloadPlacementSolver then calculates and returns a placement for all Pods of the *ApplicationGroup*
- Look into logs of Solver Pod (e.g. via Lens)
- The output of the WorkloadPlacementSolver (assignment of Pods to Nodes) is written to a custom resource *AssignmentPlan* (Lens: Custom Resources/qos-scheduler.siemens.com/AssignmentPlan)
- First thing that happens is that the QoS-Scheduler Controller, once the
*ApplicationGroup* is complete, collects all inputs related to *Workloads*
and *Channels*, and calls the WorkloadPlacementSolver (Pod name: optimizer)
with this information, together with information related to the
infrastructure (compute and network)
- The WorkloadPlacementSolver then calculates and returns a placement for all
Pods of the *ApplicationGroup*
- Look into logs of Optimizer Pod (e.g. via Lens)
- The output of the WorkloadPlacementSolver (assignment of Pods to Nodes) is
written to a custom resource *AssignmentPlan* (Lens: Custom
Resources/qos-scheduler.siemens.com/AssignmentPlan)
## Show Channels
......@@ -154,15 +179,23 @@ implements using the *k8s* network.
```
- or use Lens: Custom Resources/qos-scheduler.siemens.com/Channels
- One custom resource Channel is created per *Channel* that connects *Workloads* placed on different nodes, and it corresponds to the channel being set up
- Services (svc): Each *Channel* also gets a K8s Service with the name "svc-\<channel-name>-\<namespace>". This can be used to address the Pods behind this channel using a DNS name.
- One custom resource Channel is created per *Channel* that connects
*Workloads* placed on different nodes, and it corresponds to the channel
being set up
- Services (svc): Each *Channel* also gets a K8s Service with the name
"svc-\<channel-name>-\<namespace>". This can be used to address the Pods
behind this channel using a DNS name.
```bash
kubectl get svc
```
If there was no name specified in the deployment file, a name is generated: "svc-\<src-applicationname>-\<src-workloadname>\-\<tgt-applicationname>-\<tgt-workloadname>-\<namespace>
- Endpoint (ep): IP address behind a *Channel*. This is what the corresponding Service resolves to.
If there was no name specified in the deployment file, a name is generated:
“svc-\<src-applicationname>-\<src-workloadname>\-\<tgt-applicationname>-\<tgt-workloadname>-\<namespace>”
- Endpoint (ep): IP address behind a *Channel*. This is what the corresponding
Service resolves to.
```bash
kubectl get ep
......@@ -176,7 +209,8 @@ implements using the *k8s* network.
kubectl delete -f config/demo/applicationgroup.yaml
```
- This will delete all *Applications* in the *ApplicationGroup* and all dependent resources
- This will delete all *Applications* in the *ApplicationGroup* and all
dependent resources
# Uninstall QoS Scheduler and Optimizer
......@@ -188,6 +222,7 @@ implements using the *k8s* network.
helm list --namespace=controllers-system
helm uninstall --namespace=controllers-system qostest
```
- Delete the QoS-Scheduler CRDs (not uninstalled by the helm chart)
```bash
......
......@@ -9,10 +9,14 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
## Preconditions
The following tools need to be installed:
- Docker
- kubectl
- Helm
- KinD
- make
- rsync
- yq
## Ensure access to container registry
......@@ -24,18 +28,18 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
```bash
DOCKERUSER=<your_docker_username>
ACCESSTOKEN=<your_access_token>
REGISTRYHOST=colab-repo.intracom-telecom.com
REGISTRYNAME=${REGISTRYHOST}:5050
REGISTRYHOST=cr.siemens.com
REGISTRYNAME=${REGISTRYHOST}
REGISTRYURL=https://${REGISTRYNAME}
REPOHOST=colab-repo.intracom-telecom.com
REPOGROUP=/colab-projects/he-codeco/swm/
REPOHOST=code.siemens.com
REPOGROUP=/itp_cloud_research/qos-scheduler
docker login ${REGISTRYURL} -u ${DOCKERUSER} -p ${ACCESSTOKEN}
```
- Test registry access (optional)
```bash
docker pull ${REGISTRYNAME}${REPOGROUP}qos-scheduler/custom-scheduler:main
docker pull ${REGISTRYNAME}${REPOGROUP}/scheduler:latest
```
## Copy required scripts to local environment
......@@ -87,7 +91,7 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
```bash
make chart
helm install qostest --namespace=controllers-system --create-namespace --set global.image.credentials.username=${DOCKERUSER} --set global.image.credentials.password=${ACCESSTOKEN} --set global.image.credentials.email=${DOCKERUSER} --set global.image.credentials.registry=${REGISTRYURL} tmp/helm
helm install qosscheduler --namespace=controllers-system --create-namespace --set global.image.credentials.username=${DOCKERUSER} --set global.image.credentials.password=${ACCESSTOKEN} --set global.image.credentials.email=${DOCKERUSER} --set global.image.credentials.registry=${REGISTRYURL} tmp/helm
```
- Show network topology/ network links (Custom Resources)
......@@ -110,7 +114,7 @@ Steps for getting a demo to run on a local Linux Helm environment and a local Ki
- In local shell:
```bash
kubectl apply -f config/demo/sample-topology.yaml
kubectl apply -f config/demo/sample-topology-vlan.yaml
```
This will create a sample network topology. If you are running a topology operator in the
......@@ -191,9 +195,11 @@ implements using the *k8s* network.
```bash
helm list --namespace=controllers-system
helm uninstall --namespace=controllers-system qostest
helm uninstall --namespace=controllers-system qosscheduler
```
- Delete the QoS-Scheduler CRDs (not uninstalled by the helm chart)
- Delete the QoS-Scheduler CRDs (not uninstalled by the helm chart).
_Attention:_ This deletes all CRDs, including ones that were not installed by our helm chart
```bash
kubectl delete crds --all
......
This diff is collapsed.
......@@ -5,17 +5,32 @@ SPDX-License-Identifier: CC-BY-SA-4.0
# SWM QoS Scheduler
The SWM QoS scheduler is an extension of the Kubernetes scheduler that implements an extended model for describing enhanced application requirements and infrastructure capabilities, according to the Seamless Computing concept.
The SWM QoS Scheduler is an extension of the Kubernetes scheduler that
implements an extended model for describing enhanced application requirements
and infrastructure capabilities, according to the Seamless Computing concept.
“QoS” is short for “quality of service”.
The SWM QoS scheduler considers the extended application (QoS) requirements when placing application components (further called workloads) to compute nodes in the K8s cluster. The main enhancements beyond the existing K8s scheduler are:
The SWM QoS Scheduler considers the extended application (QoS) requirements
when placing application components (further called workloads) to compute
nodes in the Kubernetes cluster. The main enhancements beyond the existing
Kubernetes scheduler are:
- schedule multiple workloads (K8s Pods) at once, considering dependencies between these workloads
- network aware scheduling: make K8s aware of the network topology, connectivity, and resource availability between worker nodes, and consider this in the scheduling decision (according to the communication requirements of the application)
- schedule multiple workloads (Kubernetes pods) at once, considering
dependencies between these workloads
- network-aware scheduling: make Kubernetes aware of the network topology,
connectivity, and resource availability between worker nodes, and consider
this in the scheduling decision (according to the communication requirements
of the applications)
## Getting Started
For installing the SWM QoS Scheduler (including all CRDs and required containers) and to deploy a small demo application, see [quick start](Documentation/quick-start-oss.md).
For installing the SWM QoS Scheduler (including all CRDs and required
containers) and to deploy a small demo application, see [quick start][1].
[1]: Documentation/quick-start-oss.md
## License
All code files are licensed under Apache license version 2.0.
All documentation is licensed under Creative Commons Attribution-ShareAlike 4.0 International.
All code files are licensed under Apache license version 2.0. All
documentation is licensed under Creative Commons Attribution-ShareAlike 4.0
International.