diff --git a/pyproject.toml b/pyproject.toml
index 23254cb111e161eebc475ec823ab2490aaa20c36..acfb3bafa0a5a508b8aa17538e38839a775961b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ scripts.hypertool = "hypertool.cli:main"
 
 
 [tool.black]
-line-length = 80
+line-length = 119
 target-version = [ "py311" ]
 
 [tool.isort]
diff --git a/requirements.txt b/requirements.txt
index d456fa827f23d03a5e15e59a8e1669addf75a014..1464bccace3d9e92bcd92b525a2faa4f7d70341b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,3 +27,4 @@ typing_extensions==4.12.2
 virtualenv==20.29.1
 wcmatch==10.0
 wcwidth==0.2.13
+numpy==1.26.4
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..3ef2ed4e59243e5e6a7888e811fdd9e6de07fc0d
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,3 @@
+
+[flake8]
+max-line-length = 119
diff --git a/src/hypertool/cli.py b/src/hypertool/cli.py
index b72fbee5ac7e52ca4652ac10da10a9c18b89dfe2..d09ce3f6dba3dd0813d8ee8362657aa7fac31e54 100644
--- a/src/hypertool/cli.py
+++ b/src/hypertool/cli.py
@@ -29,16 +29,23 @@ def test_command() -> None:
 @click.option("--dry-run", is_flag=True, help="Explain the command's function")
 def run(dry_run: bool) -> None:
     network_interfaces = utils.get_network_interfaces_annotations()
+    monetary_cost = utils.get_monetary_cost_annotation()
+    energy_raw, energy_label = utils.get_energy_efficiency_annotation()
+
+    all_annotations = {
+        **network_interfaces,
+        **monetary_cost,
+        **energy_raw,
+        **energy_label,
+    }
 
     if dry_run:
-        click.echo("[DRY-RUN] The following attributes would be applied: \n")
+        click.echo("[DRY-RUN] The following attributes would be applied:\n")
         click.echo("Annotations:")
-        for key, value in network_interfaces.items():
+        for key, value in all_annotations.items():
             click.echo(f"\t{key}: {value}")
         return
 
-    pass
-
 
 @click.command("start_daemon", short_help="Start the diagnostics.")
 def start_daemon() -> None:
@@ -46,9 +53,7 @@ def start_daemon() -> None:
     try:
         config.load_incluster_config()
     except ConfigException:
-        raise exceptions.HyperToolKubernetesError(
-            "Error loading in-cluster config"
-        )
+        raise exceptions.HyperToolKubernetesError("Error loading in-cluster config")
 
     v1 = client.CoreV1Api()
 
diff --git a/src/hypertool/cloud_costs.xlsx b/src/hypertool/cloud_costs.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..2161a0ab82e5ae7b4d2e200ff3c822bdfc38d496
Binary files /dev/null and b/src/hypertool/cloud_costs.xlsx differ
diff --git a/src/hypertool/clustered_instances.xlsx b/src/hypertool/clustered_instances.xlsx
new file mode 100644
index 0000000000000000000000000000000000000000..a37736bde04868383f2f676b8f1864ddb3e343ca
Binary files /dev/null and b/src/hypertool/clustered_instances.xlsx differ
diff --git a/src/hypertool/cost_centroids.npy b/src/hypertool/cost_centroids.npy
new file mode 100644
index 0000000000000000000000000000000000000000..91aee3257beb9ddf95b0bdca7963c3b67afeb5ba
Binary files /dev/null and b/src/hypertool/cost_centroids.npy differ
diff --git a/src/hypertool/k-means.py b/src/hypertool/k-means.py
new file mode 100644
index 0000000000000000000000000000000000000000..aab9f5be3340f7548d752c9d1b0011914e645ef2
--- /dev/null
+++ b/src/hypertool/k-means.py
@@ -0,0 +1,87 @@
+import json
+import os
+
+import numpy as np
+import pandas as pd
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import StandardScaler
+
+
+def train_kmeans_from_excel(file_path, k=5, save_dir="output"):
+    """Cluster cloud instance types by size and price and save the model artifacts.
+
+    Reads the "vCPU", "Memory" and "On-Demand hourly rate" columns from the Excel file,
+    standardizes them, fits KMeans with ``k`` clusters and ranks the clusters from
+    cheapest to most expensive by their mean hourly rate.
+
+    Files written to ``save_dir``:
+      - cost_centroids.npy: centroids in scaled feature space, ordered by cost rank so
+        that row ``i`` corresponds to qualitative label ``i``
+      - scaler.json: per-feature mean and scale of the fitted StandardScaler
+      - clustered_instances.xlsx: input rows plus "Cluster" and "Cost Category" columns
+
+    Raises:
+        ValueError: if ``k`` exceeds the number of qualitative labels or a required
+            column is missing.
+    """
+    # Qualitative labels, ordered from cheapest to most expensive
+    qualitative_labels = ["very low", "low", "medium", "high", "very high"]
+    if k > len(qualitative_labels):
+        raise ValueError(f"Only {len(qualitative_labels)} labels defined for {k} clusters.")
+
+    # Read Excel file
+    df = pd.read_excel(file_path)
+
+    # Required columns
+    required_cols = ["vCPU", "Memory", "On-Demand hourly rate"]
+    if not all(col in df.columns for col in required_cols):
+        raise ValueError(f"Excel must contain the following columns: {required_cols}")
+
+    # Clean and convert memory and price columns
+    df["Memory"] = df["Memory"].astype(str).str.replace(" GiB", "", regex=False).astype(float)
+    df["On-Demand hourly rate"] = (
+        df["On-Demand hourly rate"].astype(str).str.replace("$", "", regex=False).astype(float)
+    )
+
+    # Normalize features
+    features = df[required_cols]
+    scaler = StandardScaler()
+    features_scaled = scaler.fit_transform(features)
+
+    # KMeans clustering
+    kmeans = KMeans(n_clusters=k, random_state=42, n_init="auto")
+    df["Original Cluster"] = kmeans.fit_predict(features_scaled)
+
+    # Rank clusters by average price, cheapest first
+    cluster_avg_cost = df.groupby("Original Cluster")["On-Demand hourly rate"].mean().sort_values()
+    sorted_clusters = cluster_avg_cost.index.tolist()
+
+    # Remap original cluster IDs to ordered 0–(k-1)
+    cluster_id_mapping = {old: new for new, old in enumerate(sorted_clusters)}
+    df["Cluster"] = df["Original Cluster"].map(cluster_id_mapping)
+    df["Cost Category"] = df["Cluster"].map(dict(enumerate(qualitative_labels)))
+
+    # Save model outputs
+    os.makedirs(save_dir, exist_ok=True)
+
+    # Store centroids in cost-rank order rather than KMeans' arbitrary label order:
+    # inference indexes the label list with the row of the nearest centroid.
+    # NOTE(review): a cost_centroids.npy saved in the unsorted order must be regenerated.
+    ordered_centroids = kmeans.cluster_centers_[sorted_clusters]
+    np.save(os.path.join(save_dir, "cost_centroids.npy"), ordered_centroids)
+    scaler_data = {
+        "mean": scaler.mean_.tolist(),
+        "scale": scaler.scale_.tolist(),
+    }
+    with open(os.path.join(save_dir, "scaler.json"), "w") as f:
+        json.dump(scaler_data, f)
+
+    # Save final result; to_excel overwrites an existing file
+    df.drop(columns=["Original Cluster"], inplace=True)
+    df.to_excel(os.path.join(save_dir, "clustered_instances.xlsx"), index=False)
+
+
+if __name__ == "__main__":
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    file_path = os.path.join(current_dir, "cloud_costs.xlsx")
+    train_kmeans_from_excel(file_path, k=5)
diff --git a/src/hypertool/scaler.json b/src/hypertool/scaler.json
new file mode 100644
index 0000000000000000000000000000000000000000..e218c97f030b7e28ab585386a3c82a1dce7b4067
--- /dev/null
+++ b/src/hypertool/scaler.json
@@ -0,0 +1 @@
+{"mean": [46.52039555006181, 334.4412855377009, 4.552835995055624], "scale": [57.05010390300931, 986.0795139030431, 10.824401690402746]}
diff --git a/src/hypertool/utils.py b/src/hypertool/utils.py
index 818efd8bcc4f15a6f2c0d72637b81d28ecae34bf..ee11abd945b6add603d16702f3ff16048abc57d5 100644
--- a/src/hypertool/utils.py
+++ b/src/hypertool/utils.py
@@ -1,6 +1,115 @@
+import json
+import os
+import subprocess
+from pathlib import Path
+
 import netifaces
+import numpy as np
 
 
 def get_network_interfaces_annotations():
     interfaces = ",".join(f"{item}" for item in netifaces.interfaces())
     return {"hyperai.eu/node-available-interfaces": interfaces}
+
+
+def get_cpu_count():
+    """Return the number of logical CPUs, falling back to 1 if not detectable."""
+    return os.cpu_count() or 1
+
+
+def get_ram_gb():
+    """Return the total memory in GiB from /proc/meminfo, or 0 if it cannot be read."""
+    try:
+        with open("/proc/meminfo") as f:
+            for line in f:
+                if "MemTotal" in line:
+                    # the second field is the size in kB
+                    return int(line.split()[1]) / 1024 / 1024
+    except (OSError, ValueError, IndexError):
+        return 0  # fallback if file missing or malformed
+    return 0  # fallback if there is no MemTotal line
+
+
+def load_kmeans_model():
+    """Load the cost model stored next to this module.
+
+    Returns:
+        A ``(centroids, mean, scale)`` tuple of arrays read from cost_centroids.npy
+        and scaler.json.
+    """
+    model_dir = Path(__file__).parent
+    centroids = np.load(model_dir / "cost_centroids.npy")
+    with open(model_dir / "scaler.json") as f:
+        scaler = json.load(f)
+    return centroids, np.array(scaler["mean"]), np.array(scaler["scale"])
+
+
+def predict_cost_category(cpu, ram):
+    """Return the cost label of the centroid nearest to ``(cpu, ram)``.
+
+    Only the first two scaled features (CPU count and RAM) enter the distance; the
+    third one, price, is excluded during inference. The label is chosen by centroid
+    row, which assumes the rows are stored from cheapest to most expensive.
+    """
+    centroids, mean, scale = load_kmeans_model()
+    scaled = (np.array([cpu, ram]) - mean[:2]) / scale[:2]
+    idx = int(np.argmin(np.linalg.norm(centroids[:, :2] - scaled, axis=1)))
+    labels = ["very low", "low", "medium", "high", "very high"]
+    return labels[idx]
+
+
+def get_monetary_cost_annotation():
+    """Return the monetary-cost-category annotation for this node.
+
+    The value is "unknown" when the RAM size is unavailable or the prediction fails.
+    """
+    key = "hyperai.eu/node-monetary-cost-category"
+    cpu = get_cpu_count()
+    ram = get_ram_gb()
+    if ram <= 0:
+        # 0 is get_ram_gb's failure value; classifying it would yield a misleading label
+        return {key: "unknown"}
+
+    try:
+        return {key: predict_cost_category(cpu, ram)}
+    except Exception:  # best effort: any model loading/evaluation failure maps to "unknown"
+        return {key: "unknown"}
+
+
+def get_energy_efficiency_annotation():
+    """Measure package energy for one second with ``perf`` and rate the node.
+
+    Returns:
+        A pair of single-entry dicts: the energy in joules (rounded to two decimals)
+        and the efficiency label ("high" below 10 J, "medium" below 20 J, else "low").
+        Both values are "unknown" when perf is missing, fails, or reports no reading.
+    """
+    unknown = (
+        {"hyperai.eu/node-energy-usage-joules": "unknown"},
+        {"hyperai.eu/node-energy-efficiency": "unknown"},
+    )
+    try:
+        result = subprocess.run(
+            ["perf", "stat", "-a", "-e", "power/energy-pkg/", "sleep", "1"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        # perf stat prints its counters to stderr by default
+        for line in result.stderr.splitlines():
+            if "Joules power/energy-pkg/" in line:
+                energy = float(line.strip().split()[0])
+                if energy < 10:
+                    label = "high"
+                elif energy < 20:
+                    label = "medium"
+                else:
+                    label = "low"
+                return (
+                    {"hyperai.eu/node-energy-usage-joules": round(energy, 2)},
+                    {"hyperai.eu/node-energy-efficiency": label},
+                )
+    except (OSError, subprocess.SubprocessError, ValueError):
+        return unknown
+    # perf ran but printed no energy line: still return a pair so callers can unpack it
+    return unknown