Regression
Evolve ML pipelines that maximize R² on regression tasks.
Prerequisites
- VEOX Server: Start the local VEOX server (requires Docker) with the command below. See the Quick Start for detailed server setup, health checks, and Docker Compose instructions.

      docker run -d \
        --name veox-enclave-server \
        -p 8090:8090 \
        714044927654.dkr.ecr.us-east-2.amazonaws.com/doug/single_enclave/veox-enclave-server:latest

- Python SDK: Install the veox package via PyPI:

      pip install veox
What It Does
The regression family genetically evolves multi-stage ML pipelines for continuous value prediction:
Candidates are scored using R² (coefficient of determination) via K-fold cross-validation.
Quick Start
from sklearn.datasets import make_regression
import pandas as pd
from veox import VeoxEvolver

# Generate a regression dataset
X, y = make_regression(n_samples=500, n_features=10, n_informative=6,
                       noise=15, random_state=42)
df = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])
df["target"] = y

# Evolve regression pipelines for 5 generations and report the best R² found
evolver = VeoxEvolver("regression")
evolver.fit(data=df, target_column="target", max_generations=5)
print(f"Best R²: {evolver.best_fitness_:.4f}")
Full Example
from sklearn.datasets import make_regression
import pandas as pd
from veox import VeoxEvolver

# 1. Generate challenging regression dataset
X, y = make_regression(
    n_samples=1000,
    n_features=15,
    n_informative=8,
    noise=20,  # Significant noise
    random_state=42,
)
df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(15)])
df["target"] = y
print(f"Dataset: {df.shape[0]} rows × {df.shape[1]} cols")
print(f"Target range: [{y.min():.1f}, {y.max():.1f}]")

# 2. Connect
evolver = VeoxEvolver("regression", api_url="http://127.0.0.1:8090")
evolver.health_check()

# 3. Evolve
evolver.fit(
    data=df,
    target_column="target",
    max_generations=10,
    population_size=50,
    timeout_per_eval=30,
    max_poll_time=600,
)

# 4. Inspect
print(f"Best R²: {evolver.best_fitness_:.4f}")
print(f"Pipeline: {evolver.best_pipeline_}")
print(f"Evaluations: {evolver.result_.total_evals}")

# 5. Save
evolver.save("regression_results.json")
from sklearn.datasets import make_regression
import pandas as pd
from veox import VeoxEvolver

# 1. Generate challenging regression dataset
X, y = make_regression(
    n_samples=1000,
    n_features=15,
    n_informative=8,
    noise=20,  # Significant noise
    random_state=42,
)
df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(15)])
df["target"] = y
print(f"Dataset: {df.shape[0]} rows × {df.shape[1]} cols")
print(f"Target range: [{y.min():.1f}, {y.max():.1f}]")

# 2. Connect
evolver = VeoxEvolver("regression", api_url="http://127.0.0.1:8090")
evolver.health_check()

# 3. Evolve
evolver.fit(
    data=df,
    target_column="target",
    max_generations=10,
    population_size=50,
    num_islands=4,  # PRO FEATURE: 4 parallel islands
    timeout_per_eval=30,
    max_poll_time=600,
)

# 4. Inspect
print(f"Best R²: {evolver.best_fitness_:.4f}")
print(f"Pipeline: {evolver.best_pipeline_}")
print(f"Evaluations: {evolver.result_.total_evals}")

# 5. Save
evolver.save("regression_results.json")
Live dashboard — R² fitness scatter, champion trend, and pipeline leaderboard.
Fitness Configuration
| Parameter | Value |
|---|---|
| Primary Metric | R² (coefficient of determination) |
| K-Fold Splits | 3 |
| Direction | Maximize |
| Clamped Range | [-1.0, 1.0] |
| Exception Penalty | -2.0 |
Open-Source Datasets to Try
from sklearn.datasets import fetch_california_housing, make_friedman1
import pandas as pd
from veox import VeoxEvolver
# California Housing (real-world)
data = fetch_california_housing(as_frame=True)
df = data.frame
# Rename the label column so it matches target_column="target" below
df = df.rename(columns={"MedHouseVal": "target"})
evolver = VeoxEvolver("regression")
evolver.fit(data=df, target_column="target", max_generations=5)
# Friedman #1 (nonlinear benchmark — 5 informative, 5 noise)
X, y = make_friedman1(n_samples=1000, n_features=10, noise=1.0)
df = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])
df["target"] = y
evolver = VeoxEvolver("regression")
evolver.fit(data=df, target_column="target", max_generations=5)
VEOX Pro Activation
To unlock VIP Evaluators and Pro Algorithms (like PaperKit and Generative routines), you must authenticate your local node with a VEOX License Token.
from veox import VeoxEvolver

evolver = VeoxEvolver("regression", api_url="http://127.0.0.1:8090")

# 1. Fetch your unique Hardware Fingerprint
fingerprint = evolver.get_system_fingerprint()
print(f"My VEOX Node Fingerprint: {fingerprint}")
# Example Output: My VEOX Node Fingerprint: 476ad03474b31e3c84d07df9088d93f0

# 2. Provide this fingerprint to your VEOX Admin to receive a JWT Token
jwt_token = "eyJ0b2tlbiI6ICJVRExK...EXPIRES"  # Replace with your token

# 3. Activate the Enclave
if evolver.activate_license(jwt_token):
    print("VIP Features Unlocked!")
    # evolver.fit(...) will now utilize full Pro capabilities
Multiple Datasets
Train on several datasets to evolve regressors that generalize across different distributions:
from sklearn.datasets import make_regression
import pandas as pd
from veox import VeoxEvolver
# Two regression datasets with different signal-to-noise ratios
def make_reg_df(seed: int, noise: float = 10.0) -> pd.DataFrame:
    """Build a synthetic regression dataset as a DataFrame.

    Generates 600 samples with 15 features (8 informative) via
    ``make_regression`` and appends the response as a ``target`` column.

    Args:
        seed: Passed to ``make_regression`` as ``random_state``.
        noise: Passed to ``make_regression`` as ``noise``.

    Returns:
        DataFrame with feature columns ``f0`` .. ``f14`` plus ``target``.
    """
    X, y = make_regression(n_samples=600, n_features=15, n_informative=8,
                           noise=noise, random_state=seed)
    df = pd.DataFrame(X, columns=[f"f{i}" for i in range(15)])
    df["target"] = y
    return df
df1 = make_reg_df(seed=42, noise=5.0)   # Low noise
df2 = make_reg_df(seed=99, noise=30.0)  # High noise

# Passing a list of DataFrames trains on both datasets in one run
evolver = VeoxEvolver("regression")
evolver.fit(
    data=[df1, df2],
    target_column="target",
    max_generations=5,
)
print(f"Best R² (averaged): {evolver.best_fitness_:.4f}")
Tips
- R² = 1.0 means perfect fit; R² = 0 means predicting the mean.
- Multiple datasets: Pass a list to test generalization: data=[df1, df2].
- File paths: Pass CSV paths directly: data=["train_a.csv", "train_b.csv"].
- Noise matters: Higher noise → more generations needed to evolve robust regressors.