mantel/benchmarks/benchmark_performance.py
2024-06-11 21:08:08 +10:00

70 lines
3.2 KiB
Python

from hypothesis import given, settings, HealthCheck, note, event, strategies as st
from returns.result import Success
import datetime
import numpy as np
import mlflow
import pytest
import matplotlib.pyplot as plt
import pandas as pd
from tests.models.common import generate_kohonen_samples, MINIMUM_NETWORK_DIMENSION
from models import train_kohonen_network, train_kohonen_network_sam, create_kohonen_params
# Base name for the MLflow experiment used by this benchmark module.
mlflow_experiment_name = "Kohonen_Network_Benchmark"
# UTC timestamp captured once, at import time, with one-second resolution.
# NOTE: `datetime.UTC` is only available on Python 3.11+.
timestamp = datetime.datetime.now(datetime.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')
# Import-time side effect: make "<base name>_<timestamp>" the active MLflow
# experiment, so the experiment name differs between imports of this module
# that happen in different seconds.
mlflow.set_experiment(f"{mlflow_experiment_name}_{timestamp}")
@pytest.fixture(scope="session")
def mlflow_context():
    """Collect benchmark timings and plot them when the test session ends.

    Yields:
        list: An initially empty list. Benchmarks append one dict per
        example with the keys ``execution_time`` and ``sam_execution_time``.

    On teardown the collected timings are drawn as a scatter plot with a
    dashed ``y = x`` reference line and saved to
    ``benchmarks/execution_time_comparison.png`` (relative to the current
    working directory). Nothing is written when no timings were collected.
    """
    context = []
    yield context

    # Teardown: everything below runs once, after the last test that used
    # this session-scoped fixture.
    if not context:
        # An empty list yields a DataFrame without columns, so the column
        # lookups below would raise KeyError and hide the real test outcome.
        return

    df = pd.DataFrame(context)
    execution_times = df['execution_time']
    sam_execution_times = df['sam_execution_time']

    # The reference diagonal must span both axes, so take the extremes over
    # both series rather than the minimum of one and the maximum of the other.
    lower = min(execution_times.min(), sam_execution_times.min())
    upper = max(execution_times.max(), sam_execution_times.max())

    # Plot scatter chart of execution_time vs sam_execution_time
    figure = plt.figure(figsize=(10, 8))
    try:
        plt.scatter(execution_times, sam_execution_times, alpha=0.7, color='blue')
        plt.xlabel('Execution Time')
        plt.ylabel('Sam Execution Time')
        plt.title(f'Comparison of Execution Times (n = {len(context)})')
        plt.plot([lower, upper], [lower, upper], 'k--')  # Diagonal line (y = x)
        plt.grid(True)
        plt.savefig('benchmarks/execution_time_comparison.png')
    finally:
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(figure)
@given(
    data=st.data(),
    feature_size=st.integers(min_value=1, max_value=10),
    width=st.integers(min_value=MINIMUM_NETWORK_DIMENSION, max_value=200),
    height=st.integers(min_value=MINIMUM_NETWORK_DIMENSION, max_value=200),
    num_iterations=st.integers(min_value=10, max_value=1000),
    initial_learning_rate=st.floats(
        min_value=1e-3, max_value=1.0, allow_nan=False, allow_infinity=False
    ),
)
@settings(
    max_examples=20,
    deadline=None,
    suppress_health_check=(HealthCheck.too_slow,),
)
def benchmark_kohonen_networks_performance_mlflow(
    mlflow_context,
    data,
    feature_size,
    width,
    height,
    num_iterations,
    initial_learning_rate,
):
    """Train both Kohonen implementations on the same samples and record timings.

    For each Hypothesis example the two trainers run in separate MLflow
    runs with ``use_mlflow=True``. The last ``execution_time`` metric value
    of each run is then read back from MLflow and appended to
    ``mlflow_context`` as one dict.

    Args:
        mlflow_context: Session-wide list that receives one timing dict per
            example.
        data: Hypothesis data object used to draw the training samples.
        feature_size: Argument passed to ``generate_kohonen_samples`` and to
            the "sam" trainer.
        width: Network width handed to both trainers.
        height: Network height handed to both trainers.
        num_iterations: Number of training iterations handed to both trainers.
        initial_learning_rate: Initial learning rate handed to both trainers.
    """
    # Reset NumPy's global RNG so every example starts from the same seed.
    np.random.seed(42)
    run_stamp = datetime.datetime.now(datetime.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')
    samples = data.draw(generate_kohonen_samples(feature_size))

    # Train Kohonen Network and record metrics to MLFlow
    with mlflow.start_run(run_name=f"train_kohonen_network_{run_stamp}") as baseline_run:
        params_result = create_kohonen_params(
            samples, width, height, num_iterations, initial_learning_rate
        )
        assert isinstance(params_result, Success)
        training_result = train_kohonen_network(
            samples, params_result.unwrap(), use_mlflow=True
        )
        assert isinstance(training_result, Success)

    # Train Kohonen Network (Sam) and record metrics to MLFlow
    with mlflow.start_run(run_name=f"train_kohonen_network_sam_{run_stamp}") as sam_run:
        train_kohonen_network_sam(
            samples,
            num_iterations,
            width,
            height,
            feature_size,
            initial_learning_rate,
            use_mlflow=True,
        )

    # Read MLFlow to compare execution times
    tracking_client = mlflow.tracking.MlflowClient()

    def last_execution_time(finished_run):
        # Presumably each trainer logs "execution_time" when use_mlflow=True;
        # an empty history would make the [-1] lookup raise IndexError.
        history = tracking_client.get_metric_history(
            finished_run.info.run_id, "execution_time"
        )
        return history[-1].value

    baseline_seconds = last_execution_time(baseline_run)
    sam_seconds = last_execution_time(sam_run)

    timing_record = {
        "sam_execution_time": sam_seconds,
        "execution_time": baseline_seconds,
    }
    mlflow_context.append(timing_record)