70 lines
3.2 KiB
Python
70 lines
3.2 KiB
Python
from hypothesis import given, settings, HealthCheck, note, event, strategies as st
|
|
from returns.result import Success
|
|
import datetime
|
|
import numpy as np
|
|
import mlflow
|
|
import pytest
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
|
|
from tests.models.common import generate_kohonen_samples, MINIMUM_NETWORK_DIMENSION
|
|
from models import train_kohonen_network, train_kohonen_network_sam, create_kohonen_params
|
|
|
|
# Base name shared by every benchmark experiment created from this module.
mlflow_experiment_name = "Kohonen_Network_Benchmark"

# UTC time at import, formatted as an ISO-8601-style string with a literal "Z".
timestamp = datetime.datetime.now(tz=datetime.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')

# Select an experiment whose name is the base name plus the import-time stamp,
# so each import of this module targets its own experiment name.
mlflow.set_experiment(
    f"{mlflow_experiment_name}_{timestamp}"
)
|
|
|
|
@pytest.fixture(scope="session")
def mlflow_context():
    """Collect per-example timing rows and plot them when the session ends.

    Yields:
        list: An initially empty list shared across the test session. The
        teardown code reads the ``execution_time`` and ``sam_execution_time``
        keys of every dict appended to it.

    After the session finishes, the collected rows are drawn as a scatter
    chart (``execution_time`` on x, ``sam_execution_time`` on y) with a
    dashed y = x reference line, and saved to
    ``benchmarks/execution_time_comparison.png``. Nothing is plotted or
    written when no rows were collected.
    """
    import os  # function-scope import: only the teardown below needs it

    context = []
    yield context

    # An empty DataFrame has no columns, so the column lookups below would
    # raise KeyError during teardown; there is nothing to plot anyway.
    if not context:
        return

    df = pd.DataFrame(context)

    # The y = x reference line must span the full range of BOTH series;
    # mixing one series' minimum with the other's maximum can produce a
    # line that misses (or runs backwards through) the plotted points.
    lower = min(df['execution_time'].min(), df['sam_execution_time'].min())
    upper = max(df['execution_time'].max(), df['sam_execution_time'].max())

    # savefig does not create missing directories.
    os.makedirs('benchmarks', exist_ok=True)

    # Plot scatter chart of execution_time vs sam_execution_time
    fig = plt.figure(figsize=(10, 8))
    try:
        plt.scatter(df['execution_time'], df['sam_execution_time'], alpha=0.7, color='blue')
        plt.xlabel('Execution Time')
        plt.ylabel('Sam Execution Time')
        plt.title(f'Comparison of Execution Times (n = {len(context)})')
        plt.plot([lower, upper], [lower, upper], 'k--')  # Diagonal line
        plt.grid(True)
        plt.savefig('benchmarks/execution_time_comparison.png')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
|
|
|
|
@given(
    data=st.data(),
    feature_size=st.integers(min_value=1, max_value=10),
    width=st.integers(min_value=MINIMUM_NETWORK_DIMENSION, max_value=200),
    height=st.integers(min_value=MINIMUM_NETWORK_DIMENSION, max_value=200),
    num_iterations=st.integers(min_value=10, max_value=1000),
    initial_learning_rate=st.floats(
        min_value=1e-3, max_value=1.0, allow_nan=False, allow_infinity=False
    ),
)
@settings(max_examples=20, deadline=None, suppress_health_check=(HealthCheck.too_slow,))
def benchmark_kohonen_networks_performance_mlflow(
    mlflow_context,
    data,
    feature_size,
    width,
    height,
    num_iterations,
    initial_learning_rate,
):
    """Train both Kohonen implementations on one drawn sample and record timings.

    For each Hypothesis example, a sample set is drawn, then
    ``train_kohonen_network`` and ``train_kohonen_network_sam`` are each run
    inside their own MLflow run. The last ``execution_time`` metric value of
    each run is read back through ``MlflowClient`` and appended to
    ``mlflow_context`` as one row.

    Args:
        mlflow_context: List to which one
            ``{"sam_execution_time", "execution_time"}`` dict is appended.
        data: Hypothesis data object used to draw the training samples.
        feature_size: Passed to ``generate_kohonen_samples`` and to
            ``train_kohonen_network_sam``.
        width: Network width passed to both implementations.
        height: Network height passed to both implementations.
        num_iterations: Iteration count passed to both implementations.
        initial_learning_rate: Learning rate passed to both implementations.

    NOTE(review): assumes both training functions log an ``execution_time``
    metric to the active run when ``use_mlflow=True`` -- confirm against
    their implementations.
    """
    np.random.seed(42)
    timestamp = datetime.datetime.now(datetime.UTC).strftime('%Y-%m-%dT%H:%M:%SZ')

    samples = data.draw(generate_kohonen_samples(feature_size))

    # Train Kohonen Network and record metrics to MLFlow
    with mlflow.start_run(run_name=f"train_kohonen_network_{timestamp}") as run:
        params = create_kohonen_params(
            samples, width, height, num_iterations, initial_learning_rate
        )
        assert isinstance(params, Success)
        training_result = train_kohonen_network(samples, params.unwrap(), use_mlflow=True)
        assert isinstance(training_result, Success)

    # Train Kohonen Network (Sam) and record metrics to MLFlow
    with mlflow.start_run(run_name=f"train_kohonen_network_sam_{timestamp}") as sam_run:
        train_kohonen_network_sam(
            samples,
            num_iterations,
            width,
            height,
            feature_size,
            initial_learning_rate,
            use_mlflow=True,
        )

    # Read MLFlow to compare execution times
    client = mlflow.tracking.MlflowClient()

    def last_execution_time(finished_run):
        # Most recent "execution_time" value recorded for the given run.
        history = client.get_metric_history(finished_run.info.run_id, "execution_time")
        return history[-1].value

    execution_time = last_execution_time(run)
    sam_execution_time = last_execution_time(sam_run)

    timing_row = {
        "sam_execution_time": sam_execution_time,
        "execution_time": execution_time,
    }
    mlflow_context.append(timing_row)