Overview
MLflow's default file-based backend doesn't scale. This recipe uses PostgreSQL for experiment/run metadata and S3 (or MinIO locally) for model artifacts.
docker-compose.yml
# Compose stack for an MLflow tracking server backed by PostgreSQL (metadata)
# and S3-compatible object storage (artifacts). MinIO is only started with
# `--profile local`.
services:
  # MLflow tracking server.
  mlflow:
    # NOTE(review): the stock image may not bundle the PostgreSQL driver
    # (psycopg2) or boto3 - confirm, and build a derived image if they are
    # missing.
    image: ghcr.io/mlflow/mlflow:v2.13.0
    # Folded scalar: the lines below are joined with spaces into one command.
    # `${VAR:?msg}` aborts `docker compose up` when a required variable is
    # unset or empty instead of producing a malformed URI.
    # DB_PASS is embedded in a URI, so special characters must be URL-encoded.
    command: >-
      mlflow server
      --backend-store-uri postgresql://${DB_USER:?set DB_USER}:${DB_PASS:?set DB_PASS}@db:5432/mlflow
      --default-artifact-root s3://${S3_BUCKET:?set S3_BUCKET}/mlflow
      --host 0.0.0.0
      --port 5000
      --workers 4
    environment:
      AWS_ACCESS_KEY_ID: "${AWS_ACCESS_KEY_ID}"
      AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}"
      AWS_DEFAULT_REGION: "${AWS_DEFAULT_REGION:-us-east-1}"
      # Leave empty for AWS S3; set for MinIO (e.g. http://minio:9000 when the
      # `local` profile is active).
      MLFLOW_S3_ENDPOINT_URL: "${MLFLOW_S3_ENDPOINT_URL:-}"
    ports:
      # NOTE(review): this publishes the server on every host interface; if
      # all access is meant to go through an authenticating reverse proxy,
      # consider binding to 127.0.0.1 only.
      - "5000:5000"
    depends_on:
      db:
        # Wait for the database healthcheck before starting the server.
        condition: service_healthy

  # PostgreSQL backend store for experiment/run metadata.
  db:
    image: postgres:16-alpine
    environment:
      POSTGRES_DB: mlflow
      POSTGRES_USER: "${DB_USER:?set DB_USER}"
      POSTGRES_PASSWORD: "${DB_PASS:?set DB_PASS}"
    volumes:
      - mlflowpg:/var/lib/postgresql/data
    healthcheck:
      # `$$` escapes Compose interpolation so the container's shell expands
      # the variables from the environment defined above.
      test: ["CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}"]
      interval: 5s
      retries: 10

  # MinIO for local S3-compatible storage (only with `--profile local`).
  minio:
    # NOTE(review): the image is unpinned; consider pinning a release tag.
    image: minio/minio
    command: server /data --console-address ":9001"
    environment:
      # Defaults match MinIO's well-known development credentials; override
      # them for anything that is not a throwaway local setup.
      MINIO_ROOT_USER: "${MINIO_ROOT_USER:-minioadmin}"
      MINIO_ROOT_PASSWORD: "${MINIO_ROOT_PASSWORD:-minioadmin}"
    ports:
      - "9000:9000"  # S3 API
      - "9001:9001"  # web console
    volumes:
      - miniodata:/data
    profiles: [local]

# Named volumes so database and object data survive container recreation.
volumes:
  mlflowpg:
  miniodata:
Python client
import mlflow

# Point the client at the tracking server and select the experiment by name.
mlflow.set_tracking_uri("http://mlflow.internal:5000")
mlflow.set_experiment("fraud-detection-v2")

# Parameters, metrics and the model logged inside the `with` body are attached
# to this run.
with mlflow.start_run(run_name="xgboost-baseline"):
    mlflow.log_params({"n_estimators": 300, "max_depth": 6})
    mlflow.log_metrics({"auc": 0.9821, "f1": 0.883})
    # `model` is not defined in this snippet; it is assumed to be an
    # already-trained XGBoost model created earlier in the caller's code.
    mlflow.xgboost.log_model(model, "model")
Nginx auth proxy (protect the UI)
# Expose the MLflow UI under /mlflow/ behind HTTP basic auth.
location /mlflow/ {
    # Forward the client's original Host header to the upstream.
    proxy_set_header Host $host;

    # Require credentials from the htpasswd file before proxying.
    auth_basic_user_file /etc/nginx/.htpasswd;
    auth_basic "MLflow";

    # Because proxy_pass carries a URI ("/"), the matched /mlflow/ prefix is
    # replaced by it, so the upstream receives paths relative to its own root.
    # NOTE(review): the MLflow UI may need `mlflow server --static-prefix
    # /mlflow` (and the prefix kept here) for its assets and API calls to
    # resolve under a sub-path - confirm against the running UI.
    proxy_pass http://127.0.0.1:5000/;
}
Model serving
# Serve the registered model "fraud-detector" (Production stage) over HTTP on
# port 8080, listening on all interfaces, with 4 workers and a conda-managed
# environment for the model's dependencies.
# NOTE(review): stage-based URIs such as ".../Production" are reportedly
# deprecated in recent MLflow releases in favour of aliases - confirm before
# upgrading.
mlflow models serve \
  --env-manager conda \
  --model-uri "models:/fraud-detector/Production" \
  --workers 4 \
  --host 0.0.0.0 \
  --port 8080