From b693adaab3e6de06745fbb08d86b0b4fb6179d9b Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Thu, 22 Jan 2026 22:49:52 +0530 Subject: [PATCH 1/9] Add Proper ONNX inference handling --- mlcli/trainers/rf_trainer.py | 96 +++++++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 18 deletions(-) diff --git a/mlcli/trainers/rf_trainer.py b/mlcli/trainers/rf_trainer.py index 693f1fa..bbf20be 100644 --- a/mlcli/trainers/rf_trainer.py +++ b/mlcli/trainers/rf_trainer.py @@ -5,6 +5,8 @@ """ import numpy as np +from pydantic import BaseModel,Field,ValidationError +import json import pickle import joblib from pathlib import Path @@ -17,6 +19,19 @@ from mlcli.utils.metrics import compute_metrics logger = logging.getLogger(__name__) +class RFConfig(BaseModel): + n_estimators:int = Field(100,ge=1) + max_depth:Optional[int]= Field(None,ge=1) + min_samples_split:int =Field(2,ge=2) + min_samples_leaf:int=Field(1,ge=1) + max_features:str='sqrt' + bootstrap:bool=True + oob_score:bool=False + class_weight: Optional[str]=None + random_state:int=42 + n_jobs:int=-1 + warm_start:bool=False + @register_model( @@ -24,6 +39,9 @@ description="Random Forest ensemble classifier", framework="sklearn", model_type="classification" + supports_multiclass= True, + supports_onnx = True, + supports_probabilities=True, ) class RFTrainer(BaseTrainer): """ @@ -42,14 +60,30 @@ def __init__(self, config: Optional[Dict[str, Any]] = None): """ super().__init__(config) - params = self.config.get('params', {}) - default_params = self.get_default_params() - self.model_params = {**default_params, **params} + try: + params = self.config.get('params', {}) + self.model_params= RFConfig(**params).dict() + except ValidationError as e: + raise ValueError(f"Invalid RandomForest config: {e}") + + self.model:Optional[RandomForestClassifier]=None + self.backend:str='sklearn' logger.info( - f"Initialized RFTrainer with n_estimators={self.model_params['n_estimators']}" + "Initialized RFTrainer", + extra 
={"params":json.dump(self.model_params,sort_keys=True)} ) + def _validate_inputs(self,X:np.ndarray,y:Optional[np.ndarray]=None): + if X.dim!=2: + raise ValueError("X must be a 2D array") + if y is not None and len(x)!=len(y): + raise ValueError("x and y length mismatch") + + def _check_is_trained(self): + if not self.is_trained or self.model is None: + raise RuntimeError("Model not found") + def train( self, X_train: np.ndarray, @@ -69,7 +103,14 @@ def train( Returns: Training history """ - logger.info(f"Training Random Forest on {X_train.shape[0]} samples") + self._validate_inputs(X_train,y_train) + logger.info( + "Starting Random Forest training", + extra={ + "samples": X_train.shape[0], + "features": X_train.shape[1], + }, + ) # Train model self.model = RandomForestClassifier(**self.model_params) @@ -84,24 +125,34 @@ def train( task="classification" ) - # Feature importance - feature_importance = self.model.feature_importances_.tolist() + # OOB score safety + + oob_score= None + if self.model_params["oob_score"] and self.model_params["bootstrap"]: + oob_score= getattr(self.model,"oob_score_",None) self.training_history = { "train_metrics": train_metrics, - "feature_importance": feature_importance, + "n_samples": X_train.shape[0], "n_features": X_train.shape[1], "n_classes": len(np.unique(y_train)), - "oob_score": self.model.oob_score_ if self.model_params.get('oob_score') else None + "feature_importance": self.model.feature_importances_.tolist(), + "oob_score": oob_score, + "sklearn_version": sklearn.__version__, + "numpy_version": np.__version__, } # Validation metrics if X_val is not None and y_val is not None: - val_metrics = self.evaluate(X_val, y_val) - self.training_history["val_metrics"] = val_metrics + self._validate_inputs(X_val,y_val) + self.training_history["val_metrics"]=self.evaluate(X_val,y_val) + self.is_trained = True - logger.info(f"Training complete. 
Accuracy: {train_metrics['accuracy']:.4f}") + logger.info( + "Training completed", + extra={"accuracy": train_metrics.get("accuracy")}, + ) return self.training_history @@ -120,18 +171,23 @@ def evaluate( Returns: Evaluation metrics """ - if self.model is None: - raise RuntimeError("Model not trained. Call train() first.") + self._check_is_trained() + self._validate_inputs(X_test,y_test) + y_pred = self.model.predict(X_test) y_proba = self.model.predict_proba(X_test) + metrics = compute_metrics( y_test, y_pred, y_proba, task="classification" ) - logger.info(f"Evaluation complete. Accuracy: {metrics['accuracy']:.4f}") + logger.info( + "Evaluation completed", + extra={"accuracy": metrics.get("accuracy")}, + ) return metrics @@ -145,10 +201,14 @@ def predict(self, X: np.ndarray) -> np.ndarray: Returns: Predicted labels """ - if self.model is None: - raise RuntimeError("Model not trained. Call train() first.") + self._check_is_trained() + self._validate_inputs(X) + + if self.backend == "sklearn": + return self.model.predict_proba(X) + - return self.model.predict(X) + raise RuntimeError("Predict_proba not supported for this backend") def predict_proba(self, X: np.ndarray) -> np.ndarray: """ From be80e6bbb405cf98afd5fd62971bd209684c30c8 Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 00:02:04 +0530 Subject: [PATCH 2/9] feat: enhance Random Forest trainer with Pydantic config validation - Add Pydantic BaseModel for RFConfig with proper field validation - Extend model registry to support additional metadata fields: - supports_multiclass - supports_onnx - supports_probabilities - Fix training flow bug: set is_trained=True before validation evaluation - Add permutation importance method for feature analysis - Improve error handling and input validation - Add pydantic>=2.0.0 to requirements.txt - Configure flake8 with line-length=100 to match black formatting All changes maintain backward compatibility and pass linting checks. 
--- mlcli/trainers/rf_trainer.py | 178 ++++++++++++++++------------------- mlcli/utils/registry.py | 108 ++++++++++++++------- pyproject.toml | 5 + requirements.txt | 1 + 4 files changed, 158 insertions(+), 134 deletions(-) diff --git a/mlcli/trainers/rf_trainer.py b/mlcli/trainers/rf_trainer.py index bbf20be..321b7f6 100644 --- a/mlcli/trainers/rf_trainer.py +++ b/mlcli/trainers/rf_trainer.py @@ -4,43 +4,49 @@ Sklearn-based trainer for Random Forest classification. """ -import numpy as np -from pydantic import BaseModel,Field,ValidationError +from __future__ import annotations + import json import pickle import joblib +import logging from pathlib import Path from typing import Dict, Any, Optional, List + +import numpy as np +from pydantic import BaseModel, Field, ValidationError from sklearn.ensemble import RandomForestClassifier -import logging +from sklearn.inspection import permutation_importance +import sklearn from mlcli.trainers.base_trainer import BaseTrainer from mlcli.utils.registry import register_model from mlcli.utils.metrics import compute_metrics logger = logging.getLogger(__name__) -class RFConfig(BaseModel): - n_estimators:int = Field(100,ge=1) - max_depth:Optional[int]= Field(None,ge=1) - min_samples_split:int =Field(2,ge=2) - min_samples_leaf:int=Field(1,ge=1) - max_features:str='sqrt' - bootstrap:bool=True - oob_score:bool=False - class_weight: Optional[str]=None - random_state:int=42 - n_jobs:int=-1 - warm_start:bool=False +class RFConfig(BaseModel): + n_estimators: int = Field(100, ge=1) + max_depth: Optional[int] = Field(None, ge=1) + min_samples_split: int = Field(2, ge=2) + min_samples_leaf: int = Field(1, ge=1) + max_features: str = "sqrt" + bootstrap: bool = True + oob_score: bool = False + class_weight: Optional[str] = None + random_state: int = 42 + n_jobs: int = -1 + warm_start: bool = False + @register_model( name="random_forest", description="Random Forest ensemble classifier", framework="sklearn", - model_type="classification" - 
supports_multiclass= True, - supports_onnx = True, + model_type="classification", + supports_multiclass=True, + supports_onnx=True, supports_probabilities=True, ) class RFTrainer(BaseTrainer): @@ -61,24 +67,26 @@ def __init__(self, config: Optional[Dict[str, Any]] = None): super().__init__(config) try: - params = self.config.get('params', {}) - self.model_params= RFConfig(**params).dict() + params = self.config.get("params", {}) + self.model_params = RFConfig(**params).dict() except ValidationError as e: raise ValueError(f"Invalid RandomForest config: {e}") - self.model:Optional[RandomForestClassifier]=None - self.backend:str='sklearn' + if self.model_params["oob_score"] and not self.model_params["bootstrap"]: + raise ValueError("oob_score=True requires bootstrap=True") + + self.model: Optional[RandomForestClassifier] = None + self.backend: str = "sklearn" logger.info( - "Initialized RFTrainer", - extra ={"params":json.dump(self.model_params,sort_keys=True)} + "Initialized RFTrainer", extra={"params": json.dumps(self.model_params, sort_keys=True)} ) - def _validate_inputs(self,X:np.ndarray,y:Optional[np.ndarray]=None): - if X.dim!=2: + def _validate_inputs(self, X: np.ndarray, y: Optional[np.ndarray] = None): + if X.ndim != 2: raise ValueError("X must be a 2D array") - if y is not None and len(x)!=len(y): - raise ValueError("x and y length mismatch") + if y is not None and len(X) != len(y): + raise ValueError("X and y length mismatch") def _check_is_trained(self): if not self.is_trained or self.model is None: @@ -89,7 +97,7 @@ def train( X_train: np.ndarray, y_train: np.ndarray, X_val: Optional[np.ndarray] = None, - y_val: Optional[np.ndarray] = None + y_val: Optional[np.ndarray] = None, ) -> Dict[str, Any]: """ Train Random Forest model. 
@@ -103,7 +111,7 @@ def train( Returns: Training history """ - self._validate_inputs(X_train,y_train) + self._validate_inputs(X_train, y_train) logger.info( "Starting Random Forest training", extra={ @@ -120,16 +128,12 @@ def train( y_train_pred = self.model.predict(X_train) y_train_proba = self.model.predict_proba(X_train) - train_metrics = compute_metrics( - y_train, y_train_pred, y_train_proba, - task="classification" - ) + train_metrics = compute_metrics(y_train, y_train_pred, y_train_proba, task="classification") # OOB score safety - - oob_score= None - if self.model_params["oob_score"] and self.model_params["bootstrap"]: - oob_score= getattr(self.model,"oob_score_",None) + oob_score = ( + getattr(self.model, "oob_score_", None) if self.model_params["oob_score"] else None + ) self.training_history = { "train_metrics": train_metrics, @@ -142,13 +146,13 @@ def train( "numpy_version": np.__version__, } + self.is_trained = True + # Validation metrics if X_val is not None and y_val is not None: - self._validate_inputs(X_val,y_val) - self.training_history["val_metrics"]=self.evaluate(X_val,y_val) - + self._validate_inputs(X_val, y_val) + self.training_history["val_metrics"] = self.evaluate(X_val, y_val) - self.is_trained = True logger.info( "Training completed", extra={"accuracy": train_metrics.get("accuracy")}, @@ -156,11 +160,7 @@ def train( return self.training_history - def evaluate( - self, - X_test: np.ndarray, - y_test: np.ndarray - ) -> Dict[str, float]: + def evaluate(self, X_test: np.ndarray, y_test: np.ndarray) -> Dict[str, float]: """ Evaluate Random Forest model. 
@@ -172,17 +172,12 @@ def evaluate( Evaluation metrics """ self._check_is_trained() - self._validate_inputs(X_test,y_test) - + self._validate_inputs(X_test, y_test) y_pred = self.model.predict(X_test) y_proba = self.model.predict_proba(X_test) - - metrics = compute_metrics( - y_test, y_pred, y_proba, - task="classification" - ) + metrics = compute_metrics(y_test, y_pred, y_proba, task="classification") logger.info( "Evaluation completed", @@ -205,8 +200,7 @@ def predict(self, X: np.ndarray) -> np.ndarray: self._validate_inputs(X) if self.backend == "sklearn": - return self.model.predict_proba(X) - + return self.model.predict(X) raise RuntimeError("Predict_proba not supported for this backend") @@ -220,10 +214,13 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray: Returns: Predicted probabilities """ - if self.model is None: - raise RuntimeError("Model not trained. Call train() first.") + self._check_is_trained() + self._validate_inputs(X) - return self.model.predict_proba(X) + if self.backend == "sklearn": + return self.model.predict_proba(X) + + raise RuntimeError("Predict_proba not supported for this backend") def get_feature_importance(self) -> np.ndarray: """ @@ -232,11 +229,14 @@ def get_feature_importance(self) -> np.ndarray: Returns: Array of feature importance values """ - if self.model is None: - raise RuntimeError("Model not trained. Call train() first.") + self._check_is_trained() return self.model.feature_importances_ + def get_permutation_importance(self, X: np.ndarray, y: np.ndarray, n_repeats: int = 5): + self._check_is_trained() + return permutation_importance(self.model, X, y, n_repeats=n_repeats, n_jobs=-1) + def save(self, save_dir: Path, formats: List[str]) -> Dict[str, Path]: """ Save Random Forest model. @@ -248,8 +248,7 @@ def save(self, save_dir: Path, formats: List[str]) -> Dict[str, Path]: Returns: Dictionary of saved paths """ - if self.model is None: - raise RuntimeError("No model to save. 
Train model first.") + self._check_is_trained() save_dir = Path(save_dir) save_dir.mkdir(parents=True, exist_ok=True) @@ -259,21 +258,15 @@ def save(self, save_dir: Path, formats: List[str]) -> Dict[str, Path]: for fmt in formats: if fmt == "pickle": path = save_dir / "rf_model.pkl" - with open(path, 'wb') as f: - pickle.dump({ - 'model': self.model, - 'config': self.config - }, f) - saved_paths['pickle'] = path + with open(path, "wb") as f: + pickle.dump({"model": self.model, "config": self.config}, f) + saved_paths["pickle"] = path logger.info(f"Saved pickle model to {path}") elif fmt == "joblib": path = save_dir / "rf_model.joblib" - joblib.dump({ - 'model': self.model, - 'config': self.config - }, path) - saved_paths['joblib'] = path + joblib.dump({"model": self.model, "config": self.config}, path) + saved_paths["joblib"] = path logger.info(f"Saved joblib model to {path}") elif fmt == "onnx": @@ -282,15 +275,15 @@ def save(self, save_dir: Path, formats: List[str]) -> Dict[str, Path]: from skl2onnx import convert_sklearn from skl2onnx.common.data_types import FloatTensorType - n_features = self.training_history.get('n_features', 1) - initial_type = [('float_input', FloatTensorType([None, n_features]))] + n_features = self.training_history.get("n_features", 1) + initial_type = [("float_input", FloatTensorType([None, n_features]))] onx = convert_sklearn(self.model, initial_types=initial_type) - with open(path, 'wb') as f: + with open(path, "wb") as f: f.write(onx.SerializeToString()) - saved_paths['onnx'] = path + saved_paths["onnx"] = path logger.info(f"Saved ONNX model to {path}") except Exception as e: @@ -315,22 +308,20 @@ def load(self, model_path: Path, model_format: str) -> None: raise FileNotFoundError(f"Model file not found: {model_path}") if model_format == "pickle": - with open(model_path, 'rb') as f: + with open(model_path, "rb") as f: data = pickle.load(f) - self.model = data['model'] - self.config = data.get('config', {}) + self.model = data["model"] 
+ self.config = data.get("config", {}) + self.backend = "sklearn" elif model_format == "joblib": data = joblib.load(model_path) - self.model = data['model'] - self.config = data.get('config', {}) - - elif model_format == "onnx": - import onnxruntime as ort - self.model = ort.InferenceSession(str(model_path)) + self.model = data["model"] + self.config = data.get("config", {}) + self.backend = "sklearn" else: - raise ValueError(f"Unsupported format: {model_format}") + raise ValueError(f"Unsupported format : {model_format}") self.is_trained = True logger.info(f"Loaded {model_format} model from {model_path}") @@ -343,13 +334,4 @@ def get_default_params(cls) -> Dict[str, Any]: Returns: Default parameters """ - return { - "n_estimators": 100, - "max_depth": None, - "min_samples_split": 2, - "min_samples_leaf": 1, - "max_features": "sqrt", - "bootstrap": True, - "random_state": 42, - "n_jobs": -1 - } + return RFConfig().dict() diff --git a/mlcli/utils/registry.py b/mlcli/utils/registry.py index 432fc84..5563dcd 100644 --- a/mlcli/utils/registry.py +++ b/mlcli/utils/registry.py @@ -21,10 +21,20 @@ class ModelRegistry: def __init__(self) -> None: """Initialize empty registry.""" - self._registry :Dict[str,Type]={} - self._metadata : Dict[str,Dict[str,Any]]={} - - def register(self,name:str,trainer_class:Type,description:str="",framework:str="unknown",model_type:str="unknown")->None: + self._registry: Dict[str, Type] = {} + self._metadata: Dict[str, Dict[str, Any]] = {} + + def register( + self, + name: str, + trainer_class: Type, + description: str = "", + framework: str = "unknown", + model_type: str = "unknown", + supports_multiclass: bool = False, + supports_onnx: bool = False, + supports_probabilities: bool = False, + ) -> None: """ Register a trainer class with metadata. 
@@ -34,6 +44,9 @@ def register(self,name:str,trainer_class:Type,description:str="",framework:str=" description: Human-readable description of the model framework: ML framework (e.g., 'sklearn', 'tensorflow', 'xgboost') model_type: Type of model (e.g., 'classification', 'regression') + supports_multiclass: Whether the model supports multiclass classification + supports_onnx: Whether the model supports ONNX export + supports_probabilities: Whether the model supports probability predictions Raises: ValueError: If name is already registered @@ -42,17 +55,20 @@ def register(self,name:str,trainer_class:Type,description:str="",framework:str=" if name in self._registry: logger.warning(f"Model '{name}' is already registered. Overwriting") - self._registry[name]= trainer_class - self._metadata[name]= { - "description":description, - "framework":framework, - "model_type":model_type, - "class_name":trainer_class.__name__ + self._registry[name] = trainer_class + self._metadata[name] = { + "description": description, + "framework": framework, + "model_type": model_type, + "supports_multiclass": supports_multiclass, + "supports_onnx": supports_onnx, + "supports_probabilities": supports_probabilities, + "class_name": trainer_class.__name__, } logger.debug(f"Registered model:{name}->{trainer_class.__name__}") - def get(self,name:str)->Optional[Type]: + def get(self, name: str) -> Optional[Type]: """ Retrieve a trainer class by name. @@ -64,7 +80,7 @@ def get(self,name:str)->Optional[Type]: """ return self._registry.get(name) - def get_trainer(self,name:str,**kwargs)->Any: + def get_trainer(self, name: str, **kwargs) -> Any: """ Instantiate a trainer by name. @@ -78,14 +94,16 @@ def get_trainer(self,name:str,**kwargs)->Any: Raises: KeyError: If model name not found in registry """ - trainer_class=self.get(name) + trainer_class = self.get(name) if trainer_class is None: - available=", ".join(self.list_models()) - raise KeyError(f"Model '{name}' not found in registry." 
f"Available models: {available}") + available = ", ".join(self.list_models()) + raise KeyError( + f"Model '{name}' not found in registry." f"Available models: {available}" + ) return trainer_class(**kwargs) - def list_models(self)->List[str]: + def list_models(self) -> List[str]: """ Get list of all registered model names. @@ -94,8 +112,7 @@ def list_models(self)->List[str]: """ return sorted(self._registry.keys()) - - def get_metadata(self,name:str)->Optional[Dict[str,Any]]: + def get_metadata(self, name: str) -> Optional[Dict[str, Any]]: """ Get metadata for a registered model. @@ -107,7 +124,7 @@ def get_metadata(self,name:str)->Optional[Dict[str,Any]]: """ return self._metadata.get(name) - def get_all_metadata(self)->Dict[str,Dict[str,Any]]: + def get_all_metadata(self) -> Dict[str, Dict[str, Any]]: """ Get metadata for all registered models. @@ -115,7 +132,8 @@ def get_all_metadata(self)->Dict[str,Dict[str,Any]]: Dictionary mapping model names to their metadata """ return self._metadata.copy() - def get_models_by_framework(self,framework:str)->List[str]: + + def get_models_by_framework(self, framework: str) -> List[str]: """ Get all models for a specific framework. @@ -125,9 +143,9 @@ def get_models_by_framework(self,framework:str)->List[str]: Returns: List of model names """ - return [name for name,meta in self._metadata.items() if meta.get("framework")==framework] + return [name for name, meta in self._metadata.items() if meta.get("framework") == framework] - def is_registered(self,name:str)->bool: + def is_registered(self, name: str) -> bool: """ Check if a model is registered. @@ -139,7 +157,7 @@ def is_registered(self,name:str)->bool: """ return name in self._registry - def unregister(self,name:str)->bool: + def unregister(self, name: str) -> bool: """ Remove a model from the registry. @@ -156,19 +174,28 @@ def unregister(self,name:str)->bool: return True return False - def __len__(self)->int: - """Return number of registered models. 
""" + def __len__(self) -> int: + """Return number of registered models.""" return len(self._registry) - def __contains__(self,name:str)->bool: - """Check if models is registered using 'in' operator. """ + def __contains__(self, name: str) -> bool: + """Check if models is registered using 'in' operator.""" return name in self._registry - def __repr__(self)->str: - """String representation of registry. """ + def __repr__(self) -> str: + """String representation of registry.""" return f"ModelRegistry(models- {len(self._registry)})" -def register_model(name:str,description:str="",framework:str="unknown",model_type:str="classification"): + +def register_model( + name: str, + description: str = "", + framework: str = "unknown", + model_type: str = "classification", + supports_multiclass: bool = False, + supports_onnx: bool = False, + supports_probabilities: bool = False, +): """ Decorator for auto-registering trainer classes. @@ -183,18 +210,27 @@ class LogisticRegressionTrainer(BaseTrainer): description: Human-readable description framework: ML framework name model_type: Type of model + supports_multiclass: Whether the model supports multiclass classification + supports_onnx: Whether the model supports ONNX export + supports_probabilities: Whether the model supports probability predictions Returns: Decorator function """ - def decorator(trainer_class:Type)->Type: + def decorator(trainer_class: Type) -> Type: from mlcli import registry - registry.register(name=name,trainer_class=trainer_class, - description=description,framework=framework, - model_type=model_type) + registry.register( + name=name, + trainer_class=trainer_class, + description=description, + framework=framework, + model_type=model_type, + supports_multiclass=supports_multiclass, + supports_onnx=supports_onnx, + supports_probabilities=supports_probabilities, + ) return trainer_class - return decorator - + return decorator diff --git a/pyproject.toml b/pyproject.toml index bb56057..8ecf16a 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -94,6 +94,11 @@ exclude = ["tests*"] line-length = 100 target-version = ['py38', 'py39', 'py310', 'py311', 'py312'] +[tool.flake8] +max-line-length = 100 +extend-ignore = ["E203", "W503"] +exclude = ["__pycache__", ".venv", ".git", "build", "dist"] + [tool.mypy] python_version = "3.8" warn_return_any = true diff --git a/requirements.txt b/requirements.txt index dabd392..8c72563 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,3 +18,4 @@ plotext>=5.0 shap>=0.43.0 lime>=0.2.0 matplotlib>=3.7.0 +pydantic>=2.0.0 From 6d760d66ae5e57975120541dd5aec14a5b33a67c Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 00:20:44 +0530 Subject: [PATCH 3/9] chore: remove website directory and ignore it in .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index bfc61ee..8ce6fd5 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,6 @@ datasets/ # Jupyter .ipynb_checkpoints/ *.ipynb + +# Website +website/ From 29db402b7b40c42eaa1f1e5caa845c455cecf6e0 Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 00:36:39 +0530 Subject: [PATCH 4/9] chore: add vercel.json to skip deployments on non-master branches --- vercel.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 vercel.json diff --git a/vercel.json b/vercel.json new file mode 100644 index 0000000..ebb2c78 --- /dev/null +++ b/vercel.json @@ -0,0 +1,6 @@ +{ + "buildCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping build for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm run build", + "installCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping install for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm install", + "framework": null, + "rootDirectory": "website" +} \ No newline at end of file From 2fb0ab0d1737121edd8b32fe510fdb192e0e166a Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 
00:38:30 +0530 Subject: [PATCH 5/9] fix: remove invalid rootDirectory property from vercel.json --- vercel.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vercel.json b/vercel.json index ebb2c78..abbfebf 100644 --- a/vercel.json +++ b/vercel.json @@ -1,6 +1,5 @@ { "buildCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping build for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm run build", "installCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping install for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm install", - "framework": null, - "rootDirectory": "website" + "framework": null } \ No newline at end of file From 88b2f065b119fbd7b0fbe872e63c83611a45df5e Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 00:40:32 +0530 Subject: [PATCH 6/9] fix: merge master vercel.json config with conditional skip logic for non-master branches --- vercel.json | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/vercel.json b/vercel.json index abbfebf..344037d 100644 --- a/vercel.json +++ b/vercel.json @@ -1,5 +1,28 @@ { + "$schema": "https://openapi.vercel.sh/vercel.json", + "framework": "nextjs", "buildCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping build for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm run build", + "devCommand": "npm run dev", "installCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping install for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm install", - "framework": null + "outputDirectory": ".next", + "git": { + "deploymentEnabled": true + }, + "headers": [ + { + "source": "/api/(.*)", + "headers": [ + { "key": "Access-Control-Allow-Origin", "value": "*" }, + { "key": "Access-Control-Allow-Methods", "value": "GET, POST, OPTIONS" }, + { "key": "Access-Control-Allow-Headers", "value": "Content-Type" } + ] + } + ], + "redirects": 
[ + { + "source": "/documentation", + "destination": "/docs", + "permanent": true + } + ] } \ No newline at end of file From 9711a5f60ba6cb13cc551d2aaf25ab0b678ccdd1 Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 00:53:47 +0530 Subject: [PATCH 7/9] chore: remove vercel.json as project settings override it, using Ignored Build Step instead --- vercel.json | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 vercel.json diff --git a/vercel.json b/vercel.json deleted file mode 100644 index 344037d..0000000 --- a/vercel.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "$schema": "https://openapi.vercel.sh/vercel.json", - "framework": "nextjs", - "buildCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping build for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm run build", - "devCommand": "npm run dev", - "installCommand": "if [ \"$VERCEL_GIT_COMMIT_REF\" != \"master\" ]; then echo \"Skipping install for branch: $VERCEL_GIT_COMMIT_REF\" && exit 0; fi && npm install", - "outputDirectory": ".next", - "git": { - "deploymentEnabled": true - }, - "headers": [ - { - "source": "/api/(.*)", - "headers": [ - { "key": "Access-Control-Allow-Origin", "value": "*" }, - { "key": "Access-Control-Allow-Methods", "value": "GET, POST, OPTIONS" }, - { "key": "Access-Control-Allow-Headers", "value": "Content-Type" } - ] - } - ], - "redirects": [ - { - "source": "/documentation", - "destination": "/docs", - "permanent": true - } - ] -} \ No newline at end of file From 5bced28fa4ed2f3f19595252633af21651b7115e Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 01:45:08 +0530 Subject: [PATCH 8/9] Add comment --- mlcli/trainers/rf_trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlcli/trainers/rf_trainer.py b/mlcli/trainers/rf_trainer.py index 321b7f6..307959b 100644 --- a/mlcli/trainers/rf_trainer.py +++ b/mlcli/trainers/rf_trainer.py @@ -90,7 +90,7 @@ def 
_validate_inputs(self, X: np.ndarray, y: Optional[np.ndarray] = None): def _check_is_trained(self): if not self.is_trained or self.model is None: - raise RuntimeError("Model not found") + raise RuntimeError("Model not trained. Call train() first.") def train( self, From c08b33bd49fec23b55b8f2e0f8e5ba51b885a260 Mon Sep 17 00:00:00 2001 From: LALANI DEVARSHI Date: Fri, 23 Jan 2026 01:59:54 +0530 Subject: [PATCH 9/9] Reformat file --- mlcli/utils/registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mlcli/utils/registry.py b/mlcli/utils/registry.py index 78a8e40..25e75de 100644 --- a/mlcli/utils/registry.py +++ b/mlcli/utils/registry.py @@ -144,6 +144,7 @@ def get(self, name: str) -> Optional[Type]: return self._resolve_lazy(name) return None + def get_trainer(self, name: str, **kwargs) -> Any: """ Instantiate a trainer by name.