# Source code for dswe.svm

# Copyright (c) 2022 Pratyush Kumar, Abhinav Prakash, and Yu Ding

# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import pandas as pd
from sklearn.svm import SVC, SVR
from sklearn.preprocessing import StandardScaler


class SVMPowerCurve(object):
    """
    Support vector machine model with built-in feature and target scaling.

    ``fit`` standardizes the input features and then trains either a
    ``sklearn.svm.SVC`` (when every training target is integer valued) or a
    ``sklearn.svm.SVR`` (otherwise, on a standardized target).

    Parameters
    ----------
    kernel: string
        Kernel type to be used in the algorithm. Default is 'rbf' else can be
        'linear', 'poly', 'sigmoid'. 'poly' means polynomial and 'rbf' means
        radial basis function.

    degree: int
        Degree of the polynomial kernel function ('poly'). Ignored by all
        other kernels.

    gamma: string or float
        Kernel coefficient for 'poly', 'rbf' and 'sigmoid'. Can take 'scale'
        or 'auto' or a numeric value.
        If 'scale' (default), the gamma value is
        1/(number_of_features*variance_of_X_train).
        If 'auto', the gamma value is 1/number_of_features.

    C: float
        Regularization parameter. The strength of the regularization is
        inversely proportional to C. Must be strictly positive.

    Raises
    ------
    ValueError
        If any of the arguments fails the validation described above.
    """

    # Accepted values for the string-valued hyperparameters.
    _KERNELS = ('linear', 'poly', 'rbf', 'sigmoid')
    _GAMMA_PRESETS = ('scale', 'auto')

    def __init__(self, kernel='rbf', degree=3, gamma='scale', C=1.0):

        if not isinstance(kernel, str):
            raise ValueError("The kernel can only take string input.")
        if kernel not in self._KERNELS:
            raise ValueError(
                "The kernel can only take one of the following as input: "
                "'linear', 'poly', 'rbf' or 'sigmoid'.")

        if not isinstance(degree, int):
            raise ValueError("The degree must be an integer value.")

        # A numeric gamma is passed through as is; a non-numeric gamma must be
        # one of the named presets.
        if not isinstance(gamma, (int, float)) and gamma not in self._GAMMA_PRESETS:
            raise ValueError(
                "The gamma must be set to 'scale' or 'auto' or a numeric value.")

        # Reject non-numeric values first so the comparison below cannot raise
        # TypeError; `not C > 0` (rather than `C <= 0`) also rejects NaN.
        if not isinstance(C, (int, float)) or not C > 0:
            raise ValueError("The C must be a numeric value greater than 0.")

        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.C = C

    def _make_estimator(self, estimator_cls):
        """Instantiate ``estimator_cls`` with the hyperparameters relevant to the kernel."""
        params = {'kernel': self.kernel, 'C': self.C}
        if self.kernel != 'linear':
            params['gamma'] = self.gamma
        if self.kernel == 'poly':
            params['degree'] = self.degree
        return estimator_cls(**params)

    def fit(self, X_train, y_train):
        """
        Scale the training data and fit the underlying SVM model.

        Parameters
        ----------
        X_train: list, np.ndarray, pd.Series or pd.DataFrame
            A matrix or dataframe of input variable values in the training
            dataset. A one-dimensional input is treated as a single feature.

        y_train: list, np.ndarray, pd.Series or pd.DataFrame
            A numeric array for response values in the training dataset.

        Returns
        -------
        SVMPowerCurve
            self with trained parameter values.

        Raises
        ------
        ValueError
            If the inputs have an unsupported type or a different number of
            data points.
        """

        if not isinstance(X_train, (list, pd.DataFrame, pd.Series, np.ndarray)):
            raise ValueError(
                "The X_train should be either a list or numpy array or dataframe.")

        if not isinstance(y_train, (list, np.ndarray, pd.Series, pd.DataFrame)):
            raise ValueError(
                "The target data should be either a list or numpy array or dataframe.")

        if len(X_train) != len(y_train):
            raise ValueError(
                "The X_train and y_train should have same number of data points.")

        self.X_train = np.array(X_train)
        self.y_train = np.array(y_train)

        if self.X_train.ndim == 1:
            self.X_train = self.X_train.reshape(-1, 1)

        # A single-column 2-D target (e.g. a one-column dataframe) is
        # flattened so both estimators receive a 1-D target.
        if self.y_train.ndim == 2 and self.y_train.shape[1] == 1:
            self.y_train = self.y_train.ravel()

        # scale the features
        self.scale_features = StandardScaler()
        self.X_train = self.scale_features.fit_transform(self.X_train)

        # Targets whose values all survive a round trip through int are
        # treated as class labels; anything else is treated as continuous.
        self.is_discrete = bool(
            (self.y_train == self.y_train.astype(int)).all())

        if self.is_discrete:
            self.y_train = self.y_train.astype(int)
            self.model = self._make_estimator(SVC)
        else:
            # scale the target; ravel (not squeeze) keeps a single-sample
            # target one-dimensional.
            self.scale_target = StandardScaler()
            self.y_train = self.scale_target.fit_transform(
                self.y_train.reshape(-1, 1)).ravel()
            self.model = self._make_estimator(SVR)

        self.model.fit(self.X_train, self.y_train)

        return self

    def predict(self, X_test):
        """
        Compute predictions with the fitted model.

        Parameters
        ----------
        X_test: list, np.ndarray, pd.Series or pd.DataFrame
            A matrix or dataframe of test input variable values to compute
            predictions. A one-dimensional input is treated as a single
            feature.

        Returns
        -------
        np.array
            A one-dimensional numeric array with one prediction per data
            point in X_test. For continuous targets the predictions are
            transformed back to the original target scale.

        Raises
        ------
        ValueError
            If X_test has an unsupported type or a different number of
            features than the training data.
        """

        if not isinstance(X_test, (list, pd.DataFrame, pd.Series, np.ndarray)):
            raise ValueError(
                "The X_test should be either a list or numpy array or dataframe.")

        X_test = np.array(X_test)
        if X_test.ndim == 1:
            X_test = X_test.reshape(-1, 1)

        if X_test.shape[1] != self.X_train.shape[1]:
            raise ValueError(
                "The number of features in train and test set must be same.")

        X_test = self.scale_features.transform(X_test)
        predictions = self.model.predict(X_test)

        if not self.is_discrete:
            # ravel (not squeeze) so that a single test point still yields a
            # length-1 array, matching the shape returned for discrete targets.
            predictions = self.scale_target.inverse_transform(
                predictions.reshape(-1, 1)).ravel()

        return predictions