# Copyright (c) 2022 Pratyush Kumar, Abhinav Prakash, and Yu Ding
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import pandas as pd
from sklearn.svm import SVC, SVR
from sklearn.preprocessing import StandardScaler
[docs]class SVMPowerCurve(object):
"""
Parameters
----------
kernel: string
Kernel type to be used in the algorithm. Default is 'rbf' else can be 'linear', 'poly', 'sigmoid'.
'poly' mean polynomial and 'rbf' means radial basis function.
degree: int
Degree of the polynomial kernel function ('poly'). Ignored by all other kernels.
gamma: string
Kernel coefficient for 'poly', 'radial' and 'sigmoid'. Can take 'scale' or 'auto' or float value.
If 'scale' (default), the gamma value is 1/(number_of_features*variance_of_X_train).
If 'auto', the gamma value is 1/number_of_features.
C: float
Regularization parameter. The strength of the regularization is inversely proportional to C.
Must be strictly positive.
"""
def __init__(self, kernel='rbf', degree=3, gamma='scale', C=1.0):
if isinstance(kernel, str):
if kernel not in ['linear', 'poly', 'rbf', 'sigmoid']:
raise ValueError(
"The kernel can only take followings as input: linear, radial, polynomial and sigmoid.")
else:
raise ValueError("The kernel can only take string input.")
if not isinstance(degree, int):
raise ValueError("The degree must be an integer value.")
if not (isinstance(gamma, int) or isinstance(gamma, float)):
if gamma not in ['scale', 'auto']:
raise ValueError(
"The gamma must be set to 'scale' or 'auto' or a numeric value.")
if not (isinstance(C, int) or isinstance(C, float)) and C > 0:
raise ValueError("The C must be a numeric value greater than 0.")
self.kernel = kernel
self.degree = degree
self.gamma = gamma
self.C = C
[docs] def fit(self, X_train, y_train):
"""
Parameters
----------
X_train: np.ndarray or pd.DataFrame
A matrix or dataframe of input variable values in the training dataset.
y_train: np.array
A numeric array for response values in the training dataset.
Returns
-------
SVMPowerCurve
self with trained parameter values.
"""
if not (isinstance(X_train, list) or isinstance(X_train, pd.DataFrame) or isinstance(X_train, pd.Series) or isinstance(X_train, np.ndarray)):
raise ValueError(
"The X_train should be either a list or numpy array or dataframe.")
if not (isinstance(y_train, list) or isinstance(y_train, np.ndarray)) or isinstance(y_train, pd.Series) or isinstance(y_train, pd.DataFrame):
raise ValueError(
"The target data should be either a list or numpy array or dataframe.")
if len(X_train) != len(y_train):
raise ValueError(
"The X_train and y_train should have same number of data points.")
self.X_train = np.array(X_train)
self.y_train = np.array(y_train)
if len(self.X_train.shape) == 1:
self.X_train = self.X_train.reshape(-1, 1)
# scale the features
self.scale_features = StandardScaler()
self.scale_features.fit(self.X_train)
self.X_train = self.scale_features.transform(self.X_train)
self.is_discrete = False
if (self.y_train == self.y_train.astype(int)).all():
# target values are discrete
self.y_train = self.y_train.astype(int)
self.is_discrete = True
if self.kernel == 'linear':
self.model = SVC(kernel=self.kernel, C=self.C)
elif self.kernel == 'poly':
self.model = SVC(kernel=self.kernel,
degree=self.degree, gamma=self.gamma, C=self.C)
else:
self.model = SVC(kernel=self.kernel,
gamma=self.gamma, C=self.C)
self.model.fit(self.X_train, self.y_train)
else:
# target values are continuous
self.scale_target = StandardScaler() # scale the target
self.scale_target.fit(self.y_train.reshape(-1, 1))
self.y_train = self.scale_target.transform(
self.y_train.reshape(-1, 1)).squeeze()
if self.kernel == 'linear':
self.model = SVR(kernel=self.kernel, C=self.C)
elif self.kernel == 'poly':
self.model = SVR(kernel=self.kernel,
degree=self.degree, gamma=self.gamma, C=self.C)
else:
self.model = SVR(kernel=self.kernel,
gamma=self.gamma, C=self.C)
self.model.fit(self.X_train, self.y_train)
return self
[docs] def predict(self, X_test):
"""
Parameters
----------
X_test: np.ndarray or pd.DataFrame
A matrix or dataframe of test input variable values to compute predictions.
Returns
-------
np.array
A numeric array for predictions at the data points in X_test.
"""
if not (isinstance(X_test, list) or isinstance(X_test, pd.DataFrame) or isinstance(X_test, pd.Series) or isinstance(X_test, np.ndarray)):
raise ValueError(
"The X_test should be either a list or numpy array or dataframe.")
X_test = np.array(X_test)
if len(X_test.shape) == 1:
X_test = X_test.reshape(-1, 1)
if len(self.X_train.shape) > 1:
if X_test.shape[1] != self.X_train.shape[1]:
raise ValueError(
"The number of features in train and test set must be same.")
X_test = self.scale_features.transform(X_test)
predictions = self.model.predict(X_test)
if not self.is_discrete:
predictions = self.scale_target.inverse_transform(
predictions.reshape(-1, 1)).squeeze()
return predictions