# Copyright (c) 2022 Pratyush Kumar, Abhinav Prakash, and Yu Ding
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import pandas as pd
from ._AMK_subroutine import *
class AMK(object):
    """
    Kernel-based predictor whose terms combine covariates additively and
    multiplicatively (AMK).

    Constructing the object validates the inputs, normalises them to NumPy
    arrays and immediately computes predictions for ``X_test`` through
    ``kern_pred``.

    Parameters
    ----------
    X_train: list, np.ndarray, pd.Series or pd.DataFrame
        A matrix or dataframe of input variable values in the training dataset.

    y_train: list, np.ndarray, pd.Series or pd.DataFrame
        A numeric array for response values in the training dataset.

    X_test: list, np.ndarray, pd.Series or pd.DataFrame
        A matrix or dataframe of test input variable values to compute predictions.

    bw: string, list or np.ndarray
        A numeric array or a character input for bandwidth. If character, bandwidth
        is computed internally; the input should be either 'dpi' or 'dpi_gap'.
        A numeric input must have one entry per covariate.
        Default value is 'dpi'.

    n_multi_cov: int or string
        An integer or a character input specifying the number of multiplicative covariates
        in each additive term. Default is 3 (same as Lee et al., 2015). The character
        inputs can be: 'all' for a completely multiplicative model, or 'none' for a
        completely additive model. Forced to 'all' if the number of covariates is 1,
        and to 'all' unless 'none' was requested if the number of covariates is 2.
        An integer equal to the number of covariates is treated as 'all'.

    fixed_cov: list or np.ndarray
        An integer list specifying the fixed covariates column number(s).
        Reset to None if n_multi_cov resolves to 'all' or 'none'.
        Default value is [0,1].

    cir_cov: list, np.ndarray or int
        A list specifying the circular covariates column number(s) in X_train,
        or an integer when only one circular covariate is present.
        Default value is None.

    Attributes
    ----------
    X_train, y_train, X_test: np.ndarray
        Array copies of the inputs; 1-D ``X_train``/``X_test`` are reshaped to a
        single column.
    bw, n_multi_cov, fixed_cov, cir_cov
        The validated (and possibly normalised) settings described above.
    predictions
        Output of ``kern_pred`` for the data points in X_test.

    Raises
    ------
    ValueError
        If any argument has an unsupported type, the train/test feature counts
        or train sample counts disagree, or a column index is out of range.
    """

    @staticmethod
    def _is_integer(value):
        """Return True for Python/NumPy integers; booleans are not counted."""
        return isinstance(value, (int, np.integer)) and not isinstance(value, bool)

    def __init__(self, X_train, y_train, X_test, bw='dpi', n_multi_cov=3, fixed_cov=[0, 1], cir_cov=None):
        accepted_types = (list, pd.DataFrame, pd.Series, np.ndarray)

        if not isinstance(X_train, accepted_types):
            raise ValueError(
                "The X_train should be either a list or numpy array or dataframe.")
        if not isinstance(X_test, accepted_types):
            raise ValueError(
                "The X_test should be either a list or numpy array or dataframe.")

        # Convert before touching ``.shape`` so that plain lists, which the
        # type checks above explicitly allow, do not fail with AttributeError.
        X_train = np.array(X_train)
        X_test = np.array(X_test)

        if X_train.ndim > 1:
            # A 1-D X_test cannot match a multi-column X_train; report it as a
            # feature mismatch instead of failing on ``shape[1]``.
            if X_test.ndim < 2 or X_train.shape[1] != X_test.shape[1]:
                raise ValueError(
                    "The number of features in train and test set must be same.")

        if not isinstance(y_train, accepted_types):
            raise ValueError(
                "The target data should be either a list or numpy array or dataframe.")
        y_train = np.array(y_train)

        if len(X_train) != len(y_train):
            raise ValueError(
                "The X_train and y_train should have same number of data points.")

        ncov = X_train.shape[1] if X_train.ndim == 2 else 1

        # Bandwidth: either a recognised keyword or one value per covariate.
        if not isinstance(bw, (list, np.ndarray)):
            if bw not in ['dpi', 'dpi_gap']:
                raise ValueError(
                    "The bw must a list or an array or set to 'dpi' or 'dpi_gap'.")
        elif len(bw) != ncov:
            raise ValueError(
                "The length of bw must be same as the number of covariates.")

        # n_multi_cov: an integer or one of the two keywords.
        if self._is_integer(n_multi_cov):
            n_multi_cov = int(n_multi_cov)
        elif n_multi_cov not in ['all', 'none']:
            raise ValueError(
                "The n_multi_cov must be set to 'all' or 'none' or an integer.")

        # With one or two covariates only the keyword forms are meaningful.
        if ncov == 1:
            n_multi_cov = 'all'
        elif ncov == 2:
            if n_multi_cov != 'none':
                n_multi_cov = 'all'

        # Copy a caller-supplied (or default) list so the instance never
        # shares state with the mutable default argument.
        if isinstance(fixed_cov, list):
            fixed_cov = list(fixed_cov)

        if isinstance(n_multi_cov, str):
            # 'all' / 'none': fixed covariates do not apply.
            fixed_cov = None
        else:
            if n_multi_cov < 1 or n_multi_cov > ncov:
                raise ValueError(
                    "if n_multi_cov is not set to 'all' or 'none', then it must be set to an integer greater than 1, and less than or equal to the number of covariates.")
            if n_multi_cov == ncov:
                n_multi_cov = 'all'
                fixed_cov = None
            elif fixed_cov is not None:
                if not isinstance(fixed_cov, (list, np.ndarray)):
                    raise ValueError(
                        "The fixed_cov should either be a list or an array or set to None.")
                # Every entry must be a distinct, valid column index.
                if len(set(fixed_cov) & set(range(ncov))) != len(fixed_cov):
                    raise ValueError(
                        "Any or all the values in fixed_cov exceeds the number of columns in X_train.")
                if len(fixed_cov) >= n_multi_cov:
                    raise ValueError(
                        "The fixed_cov should be less than n_multi_cov.")

        if cir_cov is not None:
            if self._is_integer(cir_cov):
                cir_cov = [int(cir_cov)]
            elif not isinstance(cir_cov, (list, np.ndarray)):
                raise ValueError(
                    "The circ_cov should be a list or 1d-array or single integer value or set to None.")
            # Range-check the single-integer form too, not only lists/arrays.
            if len(set(cir_cov) & set(range(ncov))) != len(cir_cov):
                raise ValueError(
                    "Any or all the values in cir_cov exceeds the number of columns in X_train.")

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.bw = bw
        self.n_multi_cov = n_multi_cov
        self.fixed_cov = fixed_cov
        self.cir_cov = cir_cov

        # Single-covariate inputs are stored as one-column matrices.
        if self.X_train.ndim == 1:
            self.X_train = self.X_train.reshape(-1, 1)
            self.X_test = self.X_test.reshape(-1, 1)

        # Hand the normalised arrays (not the raw caller objects) to the
        # prediction routine so lists/dataframes behave like arrays.
        self.predictions = kern_pred(self.X_train, self.y_train, self.X_test, self.bw,
                                     self.n_multi_cov, self.fixed_cov, self.cir_cov)