From 112484fbd2da8ab720e0fa9fb6a5039ae68fa069 Mon Sep 17 00:00:00 2001 From: xuhongzuo Date: Thu, 9 Nov 2023 16:08:17 +0800 Subject: [PATCH] doc init --- README.rst | 69 +++- deepod/core/base_model.py | 1 + deepod/core/networks/__init__.py | 12 + deepod/core/networks/base_networks.py | 32 +- deepod/core/networks/ts_network_tcn.py | 5 +- .../core/networks/ts_network_transformer.py | 13 +- deepod/metrics/_tsad_adjustment.py | 8 +- deepod/models/__init__.py | 5 +- deepod/models/tabular/devnet.py | 99 ++--- deepod/models/tabular/dif.py | 14 +- deepod/models/tabular/dsad.py | 3 +- deepod/models/tabular/dsvdd.py | 35 +- deepod/models/tabular/feawad.py | 3 + deepod/models/tabular/goad.py | 3 + deepod/models/tabular/icl.py | 6 +- deepod/models/tabular/neutral.py | 4 + deepod/models/tabular/prenet.py | 3 + deepod/models/tabular/rca.py | 77 ++-- deepod/models/tabular/rdp.py | 3 + deepod/models/tabular/repen.py | 4 +- deepod/models/tabular/rosas.py | 4 + deepod/models/tabular/slad.py | 4 + .../models/time_series/anomalytransformer.py | 5 + deepod/models/time_series/couta.py | 147 ++++---- deepod/models/time_series/dcdetector.py | 26 ++ deepod/models/time_series/devnet.py | 3 + deepod/models/time_series/dif.py | 4 + deepod/models/time_series/dsad.py | 4 +- deepod/models/time_series/dsvdd.py | 8 +- deepod/models/time_series/ncad.py | 344 ++++++++++++++++++ deepod/models/time_series/prenet.py | 3 + deepod/models/time_series/tcned.py | 3 + deepod/models/time_series/timesnet.py | 4 + deepod/models/time_series/tranad.py | 4 + deepod/models/time_series/usad.py | 3 + deepod/utils/data.py | 1 - docs/.debug.yml | 3 - docs/_config.yml | 14 - docs/_templates/class.rst | 22 ++ docs/additional.contributing.rst | 35 ++ docs/additional.license.rst | 33 ++ docs/additional.star_history.rst | 9 + docs/api_cc.rst | 28 ++ docs/api_reference.base_networks.rst | 23 ++ docs/api_reference.metrics.rst | 17 + docs/api_reference.rst | 17 + docs/api_reference.tabular.rst | 53 +++ 
docs/api_reference.time_series.rst | 40 ++ docs/conf.py | 36 +- docs/index.rst | 39 +- docs/install.rst | 34 -- docs/requirements.txt | 5 +- docs/zreferences.bib | 17 + testbed/testbed_unsupervised_tsad.py | 7 +- 54 files changed, 1111 insertions(+), 287 deletions(-) create mode 100644 deepod/models/time_series/ncad.py delete mode 100644 docs/.debug.yml delete mode 100644 docs/_config.yml create mode 100644 docs/_templates/class.rst create mode 100644 docs/additional.contributing.rst create mode 100644 docs/additional.license.rst create mode 100644 docs/additional.star_history.rst create mode 100644 docs/api_cc.rst create mode 100644 docs/api_reference.base_networks.rst create mode 100644 docs/api_reference.metrics.rst create mode 100644 docs/api_reference.rst create mode 100644 docs/api_reference.tabular.rst create mode 100644 docs/api_reference.time_series.rst delete mode 100644 docs/install.rst diff --git a/README.rst b/README.rst index 9936eb7..6d6ef62 100644 --- a/README.rst +++ b/README.rst @@ -5,6 +5,10 @@ Python Deep Outlier/Anomaly Detection (DeepOD) :target: https://github.com/xuhongzuo/DeepOD/actions/workflows/testing.yml :alt: testing2 +.. image:: https://readthedocs.org/projects/deepod/badge/?version=latest + :target: https://deepod.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + .. image:: https://coveralls.io/repos/github/xuhongzuo/DeepOD/badge.svg?branch=main :target: https://coveralls.io/github/xuhongzuo/DeepOD?branch=main :alt: coveralls @@ -18,7 +22,7 @@ Python Deep Outlier/Anomaly Detection (DeepOD) and `Anomaly Detection `_. ``DeepOD`` supports tabular anomaly detection and time-series anomaly detection. -DeepOD includes **26** deep outlier detection / anomaly detection algorithms (in unsupervised/weakly-supervised paradigm). +DeepOD includes **27** deep outlier detection / anomaly detection algorithms (in unsupervised/weakly-supervised paradigm). More baseline algorithms will be included later. 
@@ -169,14 +173,14 @@ Implemented Models RCA, IJCAI, 2021, unsupervised, RCA: A Deep Collaborative Autoencoder Approach for Anomaly Detection [#Liu2021RCA]_ GOAD, ICLR, 2020, unsupervised, Classification-Based Anomaly Detection for General Data [#Bergman2020GOAD]_ NeuTraL, ICML, 2021, unsupervised, Neural Transformation Learning for Deep Anomaly Detection Beyond Images [#Qiu2021Neutral]_ - ICL, ICLR, 2022, unsupervised, Anomaly Detection for Tabular Data with Internal Contrastive Learning - DIF, TKDE, 2023, unsupervised, Deep Isolation Forest for Anomaly Detection - SLAD, ICML, 2023, unsupervised, Fascinating Supervisory Signals and Where to Find Them: Deep Anomaly Detection with Scale Learning - DevNet, KDD, 2019, weakly-supervised, Deep Anomaly Detection with Deviation Networks - PReNet, KDD, 2023, weakly-supervised, Deep Weakly-supervised Anomaly Detection - Deep SAD, ICLR, 2020, weakly-supervised, Deep Semi-Supervised Anomaly Detection - FeaWAD, TNNLS, 2021, weakly-supervised, Feature Encoding with AutoEncoders for Weakly-supervised Anomaly Detection - RoSAS, IP&M, 2023, weakly-supervised, RoSAS: Deep semi-supervised anomaly detection with contamination-resilient continuous supervision + ICL, ICLR, 2022, unsupervised, Anomaly Detection for Tabular Data with Internal Contrastive Learning [#Shenkar2022ICL]_ + DIF, TKDE, 2023, unsupervised, Deep Isolation Forest for Anomaly Detection [#Xu2023DIF]_ + SLAD, ICML, 2023, unsupervised, Fascinating Supervisory Signals and Where to Find Them: Deep Anomaly Detection with Scale Learning [#Xu2023SLAD]_ + DevNet, KDD, 2019, weakly-supervised, Deep Anomaly Detection with Deviation Networks [#Pang2019DevNet]_ + PReNet, KDD, 2023, weakly-supervised, Deep Weakly-supervised Anomaly Detection [#Pang2023PreNet]_ + Deep SAD, ICLR, 2020, weakly-supervised, Deep Semi-Supervised Anomaly Detection [#Ruff2020DSAD]_ + FeaWAD, TNNLS, 2021, weakly-supervised, Feature Encoding with AutoEncoders for Weakly-supervised Anomaly Detection 
[#Zhou2021FeaWAD]_ + RoSAS, IP&M, 2023, weakly-supervised, RoSAS: Deep semi-supervised anomaly detection with contamination-resilient continuous supervision [#Xu2023RoSAS]_ **Time-series Anomaly Detection models:** @@ -187,15 +191,16 @@ Implemented Models DCdetector, KDD, 2023, unsupervised, DCdetector: Dual Attention Contrastive Representation Learning for Time Series Anomaly Detection [#Yang2023dcdetector]_ TimesNet, ICLR, 2023, unsupervised, TIMESNET: Temporal 2D-Variation Modeling for General Time Series Analysis [#Wu2023timesnet]_ AnomalyTransformer, ICLR, 2022, unsupervised, Anomaly Transformer: Time Series Anomaly Detection with Association Discrepancy [#Xu2022transformer]_ - TranAD, VLDB, 2022, unsupervised, TranAD: Deep Transformer Networks for Anomaly Detection in Multivariate Time Series Data - COUTA, arXiv, 2022, unsupervised, Calibrated One-class Classification for Unsupervised Time Series Anomaly Detection + NCAD, IJCAI, 2022, unsupervised, Neural Contextual Anomaly Detection for Time Series [#Carmona2022NCAD]_ + TranAD, VLDB, 2022, unsupervised, TranAD: Deep Transformer Networks for Anomaly Detection in Multivariate Time Series Data [#Tuli2022TranAD]_ + COUTA, arXiv, 2022, unsupervised, Calibrated One-class Classification for Unsupervised Time Series Anomaly Detection [#Xu2022COUTA]_ USAD, KDD, 2020, unsupervised, USAD: UnSupervised Anomaly Detection on Multivariate Time Series - DIF, TKDE, 2023, unsupervised, Deep Isolation Forest for Anomaly Detection - TcnED, TNNLS, 2021, unsupervised, An Evaluation of Anomaly Detection and Diagnosis in Multivariate Time Series - Deep SVDD (TS), ICML, 2018, unsupervised, Deep One-Class Classification - DevNet (TS), KDD, 2019, weakly-supervised, Deep Anomaly Detection with Deviation Networks - PReNet (TS), KDD, 2023, weakly-supervised, Deep Weakly-supervised Anomaly Detection - Deep SAD (TS), ICLR, 2020, weakly-supervised, Deep Semi-Supervised Anomaly Detection + DIF, TKDE, 2023, unsupervised, Deep Isolation Forest 
for Anomaly Detection [#Xu2023DIF]_ + TcnED, TNNLS, 2021, unsupervised, An Evaluation of Anomaly Detection and Diagnosis in Multivariate Time Series [#Garg2021Evaluation]_ + Deep SVDD (TS), ICML, 2018, unsupervised, Deep One-Class Classification [#Ruff2018Deep]_ + DevNet (TS), KDD, 2019, weakly-supervised, Deep Anomaly Detection with Deviation Networks [#Pang2019DevNet]_ + PReNet (TS), KDD, 2023, weakly-supervised, Deep Weakly-supervised Anomaly Detection [#Pang2023PreNet]_ + Deep SAD (TS), ICLR, 2020, weakly-supervised, Deep Semi-Supervised Anomaly Detection [#Ruff2020DSAD]_ NOTE: @@ -252,8 +257,32 @@ Reference .. [#Qiu2021Neutral] Qiu, Chen, et al. "Neural Transformation Learning for Deep Anomaly Detection Beyond Images". ICML. 2021. -.. [#Xu2022transformer] Xu Jiehui, et al. "Anomaly Transformer: Time Series Anomaly Detection with Association Discrepancy". ICLR, 2022. +.. [#Shenkar2022ICL] Shenkar, Tom, et al. "Anomaly Detection for Tabular Data with Internal Contrastive Learning". ICLR. 2022. -.. [#Wu2023timesnet] Wu Haixu, et al. "TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis". ICLR. 2023. +.. [#Pang2019DevNet] Pang, Guansong, et al. "Deep Anomaly Detection with Deviation Networks". KDD. 2019. -.. [#Yang2023dcdetector] Yang Yiyuan et al. "DCdetector: Dual Attention Contrastive Representation Learning for Time Series Anomaly Detection". KDD. 2023 +.. [#Pang2023PreNet] Pang, Guansong, et al. "Deep Weakly-supervised Anomaly Detection". KDD. 2023. + +.. [#Ruff2020DSAD] Ruff, Lukas, et al. "Deep Semi-Supervised Anomaly Detection". ICLR. 2020. + +.. [#Zhou2021FeaWAD] Zhou, Yingjie, et al. "Feature Encoding with AutoEncoders for Weakly-supervised Anomaly Detection". TNNLS. 2021. + +.. [#Xu2022transformer] Xu, Jiehui, et al. "Anomaly Transformer: Time Series Anomaly Detection with Association Discrepancy". ICLR, 2022. + +.. [#Wu2023timesnet] Wu, Haixu, et al. "TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis". ICLR. 
2023. + +.. [#Yang2023dcdetector] Yang, Yiyuan, et al. "DCdetector: Dual Attention Contrastive Representation Learning for Time Series Anomaly Detection". KDD. 2023. + +.. [#Tuli2022TranAD] Tuli, Shreshth, et al. "TranAD: Deep Transformer Networks for Anomaly Detection in Multivariate Time Series Data". VLDB. 2022. + +.. [#Carmona2022NCAD] Carmona, Chris U., et al. "Neural Contextual Anomaly Detection for Time Series". IJCAI. 2022. + +.. [#Garg2021Evaluation] Garg, Astha, et al. "An Evaluation of Anomaly Detection and Diagnosis in Multivariate Time Series". TNNLS. 2021. + +.. [#Xu2022COUTA] Xu, Hongzuo, et al. "Calibrated One-class Classification for Unsupervised Time Series Anomaly Detection". arXiv:2207.12201. 2022. + +.. [#Xu2023DIF] Xu, Hongzuo, et al. "Deep Isolation Forest for Anomaly Detection". TKDE. 2023. + +.. [#Xu2023SLAD] Xu, Hongzuo, et al. "Fascinating Supervisory Signals and Where to Find Them: Deep Anomaly Detection with Scale Learning". ICML. 2023. + +.. [#Xu2023RoSAS] Xu, Hongzuo, et al. "RoSAS: Deep semi-supervised anomaly detection with contamination-resilient continuous supervision". IP&M. 2023. 
\ No newline at end of file diff --git a/deepod/core/base_model.py b/deepod/core/base_model.py index e316533..dad5dac 100644 --- a/deepod/core/base_model.py +++ b/deepod/core/base_model.py @@ -21,6 +21,7 @@ from functools import partial from deepod.utils.utility import get_sub_seqs, get_sub_seqs_label import pickle + class BaseDeepAD(metaclass=ABCMeta): """ Abstract class for deep outlier detection models diff --git a/deepod/core/networks/__init__.py b/deepod/core/networks/__init__.py index e69de29..8152106 100644 --- a/deepod/core/networks/__init__.py +++ b/deepod/core/networks/__init__.py @@ -0,0 +1,12 @@ +from .base_networks import MLPnet +from .base_networks import MlpAE +from .base_networks import GRUNet +from .base_networks import LSTMNet +from .base_networks import ConvSeqEncoder +from .base_networks import ConvNet +from .ts_network_transformer import TSTransformerEncoder +from .ts_network_tcn import TCNnet +from .ts_network_tcn import TcnAE + +__all__ = ['MLPnet', 'MlpAE', 'GRUNet', 'LSTMNet', 'ConvSeqEncoder', + 'ConvNet', 'TSTransformerEncoder', 'TCNnet', 'TcnAE'] \ No newline at end of file diff --git a/deepod/core/networks/base_networks.py b/deepod/core/networks/base_networks.py index f7006a7..aaf11c6 100644 --- a/deepod/core/networks/base_networks.py +++ b/deepod/core/networks/base_networks.py @@ -6,7 +6,7 @@ from deepod.core.networks.ts_network_dilated_conv import DilatedConvEncoder from deepod.core.networks.ts_network_tcn import TCNnet, TcnAE # from deepod.core.base_transformer_network_dev import TSTransformerEncoder from deepod.core.networks.network_utility import _instantiate_class, _handle_n_hidden - +import torch.nn.modules.activation sequential_net_name = ['TCN', 'GRU', 'LSTM', 'Transformer', 'ConvSeq', 'DilatedConv'] @@ -32,6 +32,26 @@ def get_network(network_name): class ConvNet(torch.nn.Module): + """Convolutional Network + + Args: + n_features (int): + number of input data features + kernel_size (int): + kernel size (Default=1) + n_hidden 
(int): + number of hidden units in hidden layers (Default=8) + n_layers (int): + number of layers (Default=5) + activation (str): + name of activation layer, + activation should be implemented in torch.nn.module.activation + (Default='ReLU') + bias (bool): + use bias or not + (Default=False) + + """ def __init__(self, n_features, kernel_size=1, n_hidden=8, n_layers=5, activation='ReLU', bias=False): super(ConvNet, self).__init__() @@ -49,7 +69,7 @@ class ConvNet(torch.nn.Module): self.layers += [ # torch.nn.LeakyReLU(inplace=True) _instantiate_class(module_name="torch.nn.modules.activation", - class_name=activation) + class_name=activation) ] in_channels = n_hidden @@ -62,6 +82,7 @@ class ConvNet(torch.nn.Module): class MlpAE(torch.nn.Module): + """MLP-based AutoEncoder""" def __init__(self, n_features, n_hidden='500,100', n_emb=20, activation='ReLU', bias=False, batch_norm=False, skip_connection=None, dropout=None @@ -105,6 +126,7 @@ class MlpAE(torch.nn.Module): class MLPnet(torch.nn.Module): + """MLP-based Representation Network""" def __init__(self, n_features, n_hidden='500,100', n_output=20, mid_channels=None, activation='ReLU', bias=False, batch_norm=False, skip_connection=None, dropout=None): @@ -140,7 +162,6 @@ class MLPnet(torch.nn.Module): ] self.network = torch.nn.Sequential(*self.layers) - def forward(self, x): x = self.network(x) return x @@ -158,6 +179,7 @@ class MLPnet(torch.nn.Module): class LinearBlock(torch.nn.Module): + """Linear Block""" def __init__(self, in_channels, out_channels, mid_channels=None, activation='Tanh', bias=False, batch_norm=False, skip_connection=None, dropout=None): @@ -214,6 +236,7 @@ class LinearBlock(torch.nn.Module): class GRUNet(torch.nn.Module): + """GRU Network""" def __init__(self, n_features, n_hidden='20', n_output=20, bias=False, dropout=None, activation='ReLU'): super(GRUNet, self).__init__() @@ -235,8 +258,8 @@ class GRUNet(torch.nn.Module): return out - class LSTMNet(torch.nn.Module): + """LSTM Network""" def 
__init__(self, n_features, n_hidden='20', n_output=20, bias=False, dropout=None, activation='ReLU'): super(LSTMNet, self).__init__() @@ -309,6 +332,7 @@ class ConvSeqEncoder(torch.nn.Module): class ConvResBlock(torch.nn.Module): + """Convolutional Residual Block""" def __init__(self, in_dim, out_dim, conv_param=None, down_sample=None, batch_norm=False, bias=False, activation='ReLU'): super(ConvResBlock, self).__init__() diff --git a/deepod/core/networks/ts_network_tcn.py b/deepod/core/networks/ts_network_tcn.py index 31d6465..c53c524 100644 --- a/deepod/core/networks/ts_network_tcn.py +++ b/deepod/core/networks/ts_network_tcn.py @@ -1,9 +1,12 @@ +# TCN is partially adapted from https://github.com/locuslab/TCN + import torch from torch.nn.utils import weight_norm from deepod.core.networks.network_utility import _instantiate_class, _handle_n_hidden class TcnAE(torch.nn.Module): + """Temporal Convolutional Network-based AutoEncoder""" def __init__(self, n_features, n_hidden='500,100', n_emb=20, activation='ReLU', bias=False, kernel_size=2, dropout=0.2): super(TcnAE, self).__init__() @@ -60,7 +63,7 @@ class TcnAE(torch.nn.Module): class TCNnet(torch.nn.Module): - """TCN is adapted from https://github.com/locuslab/TCN""" + """Temporal Convolutional Network (TCN) for encoding/representing input time series sequences""" def __init__(self, n_features, n_hidden='8', n_output=20, kernel_size=2, bias=False, dropout=0.2, activation='ReLU'): diff --git a/deepod/core/networks/ts_network_transformer.py b/deepod/core/networks/ts_network_transformer.py index 7d95266..be1986b 100644 --- a/deepod/core/networks/ts_network_transformer.py +++ b/deepod/core/networks/ts_network_transformer.py @@ -271,16 +271,13 @@ class TransformerBatchNormEncoderLayer(torch.nn.modules.Module): return src - - class TSTransformerEncoder(torch.nn.Module): """ - Simplest classifier/regressor. Can be either regressor or classifier because the output does not include - softmax. 
Concatenates final layer embeddings and uses 0s to ignore padding embeddings in final output layer. + Transformer for encoding/representing input time series sequences """ def __init__(self, n_features, n_output=20, seq_len=100, d_model=128, - n_heads=8, n_hidden='128', dropout=0.1, + n_heads=8, n_hidden='512', dropout=0.1, token_encoding='convolutional', pos_encoding='fixed', activation='GELU', bias=False, attn='self_attn', norm='LayerNorm', freeze=False): super(TSTransformerEncoder, self).__init__() @@ -290,11 +287,11 @@ class TSTransformerEncoder(torch.nn.Module): n_hidden, n_layers = _handle_n_hidden(n_hidden) # parameter check - assert token_encoding in ['linear', 'convolutional'], \ + assert token_encoding in ['linear', 'convolutional'], \ f"use 'linear' or 'convolutional', {token_encoding} is not supported in token_encoding" - assert pos_encoding in ['learnable', 'fixed'],\ + assert pos_encoding in ['learnable', 'fixed'],\ f"use 'learnable' or 'fixed', {pos_encoding} is not supported in pos_encoding" - assert norm in ['LayerNorm', 'BatchNorm'],\ + assert norm in ['LayerNorm', 'BatchNorm'],\ f"use 'learnable' or 'fixed', {norm} is not supported in norm" if token_encoding == 'linear': diff --git a/deepod/metrics/_tsad_adjustment.py b/deepod/metrics/_tsad_adjustment.py index 5ed8f70..4fe9022 100644 --- a/deepod/metrics/_tsad_adjustment.py +++ b/deepod/metrics/_tsad_adjustment.py @@ -13,7 +13,13 @@ def point_adjustment(y_true, y_score): data label, 0 indicates normal timestamp, and 1 is anomaly y_score: np.array, required - anomaly score, higher score indicates higher likelihoods to be anomaly + predicted anomaly scores, higher score indicates higher likelihoods to be anomaly + + Returns + ------- + score: np.array + adjusted anomaly scores + """ score = y_score.copy() assert len(score) == len(y_true) diff --git a/deepod/models/__init__.py b/deepod/models/__init__.py index f3a3d43..c5d8c9a 100644 --- a/deepod/models/__init__.py +++ b/deepod/models/__init__.py @@ 
-23,16 +23,17 @@ from deepod.models.time_series.dsvdd import DeepSVDDTS from deepod.models.time_series.dcdetector import DCdetector from deepod.models.time_series.timesnet import TimesNet from deepod.models.time_series.anomalytransformer import AnomalyTransformer +from deepod.models.time_series.ncad import NCAD from deepod.models.time_series.tranad import TranAD from deepod.models.time_series.couta import COUTA from deepod.models.time_series.usad import USAD from deepod.models.time_series.tcned import TcnED - __all__ = [ 'RCA', 'DeepSVDD', 'GOAD', 'NeuTraL', 'RDP', 'ICL', 'SLAD', 'DeepIsolationForest', 'DeepSAD', 'DevNet', 'PReNet', 'FeaWAD', 'REPEN', 'RoSAS', - 'DCdetector', 'TimesNet', 'AnomalyTransformer', 'TranAD', 'COUTA', 'USAD', 'TcnED', + 'DCdetector', 'TimesNet', 'AnomalyTransformer', 'NCAD', + 'TranAD', 'COUTA', 'USAD', 'TcnED', 'DeepIsolationForestTS', 'DeepSVDDTS', 'PReNetTS', 'DeepSADTS', 'DevNetTS' ] \ No newline at end of file diff --git a/deepod/models/tabular/devnet.py b/deepod/models/tabular/devnet.py index fa2da40..9b2da49 100644 --- a/deepod/models/tabular/devnet.py +++ b/deepod/models/tabular/devnet.py @@ -15,54 +15,47 @@ import numpy as np class DevNet(BaseDeepAD): """ - Parameters - ---------- - epochs: int, optional (default=100) - Number of training epochs + Deviation Networks for Weakly-supervised Anomaly Detection (KDD'19) + :cite:`pang2019deep` - batch_size: int, optional (default=64) - Number of samples in a mini-batch - - lr: float, optional (default=1e-3) - Learning rate - - rep_dim: int, optional (default=128) - it is for consistency, unused in this model - - hidden_dims: list, str or int, optional (default='100,50') - Number of neural units in hidden layers - - If list, each item is a layer - - If str, neural units of hidden layers are split by comma - - If int, number of neural units of single hidden layer - - act: str, optional (default='ReLU') - activation layer name - choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] - - bias: 
bool, optional (default=False) - Additive bias in linear layer - - margin: float, optional (default=5.) - margin value used in the deviation loss function - - l: int, optional (default=5000.) - the size of samples of the Gaussian distribution used in the deviation loss function - - epoch_steps: int, optional (default=-1) - Maximum steps in an epoch - - If -1, all the batches will be processed - - prt_steps: int, optional (default=10) - Number of epoch intervals per printing - - device: str, optional (default='cuda') - torch device, - - verbose: int, optional (default=1) - Verbosity mode - - random_state: int, optional (default=42) - the seed used by the random + Args: + epochs (int, optional): + number of training epochs (default: 100). + batch_size (int, optional): + number of samples in a mini-batch (default: 64) + lr (float, optional): + learning rate (default: 1e-3) + rep_dim (int, optional): + it is for consistency, unused in this model. + hidden_dims (list, str or int, optional): + number of neural units in hidden layers, + If list, each item is a layer; + If str, neural units of hidden layers are split by comma; + If int, number of neural units of single hidden layer + (default: '100,50') + act (str, optional): + activation layer name, + choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] + (default='ReLU') + bias (bool, optional): + Additive bias in linear layer (default=False) + margin (float, optional): + margin value used in the deviation loss function (default=5.) + l (int, optional): + the size of samples of the Gaussian distribution + used in the deviation loss function (default=5000.) + epoch_steps (int, optional): + Maximum steps in an epoch. + If -1, all the batches will be processed + (default=-1) + prt_steps (int, optional): + Number of epoch intervals per printing (default=10) + device (str, optional): + torch device (default='cuda'). 
+ verbose (int, optional): + Verbosity mode (default=1) + random_state (int, optional): + the seed used by the random (default=42) """ def __init__(self, epochs=100, batch_size=64, lr=1e-3, network='MLP', @@ -87,6 +80,18 @@ class DevNet(BaseDeepAD): return def training_prepare(self, X, y): + """ + + Args: + X (np.array): input data array + y (np.array): input data label + + Returns: + train_loader (torch.DataLoader): data loader of training data + net (torch.nn.Module): neural network + criterion (torch.nn.Module): loss function + + """ # loader: balanced loader, a mini-batch contains a half of normal data and a half of anomalies n_anom = np.where(y == 1)[0].shape[0] n_norm = self.n_samples - n_anom diff --git a/deepod/models/tabular/dif.py b/deepod/models/tabular/dif.py index 9dad579..fb4001c 100644 --- a/deepod/models/tabular/dif.py +++ b/deepod/models/tabular/dif.py @@ -17,9 +17,19 @@ import numpy as np class DeepIsolationForest(BaseDeepAD): + """ + Deep Isolation Forest for Anomaly Detection + + Args: + epochs (int): + number of training epochs (Default=100). 
+ batch_size (int): + number of samples in a mini-batch (Default=1000) + lr (float): + it is for consistency, unused in this model + """ def __init__(self, epochs=100, batch_size=1000, lr=1e-3, - seq_len=100, stride=1, rep_dim=128, hidden_dims='100,50', act='ReLU', bias=False, n_ensemble=50, n_estimators=6, max_samples=256, n_jobs=1, @@ -28,7 +38,7 @@ class DeepIsolationForest(BaseDeepAD): super(DeepIsolationForest, self).__init__( model_name='DIF', data_type='tabular', epochs=epochs, batch_size=batch_size, lr=lr, - network='MLP', seq_len=seq_len, stride=stride, + network='MLP', epoch_steps=epoch_steps, prt_steps=prt_steps, device=device, verbose=verbose, random_state=random_state ) diff --git a/deepod/models/tabular/dsad.py b/deepod/models/tabular/dsad.py index 713a0c3..f652cdf 100644 --- a/deepod/models/tabular/dsad.py +++ b/deepod/models/tabular/dsad.py @@ -15,8 +15,7 @@ from collections import Counter class DeepSAD(BaseDeepAD): - """ Deep Semi-supervised Anomaly Detection (Deep SAD) - See :cite:`ruff2020dsad` for details + """ Deep Semi-supervised Anomaly Detection (ICLR'20) Parameters ---------- diff --git a/deepod/models/tabular/dsvdd.py b/deepod/models/tabular/dsvdd.py index 02ae293..c01be4d 100644 --- a/deepod/models/tabular/dsvdd.py +++ b/deepod/models/tabular/dsvdd.py @@ -17,8 +17,9 @@ from functools import partial class DeepSVDD(BaseDeepAD): - """ Deep One-class Classification (Deep SVDD) for anomaly detection - See :cite:`ruff2018deepsvdd` for details + """ + Deep One-class Classification for Anomaly Detection (ICML'18) + :cite:`ruff2018deepsvdd` Parameters ---------- @@ -63,7 +64,27 @@ class DeepSVDD(BaseDeepAD): random_state: int, optional (default=42) the seed used by the random + + Attributes + ---------- + decision_scores_ : numpy array of shape (n_samples,) + The outlier scores of the training data. + The higher, the more abnormal. Outliers tend to have higher + scores. This value is available once the detector is + fitted. 
+ + threshold_ : float + The threshold is based on ``contamination``. It is the + ``n_samples * contamination`` most abnormal samples in + ``decision_scores_``. The threshold is calculated for generating + binary outlier labels. + + labels_ : int, either 0 or 1 + The binary labels of the training data. 0 stands for inliers + and 1 for outliers/anomalies. It is generated by applying + ``threshold_`` on ``decision_scores_``. """ + def __init__(self, epochs=100, batch_size=64, lr=1e-3, rep_dim=128, hidden_dims='100,50', act='ReLU', bias=False, epoch_steps=-1, prt_steps=10, device='cuda', @@ -96,7 +117,7 @@ class DeepSVDD(BaseDeepAD): net = MLPnet(**network_params).to(self.device) self.c = self._set_c(net, train_loader) - criterion = DSVDDLoss(c=self.c) + criterion = _DSVDDLoss(c=self.c) if self.verbose >= 2: print(net) @@ -107,7 +128,7 @@ class DeepSVDD(BaseDeepAD): test_loader = DataLoader(X, batch_size=self.batch_size, drop_last=False, shuffle=False) assert self.c is not None - self.criterion = DSVDDLoss(c=self.c, reduction='none') + self.criterion = _DSVDDLoss(c=self.c, reduction='none') return test_loader def training_forward(self, batch_x, net, criterion): @@ -132,7 +153,7 @@ class DeepSVDD(BaseDeepAD): self.net = self.set_tuned_net(config) self.c = self._set_c(self.net, train_loader) - criterion = DSVDDLoss(c=self.c, reduction='mean') + criterion = _DSVDDLoss(c=self.c, reduction='mean') optimizer = torch.optim.Adam(self.net.parameters(), lr=config['lr'], eps=1e-6) @@ -230,7 +251,7 @@ class DeepSVDD(BaseDeepAD): return c -class DSVDDLoss(torch.nn.Module): +class _DSVDDLoss(torch.nn.Module): """ Parameters @@ -247,7 +268,7 @@ class DSVDDLoss(torch.nn.Module): """ def __init__(self, c, reduction='mean'): - super(DSVDDLoss, self).__init__() + super(_DSVDDLoss, self).__init__() self.c = c self.reduction = reduction diff --git a/deepod/models/tabular/feawad.py b/deepod/models/tabular/feawad.py index a86c813..3b03099 100644 --- a/deepod/models/tabular/feawad.py +++ 
b/deepod/models/tabular/feawad.py @@ -15,6 +15,9 @@ import numpy as np class FeaWAD(BaseDeepAD): """ + Feature Encoding with AutoEncoders for Weakly-supervised Anomaly Detection + (TNNLS'21) + Parameters ---------- epochs: int, optional (default=100) diff --git a/deepod/models/tabular/goad.py b/deepod/models/tabular/goad.py index 5384535..ae5ccb9 100644 --- a/deepod/models/tabular/goad.py +++ b/deepod/models/tabular/goad.py @@ -15,6 +15,9 @@ import numpy as np class GOAD(BaseDeepAD): + """ + Classification-Based Anomaly Detection for General Data (ICLR'20) + """ def __init__(self, epochs=100, batch_size=64, lr=1e-3, n_trans=256, trans_dim=32, alpha=0.1, margin=1., eps=0, diff --git a/deepod/models/tabular/icl.py b/deepod/models/tabular/icl.py index fb8ef78..3feba6f 100644 --- a/deepod/models/tabular/icl.py +++ b/deepod/models/tabular/icl.py @@ -15,8 +15,10 @@ import numpy as np class ICL(BaseDeepAD): - """ Anomaly Detection for Tabular Data with Internal Contrastive Learning (ICL for short) - See :cite:`shenkar2022internal` for details + """ + Anomaly Detection for Tabular Data with Internal Contrastive Learning + (ICLR'22) + :cite:`shenkar2022internal` Parameters ---------- diff --git a/deepod/models/tabular/neutral.py b/deepod/models/tabular/neutral.py index 269179e..1e3546e 100644 --- a/deepod/models/tabular/neutral.py +++ b/deepod/models/tabular/neutral.py @@ -14,6 +14,10 @@ import numpy as np class NeuTraL(BaseDeepAD): + """ + Neural Transformation Learning-based Anomaly Detection (ICML'21) + + """ def __init__(self, epochs=100, batch_size=64, lr=1e-3, n_trans=11, trans_type='residual', temp=0.1, rep_dim=128, hidden_dims='100,50', trans_hidden_dims=50, diff --git a/deepod/models/tabular/prenet.py b/deepod/models/tabular/prenet.py index f50c46b..0b5a570 100644 --- a/deepod/models/tabular/prenet.py +++ b/deepod/models/tabular/prenet.py @@ -11,6 +11,9 @@ import numpy as np class PReNet(BaseDeepAD): + """ + Deep Weakly-supervised Anomaly Detection (KDD'23) + """ 
def __init__(self, epochs=100, batch_size=64, lr=1e-3, rep_dim=128, hidden_dims='100,50', act='LeakyReLU', bias=False, epoch_steps=-1, prt_steps=10, device='cuda', diff --git a/deepod/models/tabular/rca.py b/deepod/models/tabular/rca.py index 0adaa41..babc721 100644 --- a/deepod/models/tabular/rca.py +++ b/deepod/models/tabular/rca.py @@ -15,58 +15,61 @@ import numpy as np class RCA(BaseDeepAD): """ - epochs: int, optional (default=100) - Number of training epochs + A Deep Collaborative Autoencoder Approach for Anomaly Detection (IJCAI'21) - batch_size: int, optional (default=64) - Number of samples in a mini-batch + Args: + epochs: int, optional (default=100) + Number of training epochs - lr: float, optional (default=1e-3) - Learning rate + batch_size: int, optional (default=64) + Number of samples in a mini-batch - rep_dim: int, optional (default=128) - Dimensionality of the representation space + lr: float, optional (default=1e-3) + Learning rate - hidden_dims: list, str or int, optional (default='100,50') - Number of neural units in hidden layers - - If list, each item is a layer - - If str, neural units of hidden layers are split by comma - - If int, number of neural units of single hidden layer + rep_dim: int, optional (default=128) + Dimensionality of the representation space - act: str, optional (default='ReLU') - activation layer name - choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] + hidden_dims: list, str or int, optional (default='100,50') + Number of neural units in hidden layers + - If list, each item is a layer + - If str, neural units of hidden layers are split by comma + - If int, number of neural units of single hidden layer - bias: bool, optional (default=False) - Additive bias in linear layer + act: str, optional (default='ReLU') + activation layer name + choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] - alpha: float, optional (default=0.5) - decay rate in determining beta + bias: bool, optional (default=False) + Additive bias in linear 
layer - anom_ratio: float, optional (default=0.5) - decay rate in determining beta + alpha: float, optional (default=0.5) + decay rate in determining beta - dropout: float or None, optional (default=0.5) - dropout probability, the default setting is 0.5 + anom_ratio: float, optional (default=0.5) + decay rate in determining beta - inference_ensemble: int, optional(default=10) - the ensemble size during the inference stage + dropout: float or None, optional (default=0.5) + dropout probability, the default setting is 0.5 - epoch_steps: int, optional (default=-1) - Maximum steps in an epoch - - If -1, all the batches will be processed + inference_ensemble: int, optional(default=10) + the ensemble size during the inference stage - prt_steps: int, optional (default=10) - Number of epoch intervals per printing + epoch_steps: int, optional (default=-1) + Maximum steps in an epoch + - If -1, all the batches will be processed - device: str, optional (default='cuda') - torch device, + prt_steps: int, optional (default=10) + Number of epoch intervals per printing - verbose: int, optional (default=1) - Verbosity mode + device: str, optional (default='cuda') + torch device, - random_state: int, optional (default=42) - the seed used by the random + verbose: int, optional (default=1) + Verbosity mode + + random_state: int, optional (default=42) + the seed used by the random """ def __init__(self, epochs=100, batch_size=64, lr=1e-3, rep_dim=128, hidden_dims='100,50', act='LeakyReLU', bias=False, diff --git a/deepod/models/tabular/rdp.py b/deepod/models/tabular/rdp.py index d436804..96c6b8a 100644 --- a/deepod/models/tabular/rdp.py +++ b/deepod/models/tabular/rdp.py @@ -15,6 +15,9 @@ import copy class RDP(BaseDeepAD): """ + Unsupervised Representation Learning by Predicting Random Distances + (IJCAI'20) + Parameters ---------- epochs: int, optional (default=100) diff --git a/deepod/models/tabular/repen.py b/deepod/models/tabular/repen.py index dcfb103..75821f1 100644 --- 
a/deepod/models/tabular/repen.py +++ b/deepod/models/tabular/repen.py @@ -19,9 +19,9 @@ import numpy as np class REPEN(BaseDeepAD): """ - Pang et al.: Learning Representations of Ultrahigh-dimensional Data for Random + Learning Representations of Ultrahigh-dimensional Data for Random Distance-based Outlier Detection (KDD'18) - See :cite:`pang2018repen` for details + :cite:`pang2018repen` """ def __init__(self, epochs=100, batch_size=64, lr=1e-3, diff --git a/deepod/models/tabular/rosas.py b/deepod/models/tabular/rosas.py index 85498f5..13d4c2a 100644 --- a/deepod/models/tabular/rosas.py +++ b/deepod/models/tabular/rosas.py @@ -7,6 +7,10 @@ from deepod.core.networks.base_networks import MLPnet class RoSAS(BaseDeepAD): + """ + RoSAS: Deep semi-supervised anomaly detection with contamination-resilient + continuous supervision (IP&M'23) + """ def __init__(self, epochs=100, batch_size=128, lr=0.005, rep_dim=32, hidden_dims='32', act='LeakyReLU', bias=False, margin=5., alpha=0.5, T=2, k=2, diff --git a/deepod/models/tabular/slad.py b/deepod/models/tabular/slad.py index 3651a37..1a9711d 100644 --- a/deepod/models/tabular/slad.py +++ b/deepod/models/tabular/slad.py @@ -12,6 +12,10 @@ import torch class SLAD(BaseDeepAD): + """ + Fascinating Supervisory Signals and Where to Find Them: + Deep Anomaly Detection with Scale Learning (ICML'23) + """ def __init__(self, epochs=100, batch_size=128, lr=1e-3, hidden_dims=100, act='LeakyReLU', distribution_size=10, # the member size in a group, c in the paper diff --git a/deepod/models/time_series/anomalytransformer.py b/deepod/models/time_series/anomalytransformer.py index 7a9d744..00a6f59 100644 --- a/deepod/models/time_series/anomalytransformer.py +++ b/deepod/models/time_series/anomalytransformer.py @@ -15,6 +15,11 @@ def my_kl_loss(p, q): class AnomalyTransformer(BaseDeepAD): + """ + Anomaly Transformer: Time Series Anomaly Detection with Association Discrepancy + (ICLR'22) + + """ def __init__(self, seq_len=100, stride=1, 
lr=0.0001, epochs=10, batch_size=32, epoch_steps=20, prt_steps=1, device='cuda', k=3, verbose=2, random_state=42): diff --git a/deepod/models/time_series/couta.py b/deepod/models/time_series/couta.py index 14aca67..718a2fa 100644 --- a/deepod/models/time_series/couta.py +++ b/deepod/models/time_series/couta.py @@ -1,6 +1,6 @@ """ Calibrated One-class classifier for Unsupervised Time series Anomaly detection (COUTA) -@author: Hongzuo Xu (hongzuo.xu@gmail.com) +@author: Hongzuo Xu """ import numpy as np @@ -20,7 +20,8 @@ from deepod.metrics import ts_metrics, point_adjustment class COUTA(BaseDeepAD): """ - COUTA class for Calibrated One-class classifier for Unsupervised Time series Anomaly detection + Calibrated One-class classifier for Unsupervised Time series + Anomaly detection (arXiv'22) Parameters ---------- @@ -144,7 +145,7 @@ class COUTA(BaseDeepAD): train_seqs = sequences val_seqs = None - self.net = COUTANet( + self.net = _COUTANet( input_dim=self.n_features, hidden_dims=self.hidden_dims, n_output=self.rep_dim, @@ -187,7 +188,7 @@ class COUTA(BaseDeepAD): The anomaly score of the input samples. 
""" test_sub_seqs = get_sub_seqs(X, seq_len=self.seq_len, stride=1) - test_dataset = SubseqData(test_sub_seqs) + test_dataset = _SubseqData(test_sub_seqs) dataloader = DataLoader(dataset=test_dataset, batch_size=self.batch_size, drop_last=False, shuffle=False) representation_lst = [] @@ -211,12 +212,12 @@ class COUTA(BaseDeepAD): return dis_pad def train(self, net, train_seqs, val_seqs=None): - val_loader = DataLoader(dataset=SubseqData(val_seqs), + val_loader = DataLoader(dataset=_SubseqData(val_seqs), batch_size=self.batch_size, drop_last=False, shuffle=False) if val_seqs is not None else None optimizer = torch.optim.Adam(net.parameters(), lr=self.lr) - criterion_oc_umc = DSVDDUncLoss(c=self.c, reduction='mean') + criterion_oc_umc = _DSVDDUncLoss(c=self.c, reduction='mean') criterion_mse = torch.nn.MSELoss(reduction='mean') y0 = -1 * torch.ones(self.batch_size).float().to(self.device) @@ -229,7 +230,7 @@ class COUTA(BaseDeepAD): copy_times += 1 train_seqs = np.concatenate([train_seqs for _ in range(copy_times)]) - train_loader = DataLoader(dataset=SubseqData(train_seqs), + train_loader = DataLoader(dataset=_SubseqData(train_seqs), batch_size=self.batch_size, drop_last=True, pin_memory=True, shuffle=True) @@ -246,7 +247,7 @@ class COUTA(BaseDeepAD): loss_oc = criterion_oc_umc(rep_x0, rep_x0_dup) neg_cand_idx = RandomState(epoch_seed[ii]).randint(0, self.batch_size, self.neg_batch_size) - x1, y1 = create_batch_neg(batch_seqs=x0[neg_cand_idx], + x1, y1 = self.create_batch_neg(batch_seqs=x0[neg_cand_idx], max_cut_ratio=self.max_cut_ratio, seed=epoch_seed[ii], return_mul_label=False, @@ -300,14 +301,14 @@ class COUTA(BaseDeepAD): train_data = self.train_data[:int(0.8 * len(self.train_data))] val_data = self.train_data[int(0.8 * len(self.train_data)):] - train_loader = DataLoader(dataset=SubseqData(train_data), batch_size=self.batch_size, + train_loader = DataLoader(dataset=_SubseqData(train_data), batch_size=self.batch_size, drop_last=True, pin_memory=True, 
shuffle=True) - val_loader = DataLoader(dataset=SubseqData(val_data), batch_size=self.batch_size, + val_loader = DataLoader(dataset=_SubseqData(val_data), batch_size=self.batch_size, drop_last=True, pin_memory=True, shuffle=True) self.net = self.set_tuned_net(config) self.c = self._set_c(self.net, train_data) - criterion_oc_umc = DSVDDUncLoss(c=self.c, reduction='mean') + criterion_oc_umc = _DSVDDUncLoss(c=self.c, reduction='mean') criterion_mse = torch.nn.MSELoss(reduction='mean') optimizer = torch.optim.Adam(self.net.parameters(), lr=config['lr'], eps=1e-6) @@ -331,7 +332,7 @@ class COUTA(BaseDeepAD): neg_batch_size = int(config['neg_batch_ratio'] * self.batch_size) neg_candidate_idx = tmp_rng.randint(0, self.batch_size, neg_batch_size) - x1, y1 = create_batch_neg( + x1, y1 = self.create_batch_neg( batch_seqs=x0[neg_candidate_idx], max_cut_ratio=self.max_cut_ratio, seed=epoch_seed[ii], @@ -413,7 +414,7 @@ class COUTA(BaseDeepAD): return config def set_tuned_net(self, config): - net = COUTANet( + net = _COUTANet( input_dim=self.n_features, hidden_dims=config['hidden_dims'], n_output=config['rep_dim'], @@ -436,7 +437,7 @@ class COUTA(BaseDeepAD): def _set_c(self, net, seqs, eps=0.1): """Initializing the center for the hypersphere""" - dataloader = DataLoader(dataset=SubseqData(seqs), batch_size=self.batch_size, + dataloader = DataLoader(dataset=_SubseqData(seqs), batch_size=self.batch_size, drop_last=False, pin_memory=True, shuffle=True) z_ = [] net.eval() @@ -468,79 +469,79 @@ class COUTA(BaseDeepAD): """define test_loader""" return + @staticmethod + def create_batch_neg(batch_seqs, max_cut_ratio=0.5, seed=0, return_mul_label=False, ss_type='FULL'): + rng = np.random.RandomState(seed=seed) -def create_batch_neg(batch_seqs, max_cut_ratio=0.5, seed=0, return_mul_label=False, ss_type='FULL'): - rng = np.random.RandomState(seed=seed) + batch_size, l, dim = batch_seqs.shape + cut_start = l - rng.randint(1, int(max_cut_ratio * l), size=batch_size) + n_cut_dim = 
rng.randint(1, dim+1, size=batch_size) + cut_dim = [rng.randint(dim, size=n_cut_dim[i]) for i in range(batch_size)] - batch_size, l, dim = batch_seqs.shape - cut_start = l - rng.randint(1, int(max_cut_ratio * l), size=batch_size) - n_cut_dim = rng.randint(1, dim+1, size=batch_size) - cut_dim = [rng.randint(dim, size=n_cut_dim[i]) for i in range(batch_size)] + if type(batch_seqs) == np.ndarray: + batch_neg = batch_seqs.copy() + neg_labels = np.zeros(batch_size, dtype=int) + else: + batch_neg = batch_seqs.clone() + neg_labels = torch.LongTensor(batch_size) - if type(batch_seqs) == np.ndarray: - batch_neg = batch_seqs.copy() - neg_labels = np.zeros(batch_size, dtype=int) - else: - batch_neg = batch_seqs.clone() - neg_labels = torch.LongTensor(batch_size) + if ss_type != 'FULL': + pool = rng.randint(1e+6, size=int(1e+4)) + if ss_type == 'collective': + pool = [a % 6 == 0 or a % 6 == 1 for a in pool] + elif ss_type == 'contextual': + pool = [a % 6 == 2 or a % 6 == 3 for a in pool] + elif ss_type == 'point': + pool = [a % 6 == 4 or a % 6 == 5 for a in pool] + flags = rng.choice(pool, size=batch_size, replace=False) + else: + flags = rng.randint(1e+5, size=batch_size) - if ss_type != 'FULL': - pool = rng.randint(1e+6, size=int(1e+4)) - if ss_type == 'collective': - pool = [a % 6 == 0 or a % 6 == 1 for a in pool] - elif ss_type == 'contextual': - pool = [a % 6 == 2 or a % 6 == 3 for a in pool] - elif ss_type == 'point': - pool = [a % 6 == 4 or a % 6 == 5 for a in pool] - flags = rng.choice(pool, size=batch_size, replace=False) - else: - flags = rng.randint(1e+5, size=batch_size) + n_types = 6 + for ii in range(batch_size): + flag = flags[ii] - n_types = 6 - for ii in range(batch_size): - flag = flags[ii] + # collective anomalies + if flag % n_types == 0: + batch_neg[ii, cut_start[ii]:, cut_dim[ii]] = 0 + neg_labels[ii] = 1 - # collective anomalies - if flag % n_types == 0: - batch_neg[ii, cut_start[ii]:, cut_dim[ii]] = 0 - neg_labels[ii] = 1 + elif flag % n_types == 1: + 
batch_neg[ii, cut_start[ii]:, cut_dim[ii]] = 1 + neg_labels[ii] = 1 - elif flag % n_types == 1: - batch_neg[ii, cut_start[ii]:, cut_dim[ii]] = 1 - neg_labels[ii] = 1 + # contextual anomalies + elif flag % n_types == 2: + mean = torch.mean(batch_neg[ii, -10:, cut_dim[ii]], dim=0) + batch_neg[ii, -1, cut_dim[ii]] = mean + 0.5 + neg_labels[ii] = 2 - # contextual anomalies - elif flag % n_types == 2: - mean = torch.mean(batch_neg[ii, -10:, cut_dim[ii]], dim=0) - batch_neg[ii, -1, cut_dim[ii]] = mean + 0.5 - neg_labels[ii] = 2 + elif flag % n_types == 3: + mean = torch.mean(batch_neg[ii, -10:, cut_dim[ii]], dim=0) + batch_neg[ii, -1, cut_dim[ii]] = mean - 0.5 + neg_labels[ii] = 2 - elif flag % n_types == 3: - mean = torch.mean(batch_neg[ii, -10:, cut_dim[ii]], dim=0) - batch_neg[ii, -1, cut_dim[ii]] = mean - 0.5 - neg_labels[ii] = 2 + # point anomalies + elif flag % n_types == 4: + batch_neg[ii, -1, cut_dim[ii]] = 2 + neg_labels[ii] = 3 - # point anomalies - elif flag % n_types == 4: - batch_neg[ii, -1, cut_dim[ii]] = 2 - neg_labels[ii] = 3 + elif flag % n_types == 5: + batch_neg[ii, -1, cut_dim[ii]] = -2 + neg_labels[ii] = 3 - elif flag % n_types == 5: - batch_neg[ii, -1, cut_dim[ii]] = -2 - neg_labels[ii] = 3 - - if return_mul_label: - return batch_neg, neg_labels - else: - neg_labels = torch.ones(batch_size).long() - return batch_neg, neg_labels + if return_mul_label: + return batch_neg, neg_labels + else: + neg_labels = torch.ones(batch_size).long() + return batch_neg, neg_labels -class COUTANet(torch.nn.Module): +class _COUTANet(torch.nn.Module): def __init__(self, input_dim, hidden_dims=32, rep_hidden=32, pretext_hidden=16, n_output=10, kernel_size=2, dropout=0.2, out_dim=2, bias=True, dup=True, pretext=True): - super(COUTANet, self).__init__() + super(_COUTANet, self).__init__() self.layers = [] @@ -598,7 +599,7 @@ class COUTANet(torch.nn.Module): return rep -class SubseqData(Dataset): +class _SubseqData(Dataset): def __init__(self, x, y=None, w1=None, w2=None): 
self.sub_seqs = x self.label = y @@ -624,7 +625,7 @@ class SubseqData(Dataset): return self.sub_seqs[idx] -class DSVDDUncLoss(torch.nn.Module): +class _DSVDDUncLoss(torch.nn.Module): def __init__(self, c, reduction='mean'): - super(DSVDDUncLoss, self).__init__() + super(_DSVDDUncLoss, self).__init__() self.c = c diff --git a/deepod/models/time_series/dcdetector.py b/deepod/models/time_series/dcdetector.py index cb5784b..15e9f9c 100644 --- a/deepod/models/time_series/dcdetector.py +++ b/deepod/models/time_series/dcdetector.py @@ -17,6 +17,10 @@ def my_kl_loss(p, q): class DCdetector(BaseDeepAD): + """ + DCdetector: Dual Attention Contrastive Representation Learning + for Time Series Anomaly Detection (KDD'23) + """ def __init__(self, seq_len=100, stride=1, lr=0.0001, epochs=5, batch_size=128, epoch_steps=20, prt_steps=1, device='cuda', n_heads=1, d_model=256, e_layers=3, patch_size=None, @@ -156,6 +160,28 @@ class DCdetector(BaseDeepAD): return test_energy, preds # (n,d) + def predict(self, X, return_confidence=False): + """Label X as 0 (inlier) / 1 (anomaly) by comparing its scores with ``self.threshold_``.""" + # ``self.threshold_`` must already be set (presumably by fit() -- confirm); resetting it + # to None here, as this method used to do, made the comparison below raise a TypeError. + + + + + # ------------------------------ # + + pred_score = self.decision_function(X) + + prediction = (pred_score > self.threshold_).astype('int').ravel() + + if return_confidence: + confidence = self._predict_confidence(pred_score) + return prediction, confidence + + return prediction + + + def training_forward(self, batch_x, net, criterion): """define forward step in training""" return diff --git a/deepod/models/time_series/devnet.py b/deepod/models/time_series/devnet.py index 0b129b0..ab0db0c 100644 --- a/deepod/models/time_series/devnet.py +++ b/deepod/models/time_series/devnet.py @@ -16,6 +16,9 @@ import numpy as np class DevNetTS(BaseDeepAD): """ + Deviation Networks for Weakly-supervised Anomaly Detection (KDD'19) + :cite:`pang2019deep` + Parameters ---------- epochs: int, optional (default=100) diff --git a/deepod/models/time_series/dif.py b/deepod/models/time_series/dif.py index 07db3f3..eec4a08 100644 ---
a/deepod/models/time_series/dif.py +++ b/deepod/models/time_series/dif.py @@ -17,6 +17,10 @@ import numpy as np class DeepIsolationForestTS(BaseDeepAD): + """ + Deep isolation forest for anomaly detection (TKDE'23) + + """ def __init__(self, epochs=100, batch_size=1000, lr=1e-3, seq_len=100, stride=1, diff --git a/deepod/models/time_series/dsad.py b/deepod/models/time_series/dsad.py index cbe932d..8888f55 100644 --- a/deepod/models/time_series/dsad.py +++ b/deepod/models/time_series/dsad.py @@ -16,8 +16,8 @@ from collections import Counter class DeepSADTS(BaseDeepAD): - """ Deep Semi-supervised Anomaly Detection (Deep SAD) - See :cite:`ruff2020dsad` for details + """ Deep Semi-supervised Anomaly Detection (ICLR'20) + :cite:`ruff2020dsad` Parameters ---------- diff --git a/deepod/models/time_series/dsvdd.py b/deepod/models/time_series/dsvdd.py index 9e08ad3..96076fd 100644 --- a/deepod/models/time_series/dsvdd.py +++ b/deepod/models/time_series/dsvdd.py @@ -11,14 +11,12 @@ import torch class DeepSVDDTS(BaseDeepAD): - """ Deep One-class Classification (Deep SVDD) for anomaly detection - See :cite:`ruff2018deepsvdd` for details + """ + Deep One-class Classification for Anomaly Detection (ICML'18) + :cite:`ruff2018deepsvdd` Parameters ---------- - data_type: str, optional (default='tabular') - Data type, choice=['tabular', 'ts'] - epochs: int, optional (default=100) Number of training epochs diff --git a/deepod/models/time_series/ncad.py b/deepod/models/time_series/ncad.py new file mode 100644 index 0000000..7416a57 --- /dev/null +++ b/deepod/models/time_series/ncad.py @@ -0,0 +1,344 @@ +# -*- coding: utf-8 -*- +""" +Neural Contextual Anomaly Detection for Time Series (NCAD) +@Author: Hongzuo Xu +""" + +from deepod.core.base_model import BaseDeepAD +from deepod.core.networks.ts_network_tcn import TCNnet +from torch.utils.data import DataLoader, TensorDataset +import torch +import numpy as np +import torch.nn.functional as F + + +class NCAD(BaseDeepAD): + """ + Neural 
Contextual Anomaly Detection for Time Series (IJCAI'22) + + Parameters + ---------- + epochs: int, optional (default=100) + Number of training epochs + + batch_size: int, optional (default=64) + Number of samples in a mini-batch + + lr: float, optional (default=1e-3) + Learning rate + + rep_dim: int, optional (default=128) + Dimensionality of the representation space + + hidden_dims: list, str or int, optional (default='100,50') + Number of neural units in hidden layers + - If list, each item is a layer + - If str, neural units of hidden layers are split by comma + - If int, number of neural units of single hidden layer + + act: str, optional (default='ReLU') + activation layer name + choice = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh'] + + bias: bool, optional (default=False) + Additive bias in linear layer + + epoch_steps: int, optional (default=-1) + Maximum steps in an epoch + - If -1, all the batches will be processed + + prt_steps: int, optional (default=10) + Number of epoch intervals per printing + + device: str, optional (default='cuda') + torch device, + + verbose: int, optional (default=1) + Verbosity mode + + random_state: int, optional (default=42) + the seed used by the random + + """ + + def __init__(self, epochs=100, batch_size=64, lr=3e-4, + seq_len=100, stride=1, + suspect_win_len=10, coe_rate=0.5, mixup_rate=2.0, + hidden_dims='32,32,32,32', rep_dim=128, + act='ReLU', bias=False, + kernel_size=5, dropout=0.0, + epoch_steps=-1, prt_steps=10, device='cuda', + verbose=2, random_state=42): + super(NCAD, self).__init__( + model_name='NCAD', data_type='ts', epochs=epochs, batch_size=batch_size, lr=lr, + seq_len=seq_len, stride=stride, + epoch_steps=epoch_steps, prt_steps=prt_steps, device=device, + verbose=verbose, random_state=random_state + ) + + self.suspect_win_len = suspect_win_len + + self.coe_rate = coe_rate + self.mixup_rate = mixup_rate + + self.hidden_dims = hidden_dims + self.rep_dim = rep_dim + self.act = act + self.bias = bias + self.dropout 
= dropout + + self.kernel_size = kernel_size + + return + + def training_prepare(self, X, y): + y_train = np.zeros(len(X)) + train_dataset = TensorDataset(torch.from_numpy(X).float(), + torch.from_numpy(y_train).long()) + + train_loader = DataLoader(train_dataset, batch_size=self.batch_size, + drop_last=True, pin_memory=True, shuffle=True) + + net = NCADNet( + n_features=self.n_features, + n_hidden=self.hidden_dims, + n_output=self.rep_dim, + kernel_size=self.kernel_size, + bias=True, + eps=1e-10, + dropout=0.2, + activation=self.act, + ).to(self.device) + + criterion = torch.nn.BCELoss() + + return train_loader, net, criterion + + def training_forward(self, batch_x, net, criterion): + x0, y0 = batch_x + + if self.coe_rate > 0: + x_oe, y_oe = self.coe_batch( + x=x0.transpose(2, 1), + y=y0, + coe_rate=self.coe_rate, + suspect_window_length=self.suspect_win_len, + random_start_end=True, + ) + # Add COE to training batch + x0 = torch.cat((x0, x_oe.transpose(2, 1)), dim=0) + y0 = torch.cat((y0, y_oe), dim=0) + + if self.mixup_rate > 0.0: + x_mixup, y_mixup = self.mixup_batch( + x=x0.transpose(2, 1), + y=y0, + mixup_rate=self.mixup_rate, + ) + # Add Mixup to training batch + x0 = torch.cat((x0, x_mixup.transpose(2, 1)), dim=0) + y0 = torch.cat((y0, y_mixup), dim=0) + + x0 = x0.float().to(self.device) + y0 = y0.float().to(self.device) + + x_context = x0[:, :-self.suspect_win_len] + logits_anomaly = net(x0, x_context) + probs_anomaly = torch.sigmoid(logits_anomaly.squeeze()) + + # Calculate Loss + loss = criterion(probs_anomaly, y0) + return loss + + def inference_forward(self, batch_x, net, criterion): + ts = batch_x.float().to(self.device) + + # ts = ts.transpose(2, 1) + # stride = self.suspect_win_len + # unfold_layer = torch.nn.Unfold( + # kernel_size=(self.n_features, self.win_len), + # stride=stride + # ) + # ts_windows = unfold_layer(ts.unsqueeze(1)) + # + # num_windows = int(1 + (self.seq_len - self.win_len) / stride) + # assert ts_windows.shape == ( + # 
batch_x.shape[0], + # self.n_features * self.win_len, + # num_windows, + # ) + # ts_windows = ts_windows.transpose(1, 2) + # ts_windows = ts_windows.reshape( + # batch_x.shape[0], num_windows, + # self.n_features, self.win_len + # ) + # x0 = ts_windows.flatten(start_dim=0, end_dim=1) + # x0 = x0.transpose(2, 1) + + x0 = ts + + x_context = x0[:, :-self.suspect_win_len] + logits_anomaly = net(x0, x_context) + logits_anomaly = logits_anomaly.squeeze() + return batch_x, logits_anomaly + + def inference_prepare(self, X): + test_loader = DataLoader(X, batch_size=self.batch_size, + drop_last=False, shuffle=False) + self.criterion.reduction = 'none' + return test_loader + + @staticmethod + def coe_batch(x: torch.Tensor, y: torch.Tensor, coe_rate: float, suspect_window_length: int, + random_start_end: bool = True): + """Contextual Outlier Exposure. + + Args: + x : Tensor of shape (batch, ts channels, time) + y : Tensor of shape (batch, ) + coe_rate : Number of generated anomalies as proportion of the batch size. + random_start_end : If True, a random subset within the suspect segment is permuted between time series; + if False, the whole suspect segment is randomly permuted. 
+ """ + + if coe_rate == 0: + raise ValueError(f"coe_rate must be > 0.") + batch_size = x.shape[0] + ts_channels = x.shape[1] + oe_size = int(batch_size * coe_rate) + + # Select indices + idx_1 = torch.arange(oe_size) + idx_2 = torch.arange(oe_size) + while torch.any(idx_1 == idx_2): + idx_1 = torch.randint(low=0, high=batch_size, size=(oe_size,)).type_as(x).long() + idx_2 = torch.randint(low=0, high=batch_size, size=(oe_size,)).type_as(x).long() + + if ts_channels > 3: + numb_dim_to_swap = np.random.randint(low=3, high=ts_channels, size=(oe_size)) + # print(numb_dim_to_swap) + else: + numb_dim_to_swap = np.ones(oe_size) * ts_channels + + x_oe = x[idx_1].clone() # .detach() + oe_time_start_end = np.random.randint( + low=x.shape[-1] - suspect_window_length, high=x.shape[-1] + 1, size=(oe_size, 2) + ) + oe_time_start_end.sort(axis=1) + # for start, end in oe_time_start_end: + for i in range(len(idx_2)): + # obtain the dimensons to swap + numb_dim_to_swap_here = int(numb_dim_to_swap[i]) + dims_to_swap_here = np.random.choice( + range(ts_channels), size=numb_dim_to_swap_here, replace=False + ) + + # obtain start and end of swap + start, end = oe_time_start_end[i] + + # swap + x_oe[i, dims_to_swap_here, start:end] = x[idx_2[i], dims_to_swap_here, start:end] + + # Label as positive anomalies + y_oe = torch.ones(oe_size).type_as(y) + + return x_oe, y_oe + + @staticmethod + def mixup_batch(x: torch.Tensor, y: torch.Tensor, mixup_rate: float): + """ + Args: + x : Tensor of shape (batch, ts channels, time) + y : Tensor of shape (batch, ) + mixup_rate : Number of generated anomalies as proportion of the batch size. 
+ """ + + if mixup_rate == 0: + raise ValueError(f"mixup_rate must be > 0.") + batch_size = x.shape[0] + mixup_size = int(batch_size * mixup_rate) # + + # Select indices + idx_1 = torch.arange(mixup_size) + idx_2 = torch.arange(mixup_size) + while torch.any(idx_1 == idx_2): + idx_1 = torch.randint(low=0, high=batch_size, size=(mixup_size,)).type_as(x).long() + idx_2 = torch.randint(low=0, high=batch_size, size=(mixup_size,)).type_as(x).long() + + # sample mixing weights: + beta_param = float(0.05) + beta_distr = torch.distributions.beta.Beta( + torch.tensor([beta_param]), torch.tensor([beta_param]) + ) + weights = torch.from_numpy(np.random.beta(beta_param, beta_param, (mixup_size,))).type_as(x) + oppose_weights = 1.0 - weights + + # Create contamination + x_mix_1 = x[idx_1].clone() + x_mix_2 = x[idx_2].clone() # partner samples come from idx_2, matching the label blend below + x_mixup = ( + x_mix_1 * weights[:, None, None] + x_mix_2 * oppose_weights[:, None, None] + ) # .detach() + + # Blend the labels of the two samples with the same weights + y_mixup = y[idx_1].clone() * weights + y[idx_2].clone() * oppose_weights + + return x_mixup, y_mixup + + +class NCADNet(torch.nn.Module): + def __init__(self, n_features, n_hidden=32, n_output=128, + kernel_size=2, bias=True, + eps=1e-10, dropout=0.2, activation='ReLU', + ): + super(NCADNet, self).__init__() + + self.network = TCNnet( + n_features=n_features, + n_hidden=n_hidden, + n_output=n_output, + kernel_size=kernel_size, + bias=bias, + dropout=dropout, + activation=activation + ) + + self.distance_metric = CosineDistance() + self.eps = eps + + def forward(self, x, x_c): + x_whole_embedding = self.network(x) + x_context_embedding = self.network(x_c) + + dists = self.distance_metric(x_whole_embedding, x_context_embedding) + + # Probability of the two embeddings being equal: exp(-dist) + log_prob_equal = -dists + + # Computation of log_prob_different + prob_different = torch.clamp(1 - torch.exp(log_prob_equal), self.eps, 1) + log_prob_different = torch.log(prob_different) + + logits_different = log_prob_different -
log_prob_equal + + return logits_different + + +class CosineDistance(torch.nn.Module): + r"""Returns the cosine distance between :math:`x_1` and :math:`x_2`, computed along dim.""" + + def __init__( self, dim=1, keepdim=True): + super().__init__() + self.dim = int(dim) + self.keepdim = bool(keepdim) + self.eps = 1e-10 + + def forward(self, x1, x2): + # Cosine of angle between x1 and x2 + cos_sim = F.cosine_similarity(x1, x2, dim=self.dim, eps=self.eps) + dist = -torch.log((1 + cos_sim) / 2) + + if self.keepdim: + dist = dist.unsqueeze(dim=self.dim) + return dist + diff --git a/deepod/models/time_series/prenet.py b/deepod/models/time_series/prenet.py index 605bc60..d48a3b8 100644 --- a/deepod/models/time_series/prenet.py +++ b/deepod/models/time_series/prenet.py @@ -12,6 +12,9 @@ import numpy as np class PReNetTS(BaseDeepAD): + """ + Deep Weakly-supervised Anomaly Detection (KDD‘23) + """ def __init__(self, epochs=100, batch_size=64, lr=1e-3, network='Transformer', seq_len=30, stride=1, rep_dim=128, hidden_dims='512', act='GELU', bias=False, diff --git a/deepod/models/time_series/tcned.py b/deepod/models/time_series/tcned.py index 4c124a1..bd94239 100644 --- a/deepod/models/time_series/tcned.py +++ b/deepod/models/time_series/tcned.py @@ -15,6 +15,9 @@ from ray.air import session, Checkpoint class TcnED(BaseDeepAD): + """ + An Evaluation of Anomaly Detection and Diagnosis in Multivariate Time Series (TNNLS'21) + """ def __init__(self, seq_len=100, stride=1, epochs=10, batch_size=32, lr=1e-4, rep_dim=32, hidden_dims=32, kernel_size=3, act='ReLU', bias=True, dropout=0.2, epoch_steps=-1, prt_steps=1, device='cuda', diff --git a/deepod/models/time_series/timesnet.py b/deepod/models/time_series/timesnet.py index b28f812..97feb1c 100644 --- a/deepod/models/time_series/timesnet.py +++ b/deepod/models/time_series/timesnet.py @@ -10,6 +10,10 @@ from deepod.core.base_model import BaseDeepAD class TimesNet(BaseDeepAD): + """ + TIMESNET: Temporal 2D-Variation Modeling for 
General Time Series Analysis (ICLR'23) + + """ def __init__(self, seq_len=100, stride=1, lr=0.0001, epochs=10, batch_size=32, epoch_steps=20, prt_steps=1, device='cuda', pred_len=0, e_layers=2, d_model=64, d_ff=64, dropout=0.1, top_k=5, num_kernels=6, diff --git a/deepod/models/time_series/tranad.py b/deepod/models/time_series/tranad.py index e505a2e..523ac79 100644 --- a/deepod/models/time_series/tranad.py +++ b/deepod/models/time_series/tranad.py @@ -10,6 +10,10 @@ from deepod.core.base_model import BaseDeepAD class TranAD(BaseDeepAD): + """ + TranAD: Deep Transformer Networks for Anomaly Detection in Multivariate Time Series Data (VLDB'22) + + """ def __init__(self, seq_len=100, stride=1, lr=0.001, epochs=5, batch_size=128, epoch_steps=20, prt_steps=1, device='cuda', verbose=2, random_state=42): diff --git a/deepod/models/time_series/usad.py b/deepod/models/time_series/usad.py index 8434381..e0db840 100644 --- a/deepod/models/time_series/usad.py +++ b/deepod/models/time_series/usad.py @@ -9,6 +9,9 @@ from deepod.core.base_model import BaseDeepAD class USAD(BaseDeepAD): + """ + + """ def __init__(self, seq_len=100, stride=1, hidden_dims=100, rep_dim=128, epochs=100, batch_size=128, lr=1e-3, es=1, train_val_pc=0.2, diff --git a/deepod/utils/data.py b/deepod/utils/data.py index 97fdc54..f0a8ce2 100644 --- a/deepod/utils/data.py +++ b/deepod/utils/data.py @@ -72,7 +72,6 @@ def _generate_data(n_inliers, n_outliers, n_features, coef, offset, return X, y - def generate_data(n_train=1000, n_test=500, n_features=2, contamination=0.1, train_only=False, offset=10, random_state=None, n_nan=0, n_inf=0): diff --git a/docs/.debug.yml b/docs/.debug.yml deleted file mode 100644 index 05d534a..0000000 --- a/docs/.debug.yml +++ /dev/null @@ -1,3 +0,0 @@ -remote_theme: false - -theme: jekyll-rtd-theme \ No newline at end of file diff --git a/docs/_config.yml b/docs/_config.yml deleted file mode 100644 index 2c79bd9..0000000 --- a/docs/_config.yml +++ /dev/null @@ -1,14 +0,0 @@ 
-title: Your project name -lang: en -description: a catchy description for your project - -remote_theme: rundocs/jekyll-rtd-theme - -readme_index: - with_frontmatter: true - -exclude: - - Makefile - - CNAME - - Gemfile - - Gemfile.lock \ No newline at end of file diff --git a/docs/_templates/class.rst b/docs/_templates/class.rst new file mode 100644 index 0000000..56c952d --- /dev/null +++ b/docs/_templates/class.rst @@ -0,0 +1,22 @@ +{{ fullname }} +{{ underline }} +.. currentmodule:: {{ module }} +.. autoclass:: {{ objname }} + {% block methods %} + {% if methods %} + .. rubric:: Methods + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} \ No newline at end of file diff --git a/docs/additional.contributing.rst b/docs/additional.contributing.rst new file mode 100644 index 0000000..ea6edcf --- /dev/null +++ b/docs/additional.contributing.rst @@ -0,0 +1,35 @@ +Contributing +============= + +Everyone is very welcome to contribute. + +We share the same values as the `scikit-learn `_ community + + +.. note:: + We are a community based on openness and friendly, didactic, discussions. + + We aspire to treat everybody equally, and value their contributions. We are particularly seeking people + from underrepresented backgrounds in Open Source Software and scikit-learn in particular to participate + and contribute their expertise and experience. + + Decisions are made based on technical merit and consensus. + + Code is not the only way to help the project. Reviewing pull requests, + answering questions to help others on mailing lists or issues, organizing and teaching tutorials, + working on the website, improving the documentation, are all priceless contributions.
+ + We abide by the principles of openness, respect, and consideration of others of the Python + Software Foundation: https://www.python.org/psf/codeofconduct/ + + In case you experience issues using this package, do not hesitate to submit a ticket to the GitHub issue tracker. + You are also welcome to post feature requests or pull requests. + + + +For any questions, you may open an issue on GitHub or drop me an email at hongzuoxu(at)126.com. + + +TODO list +--------- +We attach a TODO list below; we would be very pleased if you could contribute to anything on this list. diff --git a/docs/additional.license.rst b/docs/additional.license.rst new file mode 100644 index 0000000..0912fd4 --- /dev/null +++ b/docs/additional.license.rst @@ -0,0 +1,33 @@ +License +======= + + +This project is licensed under the BSD 2-Clause License. + +.. code-block:: + + BSD 2-Clause License + + Copyright (c) 2023, Hongzuo Xu All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/docs/additional.star_history.rst b/docs/additional.star_history.rst new file mode 100644 index 0000000..8666dd4 --- /dev/null +++ b/docs/additional.star_history.rst @@ -0,0 +1,9 @@ +Star History on Github +====================== + + + +.. image:: https://api.star-history.com/svg?repos=xuhongzuo/DeepOD&type=Date + :target: https://star-history.com/#xuhongzuo/DeepOD&Date + :align: center + diff --git a/docs/api_cc.rst b/docs/api_cc.rst new file mode 100644 index 0000000..4dd110f --- /dev/null +++ b/docs/api_cc.rst @@ -0,0 +1,28 @@ +API CheatSheet +============== + +The following APIs are applicable for all detector models for easy use. + +* :func:`deepod.core.base_model.BaseDeepAD.fit`: Fit detector. y is ignored in unsupervised methods. +* :func:`deepod.core.base_model.BaseDeepAD.decision_function`: Predict raw anomaly score of X using the fitted detector. +* :func:`deepod.core.base_model.BaseDeepAD.predict`: Predict if a particular sample is an outlier or not using the fitted detector. + + +Key Attributes of a fitted model: + +* :attr:`deepod.core.base_model.BaseDeepAD.decision_scores_`: The outlier scores of the training data. The higher, the more abnormal. + Outliers tend to have higher scores. +* :attr:`deepod.core.base_model.BaseDeepAD.labels_`: The binary labels of the training data. 0 stands for inliers and 1 for outliers/anomalies. 
+ + +See base class definition below: + +deepod.core.base_model module +----------------------------- + +.. automodule:: deepod.core.base_model + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + diff --git a/docs/api_reference.base_networks.rst b/docs/api_reference.base_networks.rst new file mode 100644 index 0000000..20bdcb7 --- /dev/null +++ b/docs/api_reference.base_networks.rst @@ -0,0 +1,23 @@ +Network Architectures +------------------------------------ + + + +.. currentmodule:: deepod + +.. autosummary:: + :nosignatures: + :template: class.rst + :toctree: generated + + core.networks.MLPnet + core.networks.MlpAE + core.networks.GRUNet + core.networks.LSTMNet + core.networks.ConvSeqEncoder + core.networks.ConvNet + core.networks.TcnAE + core.networks.TCNnet + core.networks.TSTransformerEncoder + + diff --git a/docs/api_reference.metrics.rst b/docs/api_reference.metrics.rst new file mode 100644 index 0000000..c6210f2 --- /dev/null +++ b/docs/api_reference.metrics.rst @@ -0,0 +1,17 @@ +Evaluation Metrics +=================== + + + + +.. automodule:: deepod.metrics + :members: + :undoc-members: + :show-inheritance: + :inherited-members: + +.. rubric:: References + +.. bibliography:: + :cited: + :labelprefix: B \ No newline at end of file diff --git a/docs/api_reference.rst b/docs/api_reference.rst new file mode 100644 index 0000000..215378f --- /dev/null +++ b/docs/api_reference.rst @@ -0,0 +1,17 @@ +API Reference +------------- + +This is the API documentation for ``DeepOD``. + + +.. toctree:: + + api_reference.tabular + api_reference.time_series + api_reference.base_networks + api_reference.metrics + + + + + diff --git a/docs/api_reference.tabular.rst b/docs/api_reference.tabular.rst new file mode 100644 index 0000000..163c492 --- /dev/null +++ b/docs/api_reference.tabular.rst @@ -0,0 +1,53 @@ +Models for Tabular Data +------------------------------------------ + + +..
automodule:: deepod.models.tabular + :no-members: + :no-inherited-members: + + +.. currentmodule:: deepod + + +Unsupervised Models +^^^^^^^^^^^^^^^^^^^^^ +implemented unsupervised anomaly detection models + +.. autosummary:: + :nosignatures: + :template: class.rst + :toctree: generated + + models.DeepSVDD + models.RCA + models.DevNet + models.DeepIsolationForest + models.REPEN + models.SLAD + models.ICL + models.RDP + models.GOAD + models.NeuTraL + +Weakly-supervised Models +^^^^^^^^^^^^^^^^^^^^^^^^^^ +implemented weakly-supervised anomaly detection models + +.. autosummary:: + :nosignatures: + :template: class.rst + :toctree: generated + + models.DevNet + models.DeepSAD + models.FeaWAD + models.RoSAS + models.PReNet + + +.. rubric:: References + +.. bibliography:: + :cited: + :labelprefix: B \ No newline at end of file diff --git a/docs/api_reference.time_series.rst b/docs/api_reference.time_series.rst new file mode 100644 index 0000000..fc7d316 --- /dev/null +++ b/docs/api_reference.time_series.rst @@ -0,0 +1,40 @@ +Models for Time Series +======================================== + + +.. automodule:: deepod.models.time_series + :no-members: + :no-inherited-members: + + +.. currentmodule:: deepod + + + +implemented unsupervised anomaly detection models for time series data. + +.. autosummary:: + :nosignatures: + :template: class.rst + :toctree: generated + + models.TimesNet + models.DCdetector + models.AnomalyTransformer + models.NCAD + models.TranAD + models.COUTA + models.TcnED + models.DeepIsolationForestTS + models.DeepSVDDTS + models.DeepSADTS + models.DevNetTS + models.PReNetTS + + + +.. rubric:: References + +..
bibliography:: + :cited: + :labelprefix: B diff --git a/docs/conf.py b/docs/conf.py index 0e9e411..06d13f3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,7 +24,7 @@ version_path = os.path.join(deepod_dir, 'deepod', 'version.py') exec(open(version_path).read()) # -- Project information ----------------------------------------------------- -project = 'deepod' +project = 'DeepOD' copyright = '2023, Hongzuo Xu' author = 'Hongzuo Xu' @@ -45,9 +45,11 @@ extensions = [ 'sphinx.ext.coverage', 'sphinx.ext.imgmath', 'sphinx.ext.viewcode', + 'sphinx.ext.autosummary', 'sphinxcontrib.bibtex', # 'sphinx.ext.napoleon', - # 'sphinx_rtd_theme', + 'sphinx_rtd_theme', + 'sphinx.ext.napoleon' ] bibtex_bibfiles = ['zreferences.bib'] @@ -68,7 +70,7 @@ master_doc = 'index' # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -84,19 +86,43 @@ pygments_style = 'sphinx' # a list of builtin themes. # https://www.sphinx-doc.org/en/master/usage/theming.html#themes#Themes # html_theme = 'default' -html_theme = "alabaster" +# html_theme = "alabaster" +# html_theme = 'sphinxawesome_theme' +html_theme = 'furo' +# html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. 
# -# html_theme_options = {} +# html_theme_options = { +# 'canonical_url': '', +# 'logo_only': False, +# 'display_version': True, +# 'prev_next_buttons_location': 'bottom', +# 'style_external_links': False, +# #'vcs_pageview_mode': '', +# #'style_nav_header_background': 'white', +# # Toc options +# 'collapse_navigation': True, +# 'sticky_navigation': True, +# 'navigation_depth': 7, +# 'includehidden': True, +# 'titles_only': False, +# } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +autosummary_generate = True +autodoc_default_options = {'members': True, + 'inherited-members': True, + } +autodoc_typehints = "none" + + # Custom sidebar templates, must be a dictionary that maps document names # to template names. # diff --git a/docs/index.rst b/docs/index.rst index c4c8969..27cc225 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,10 @@ Welcome to DeepOD documentation! :target: https://github.com/xuhongzuo/DeepOD/actions/workflows/testing.yml :alt: testing2 +.. image:: https://readthedocs.org/projects/deepod/badge/?version=latest + :target: https://deepod.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + .. image:: https://coveralls.io/repos/github/xuhongzuo/DeepOD/badge.svg?branch=main :target: https://coveralls.io/github/xuhongzuo/DeepOD?branch=main :alt: coveralls @@ -20,7 +24,9 @@ Welcome to DeepOD documentation! .. image:: https://static.pepy.tech/personalized-badge/deepod?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads :target: https://pepy.tech/project/deepod :alt: downloads - + + + ``DeepOD`` is an open-source python library for Deep Learning-based `Outlier Detection `_ and `Anomaly Detection `_. 
``DeepOD`` supports tabular anomaly detection and time-series anomaly detection. @@ -45,6 +51,9 @@ If you are interested in our project, we are pleased to have your stars and fork + + + Citation ~~~~~~~~~~~~~~~~~ If you use this library in your work, please cite this paper: @@ -68,15 +77,6 @@ You can also use the BibTex entry below for citation. } -Star History -~~~~~~~~~~~~~~~~~ -.. image:: https://api.star-history.com/svg?repos=xuhongzuo/DeepOD&type=Date - :target: https://star-history.com/#xuhongzuo/DeepOD&Date - :align: center - - - - ---- @@ -87,6 +87,23 @@ Star History :hidden: :caption: Getting Started + start.install + start.examples + start.model_save - install +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Documentation + api_reference + api_cc + +.. toctree:: + :maxdepth: 2 + :hidden: + :caption: Additional Information + + additional.contributing + additional.license + additional.star_history \ No newline at end of file diff --git a/docs/install.rst b/docs/install.rst deleted file mode 100644 index 3ea5082..0000000 --- a/docs/install.rst +++ /dev/null @@ -1,34 +0,0 @@ -Installation -============ - -It is recommended to use **pip** for installation. Please make sure -**the latest version** is installed, as DeepOD is updated frequently: - -.. code-block:: bash - - pip install deepod # normal install - pip install --upgrade deepod # or update if needed - - -Alternatively, you could clone and run setup.py file: - -.. code-block:: bash - - git clone https://github.com/xuhongzuo/deepod.git - cd pyod - pip install . 
- - -**Required Dependencies**\ : - - -* Python 3.7+ -* numpy>=1.19 -* scipy>=1.5.1 -* scikit_learn>=0.20.0 -* pandas>=1.0.0 -* torch>1.10.0,<1.13.1 -* ray==2.6.1 -* pyarrow>=11.0.0 -* einops - diff --git a/docs/requirements.txt b/docs/requirements.txt index cf5782e..ce2c7ae 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,3 @@ -sphinx-rtd-theme -sphinxcontrib-bibtex +sphinx-rtd-theme==1.3.0 +sphinxawesome-theme==4.1.0 +sphinxcontrib-bibtex==2.5.0 \ No newline at end of file diff --git a/docs/zreferences.bib b/docs/zreferences.bib index d0870d5..fe7aca3 100644 --- a/docs/zreferences.bib +++ b/docs/zreferences.bib @@ -28,4 +28,21 @@ number={}, pages={1-14}, doi={10.1109/TKDE.2023.3270293} +} + + +@article{ruff2018deepsvdd, + title={Deep One-Class Classification}, + author={Ruff, Lukas and Vandermeulen, Robert and Görnitz, Nico and Deecke, Lucas and Siddiqui, Shoaib and Binder, Alexander and Müller, Emmanuel and Kloft, Marius}, + journal={International conference on machine learning}, + year={2018} +} + + +@inproceedings{pang2019deep, + title={Deep anomaly detection with deviation networks}, + author={Pang, Guansong and Shen, Chunhua and van den Hengel, Anton}, + booktitle={Proceedings of the 25th ACM SIGKDD international conference on knowledge discovery \& data mining}, + pages={353--362}, + year={2019} } \ No newline at end of file diff --git a/testbed/testbed_unsupervised_tsad.py b/testbed/testbed_unsupervised_tsad.py index ab335ca..440491e 100644 --- a/testbed/testbed_unsupervised_tsad.py +++ b/testbed/testbed_unsupervised_tsad.py @@ -23,14 +23,17 @@ parser.add_argument("--runs", type=int, default=5, "obtain the average performance") parser.add_argument("--output_dir", type=str, default='@records/', help="the output file path") -parser.add_argument("--dataset", type=str, default='ASD', +parser.add_argument("--dataset", type=str, + default='ASD', + # default='SMD,MSL,SMAP,SWaT_cut,EP,DASADS', help='dataset name or a list of names split by 
comma') parser.add_argument("--entities", type=str, + # default='omi-1', default='FULL', help='FULL represents all the csv file in the folder, ' 'or a list of entity names split by comma') parser.add_argument("--entity_combined", type=int, default=1) -parser.add_argument("--model", type=str, default='DCdetector', help="") +parser.add_argument("--model", type=str, default='NCAD', help="") parser.add_argument("--auto_hyper", default=False, action='store_true', help="") parser.add_argument('--silent_header', action='store_true')