% Die Hyperref Option hyperref={pdfpagelabels=false} verhindert die Warnung:
% Package hyperref Warning: Option `pdfpagelabels' is turned off
% (hyperref) because \thepage is undefined.
% Hyperref stopped early
% Das Paket lmodern erspart die folgenden Warnungen:
% LaTeX Font Warning: Font shape `OT1/cmss/m/n' in size <4> not available
% (Font) size <5> substituted on input line 22.
% LaTeX Font Warning: Size substitutions with differences
% (Font) up to 1.0pt have occurred.
% Wenn \title{\ldots} \author{\ldots} erst nach \begin{document} kommen,
% kommt folgende Warnung:
% Package hyperref Warning: Option `pdfauthor' has already been used,
% (hyperref) ...
% Daher steht es hier vor \begin{document}
\title[Anomaly Detection and AutoML]{Anomaly Detection and AutoML}
\author{Simon Kluettermann}
\institute{LS9, TU Dortmund}
\begin{beamercolorbox}[wd=.4\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
\usebeamerfont{author in head/foot}\insertshorttitle
\begin{beamercolorbox}[wd=.25\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
\usebeamerfont{title in head/foot}\insertsection
\begin{beamercolorbox}[wd=.3499\paperwidth,ht=2.25ex,dp=1ex,right]{date in head/foot}%
\usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
\hyperlink{toc}{\insertframenumber{} / \inserttotalframenumber\hspace*{2ex}}
\usebeamerfont{framesource}\usebeamercolor[fg!66]{framesource} Source: {#1}
{\huge\bfseries \par}
{\LARGE\itshape Simon Kluettermann\par}
{\scshape\Large Master Thesis in Physics\par}
{\Large submitted to the \par}
{\scshape\Large Faculty of Mathematics, Computer Science and Natural Sciences \par}
{\Large \par}
{\scshape\Large RWTH Aachen University}
{\scshape\Large Department of Physics\par}
{\scshape\Large Institute for Theoretical Particle Physics and Cosmology\par}
{ \Large\par}
{\Large First Referee: Prof. Dr. Michael Kraemer \par}
{\Large Second Referee: Prof. Dr. Felix Kahlhoefer}
{\large November 2020 \par}
\begin{frame}[label=Anomaly Detection]
\frametitle{Anomaly Detection}
\item Find strange (unexpected) samples.
\item $\Rightarrow$ If a traffic light is constantly yellow, probably something broke
\item But this could happen in a lot of different ways
\item $\Rightarrow$ Most likely the traffic light is just off. But it could also fluctuate quickly or start smoking
\item How to cover all possible anomalies?
\item $\Rightarrow$ Unsupervised Machine Learning
\begin{frame}[label=Unsupervised Machine Learning]
\frametitle{Unsupervised Machine Learning}
\item Normal machine learning: Input - Label
\item Here: Only Input.
\item $\Rightarrow$ Instead of classifying different types, try to understand your given dataset
\item Deviations from this understanding are anomalies
\item x: training samples
\item tx: test samples
\item ty: test labels (is a certain sample an anomaly or not)
\item Useful: \emph{peak /global/cardio.npz}
\begin{columns}[c] % align columns
\item How to do this? Here is one algorithm: kNN
\item Goal: Generate an anomaly score (high value $\Rightarrow$ highly anomalous)
\item Here: The anomaly score is the distance to the $k$th closest sample
\includegraphics[width=0.9\textwidth]{..//prep/03kNN/yanghuang 08.png}
\caption{[Yang, Huang 08]}
\label{fig:prep03kNNyanghuang 08png}
\begin{columns}[c] % align columns
\item How to do this? Here is one algorithm: kNN
\item Goal: Generate an anomaly score (high value $\Rightarrow$ highly anomalous)
\item Here: The anomaly score is the distance to the $k$th closest sample
\begin{frame}[label=AUC Score]
\frametitle{AUC Score}
\begin{columns}[c] % align columns
\begin{frame}[label=AUC Score]
\frametitle{AUC Score}
\begin{columns}[c] % align columns
\item Iterate over every threshold
\item Plot FPR vs.\ TPR
\item False Positive Rate
\item $\frac{FP}{FP+TN}$
\item True Positive Rate
\item $\frac{TP}{TP+FN}$
\item ROC-AUC: Integral of this curve!
\begin{frame}[label=AUC Score]
\frametitle{AUC Score}
\item Calculate with \emph{sklearn.metrics.roc\_auc\_score}
\item Higher AUC score $\Rightarrow$ better
\item $\mathrm{AUC}=1.0$ $\Rightarrow$ Perfect separation
\item $\mathrm{AUC}=0.5$ $\Rightarrow$ Random model
\item $\mathrm{AUC}=0.0$ $\Rightarrow$ Inverse separation (every anomaly is normal, and every normal sample is anomalous)
\begin{frame}[label=AUC Scores]
\frametitle{AUC Scores}
\item But: We can beat this!
\item How? Hyperparameters
\item Every algorithm has hyperparameters that control how it works
\item For example: $k$ in kNN (number of close points considered)
\item Let's take the worst algorithm (kNN: $0.927$) and try to improve it
%from file ../case2/data/011Optimize.txt
%from file ../case2/data/013flaml.txt
\begin{columns}[c] % align columns
\item \emph{source folder/bin/activate}
\item \emph{pip install flaml}
%from file ../case2/data/016Your Turn.txt
\begin{frame}[label=Your Turn]
\frametitle{Your Turn}
\item Remember your last algorithm
\item Find its hyperparameters (Tip: pyod website)
\item Optimize your algorithm and give me a new AUC!
\item Bonus Question: Is there a problem with what we are doing?