\UseRawInputEncoding
% Previous class line without an explicit aspect ratio (kept for reference):
%\documentclass[hyperref={pdfpagelabels=false}]{beamer}
\documentclass[hyperref={pdfpagelabels=false},aspectratio=169]{beamer}
% Passing hyperref={pdfpagelabels=false} to the class avoids this warning:
%   Package hyperref Warning: Option `pdfpagelabels' is turned off
%   (hyperref) because \thepage is undefined.
%   Hyperref stopped early
%
\usepackage{lmodern}
% Loading lmodern avoids these warnings:
%   LaTeX Font Warning: Font shape `OT1/cmss/m/n' in size <4> not available
%   (Font) size <5> substituted on input line 22.
%   LaTeX Font Warning: Size substitutions with differences
%   (Font) up to 1.0pt have occurred.
%
% If \title{\ldots} and \author{\ldots} are only given after \begin{document},
% the following warning is issued:
%   Package hyperref Warning: Option `pdfauthor' has already been used,
%   (hyperref) ...
% Therefore the presentation metadata is set here, before \begin{document}.
\title[Anomaly Detection and AutoML]{Anomaly Detection and AutoML}
\author{Simon Kluettermann}
\date{\today}
\institute{ls9 tu Dortmund}
% ---------------------------------------------------------------------------
% Theme setup, footline template, \source helper and all frames of the talk.
% Changes relative to the generated original:
%   * frame labels are unique (later duplicates get a numeric suffix, empty
%     `label=' options are dropped),
%   * \caption precedes \label inside figures,
%   * spelling fixes in slide text, spaces around $\Rightarrow$, and upright
%     multi-letter identifiers (AUC, FP, ...) in math.
% ---------------------------------------------------------------------------

% Hide beamer's navigation symbols.
\setbeamertemplate{navigation symbols}{}
% The shadow theme package is loaded in addition.
\usepackage{beamerthemeshadow}
\hypersetup{pdfstartview={Fit}} % fits the presentation to the window when first displayed
\usepackage{appendixnumberbeamer}
\usepackage{listings}
\usetheme{CambridgeUS}
\usepackage{ngerman}
\usecolortheme{dolphin}
% \beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2$\Rightarrow${15}}
% makes elements that are still to come (in the future) appear
% only faintly
%\beamersetuncovermixins{\opaqueness<1>{25}}{\opaqueness<2$\Rightarrow${15}}%here disabled
% also works for tables when teTeX is used\ldots
\renewcommand{\figurename}{}

% Footline with three colour boxes: short title | current section | date and
% frame counter.
% NOTE(review): the frame counter links to a target named `toc', but no frame
% or \hypertarget with that name is visible in this file -- confirm it exists.
\setbeamertemplate{footline}
{
  \leavevmode%
  \hbox{%
  \begin{beamercolorbox}[wd=.4\paperwidth,ht=2.25ex,dp=1ex,center]{author in head/foot}%
    \usebeamerfont{author in head/foot}\insertshorttitle
  \end{beamercolorbox}%
  \begin{beamercolorbox}[wd=.25\paperwidth,ht=2.25ex,dp=1ex,center]{title in head/foot}%
    \usebeamerfont{title in head/foot}\insertsection
  \end{beamercolorbox}%
  \begin{beamercolorbox}[wd=.3499\paperwidth,ht=2.25ex,dp=1ex,right]{date in head/foot}%
    \usebeamerfont{date in head/foot}\insertshortdate{}\hspace*{2em}
    \hyperlink{toc}{\insertframenumber{} / \inserttotalframenumber\hspace*{2ex}}
  \end{beamercolorbox}}%
  \vskip0pt%
}

\usepackage[absolute,overlay]{textpos}
\usepackage{graphicx}

% \source{<text>}: prints "Source: <text>" in a colour box placed at the
% absolute page position (0.1cm,8.9cm) via textpos.
\newcommand{\source}[1]{\begin{textblock*}{9cm}(0.1cm,8.9cm)
    \begin{beamercolorbox}[ht=0.5cm,left]{framesource}
        \usebeamerfont{framesource}\usebeamercolor[fg!66]{framesource} Source: {#1}
    \end{beamercolorbox}
\end{textblock*}}

\begin{document}

%from file ../case2/data/000.txt
% NOTE(review): this frame wraps what looks like a thesis title page
% (including article-style page-numbering commands) in a beamer frame;
% the content is kept as is -- confirm it is intended for this talk.
\begin{frame}
\frametitle{}
\begin{titlepage}
	\centering
	{\huge\bfseries \par}
	\vspace{2cm}
	{\LARGE\itshape Simon Kluettermann\par}
	\vspace{1.5cm}
	{\scshape\Large Master Thesis in Physics\par}
	\vspace{0.2cm}
	{\Large submitted to the \par}
	\vspace{0.2cm}
	{\scshape\Large Faculty of Mathematics, Computer Science and Natural Sciences \par}
	\vspace{0.2cm}
	{\Large \par}
	\vspace{0.2cm}
	{\scshape\Large RWTH Aachen University}
	\vspace{1cm}
	\vfill
	{\scshape\Large Department of Physics\par}
	\vspace{0.2cm}
	{\scshape\Large Institute for theoretical Particle Physics and Cosmology\par}
	\vspace{0.2cm}
	{ \Large\par}
	\vspace{0.2cm}
	{\Large First Referee: Prof.~Dr.~Michael Kraemer \par}
	{\Large Second Referee: Prof.~Dr.~Felix Kahlhoefer}
	\vfill
% Bottom of the page
	{\large November 2020 \par}
\end{titlepage}
\pagenumbering{roman}
\thispagestyle{empty}
\null
\newpage
\setcounter{page}{1}
\pagenumbering{arabic}
\end{frame}

%from file ../case2/data/001Anomaly Detection.txt
\begin{frame}[label=Anomaly Detection]
\frametitle{Anomaly Detection}
\begin{itemize}
\item Find strange (unexpected) samples.
\item $\Rightarrow$ If a traffic light is constantly yellow, probably something broke
\item But this could happen in a lot of different ways
\item $\Rightarrow$ Most likely the traffic light is just off. But it could also fluctuate quickly or start smoking
\item How to cover all possible anomalies?
\item $\Rightarrow$ Unsupervised Machine Learning
\end{itemize}
\end{frame}

%from file ../case2/data/002Unsupervised Machine Learning.txt
\begin{frame}[label=Unsupervised Machine Learning]
\frametitle{Unsupervised Machine Learning}
\begin{itemize}
\item Normal machine learning: Input - Label
\item Here: Only Input.
\item $\Rightarrow$ Instead of classifying different types, try to understand your given dataset
\item Deviations from this understanding are anomalies
\begin{itemize}
\item x: training samples
\item tx: test samples
\item ty: test labels (is a certain sample an anomaly or not)
\end{itemize}
\item Useful: \emph{peak /global/cardio.npz}
\end{itemize}
\end{frame}

%from file ../case2/data/003kNN.txt
\begin{frame}[label=kNN]
\frametitle{kNN}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item How to do this? Here one algorithm: kNN
\item Goal: Generate an anomaly score (high value $\Rightarrow$ highly anomalous)
\item Here: The anomaly score is the distance to the kth closest sample
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/03kNN/yanghuang 08.png}
	% \caption must come before \label so the label refers to this figure.
	\caption{[Yang, Huang 08]}
	\label{fig:prep03kNNyanghuang 08png}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}

%from file ../case2/data/004kNN.txt
\begin{frame}[label=kNN2]
\frametitle{kNN}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item How to do this? Here one algorithm: kNN
\item Goal: Generate an anomaly score (high value $\Rightarrow$ highly anomalous)
\item Here: The anomaly score is the distance to the kth closest sample
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/04kNN/dist0.pdf}
	\label{fig:prep04kNNdist0pdf}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}

%from file ../case2/data/005.txt
\begin{frame}
\frametitle{}
\begin{figure}[H]
	\centering
	\includegraphics[width=0.8\textwidth]{..//prep/05/dist0.pdf}
	\label{fig:prep05dist0pdf}
\end{figure}
\end{frame}

%from file ../case2/data/006AUC Score.txt
\begin{frame}[label=AUC Score]
\frametitle{AUC Score}
\begin{columns}[c] % align columns
\begin{column}{0.47619047619047616\textwidth}%.48
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/06AUC_Score/02confusion.png}
	\label{fig:prep06AUC_Score02confusionpng}
\end{figure}
\end{column}%
\hfill%
\begin{column}{0.47619047619047616\textwidth}%.48
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/06AUC_Score/01dist0.pdf}
	\label{fig:prep06AUC_Score01dist0pdf}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}

%from file ../case2/data/007AUC Score.txt
\begin{frame}[label=AUC Score2]
\frametitle{AUC Score}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item Iterate every threshold
\item Plot fpr vs tpr
\item False Positive Rate
\begin{itemize}
\item $\frac{\mathrm{FP}}{\mathrm{FP}+\mathrm{TN}}$
\end{itemize}
\item True Positive Rate
\begin{itemize}
\item $\frac{\mathrm{TP}}{\mathrm{TP}+\mathrm{FN}}$
\end{itemize}
\item ROC-AUC: Integral of this curve!
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
	\centering
	\includegraphics[width=0.8\textwidth]{..//prep/07AUC_Score/roc.pdf}
	\label{fig:prep07AUC_Scorerocpdf}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}

%from file ../case2/data/008AUC Score.txt
\begin{frame}[label=AUC Score3]
\frametitle{AUC Score}
\begin{itemize}
\item Calculate with \emph{sklearn.metrics.roc\_auc\_score}
\item Higher AUC score $\Rightarrow$ better
\item $\mathrm{AUC}=1.0 \Rightarrow$ Perfect separation
\item $\mathrm{AUC}=0.5 \Rightarrow$ Random model
\item $\mathrm{AUC}=0.0 \Rightarrow$ Inverse separation (every anomaly is normal, and every normal sample is anomalous)
\end{itemize}
\end{frame}

%from file ../case2/data/009AUC Scores.txt
\begin{frame}[label=AUC Scores]
\frametitle{AUC Scores}
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/09AUC_Scores/students.png}
	\label{fig:prep09AUC_Scoresstudentspng}
\end{figure}
\end{frame}

%from file ../case2/data/010AutoML.txt
\begin{frame}[label=AutoML]
\frametitle{AutoML}
\begin{itemize}
\item But: We can beat this!
\item How? Hyperparameters
\begin{itemize}
\item Every algorithm has hyperparameters that control how it works
\item For example: k in kNN (number of close points considered)
\end{itemize}
\item Let's take the worst algorithm (kNN: $0.927$) and try to improve it
\end{itemize}
\end{frame}

%from file ../case2/data/011Optimize.txt
\begin{frame}[label=Optimize]
\frametitle{Optimize}
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/11Optimize/baseline.png}
	\label{fig:prep11Optimizebaselinepng}
\end{figure}
\end{frame}

%from file ../case2/data/012Optimize.txt
\begin{frame}[label=Optimize2]
\frametitle{Optimize}
\begin{figure}[H]
	\centering
	\includegraphics[width=0.7\textwidth]{..//prep/12Optimize/optimize.png}
	\label{fig:prep12Optimizeoptimizepng}
\end{figure}
\end{frame}

%from file ../case2/data/013flaml.txt
\begin{frame}[label=flaml]
\frametitle{flaml}
\begin{columns}[c] % align columns
\begin{column}{0.48\textwidth}%.48
\begin{itemize}
\item \emph{source folder/bin/activate}
\item \emph{pip install flaml}
\end{itemize}
\end{column}%
\hfill%
\begin{column}{0.48\textwidth}%.48
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/15flaml/forflaml.png}
	\label{fig:prep15flamlforflamlpng}
\end{figure}
\end{column}%
\hfill%
\end{columns}
\end{frame}

%from file ../case2/data/014flaml.txt
\begin{frame}[label=flaml2]
\frametitle{flaml}
\begin{figure}[H]
	\centering
	\includegraphics[width=0.9\textwidth]{..//prep/16flaml/flaml.png}
	\label{fig:prep16flamlflamlpng}
\end{figure}
\end{frame}

%from file ../case2/data/015.txt
\begin{frame}
\frametitle{}
\begin{figure}[H]
	\centering
	\includegraphics[width=0.7\textwidth]{..//prep/17/hist.pdf}
	\label{fig:prep17histpdf}
\end{figure}
\end{frame}

%from file ../case2/data/016Your Turn.txt
\begin{frame}[label=Your Turn]
\frametitle{Your Turn}
\begin{itemize}
\item Remember your last algorithm
\item Find its hyperparameters (Tip: pyod website)
\item Optimize your algorithm and give me a new AUC!
\item Bonus Question: Is there a problem with what we are doing?
\end{itemize}
\end{frame}

\end{document}