{\huge\bfseries \par}
{\LARGE\itshape Simon Kluettermann\par}
{\scshape\Large Master Thesis in Physics\par}
{\Large submitted to the \par}
{\scshape\Large Faculty of Mathematics, Computer Science and Natural Sciences \par}
{\Large \par}
{\scshape\Large RWTH Aachen University}
{\scshape\Large Department of Physics\par}
{\scshape\Large Institute for Theoretical Particle Physics and Cosmology\par}
{ \Large\par}
{\Large First Referee: Prof.~Dr.~Michael Kraemer \par}
{\Large Second Referee: Prof.~Dr.~Felix Kahlhoefer}
{\large November 2020 \par} topic \end{itemize} \item then short presentation in front of our chair (15min, relaxed) \begin{itemize} \item get some feedback/suggestions \end{itemize} \item afterwards register the thesis \begin{itemize} \item (different for CS/DS students) \end{itemize} \item Problem: We are not able to supervise more than 2 students at the same time (CS faculty rules) \end{itemize} \end{frame} %from file ../knn1//data/002Today.txt \begin{frame}[label=Today] \frametitle{Today} \begin{itemize} \item First: A short summary of each Topic \item Then time for questions/Talk with your supervisor about each topic that sounds interesting \item Your own topics are always welcome;) \end{itemize} \end{frame} %from file ../knn1//data/003Anomaly Detection.txt \begin{frame}[label=Anomaly Detection] \frametitle{Anomaly Detection} \begin{columns}[c] % align columns \begin{column}{0.48\textwidth}%.48 \begin{itemize} \item I'm working on Anomaly Detection \item That means characterising an often very complex distribution, to find events that don't match the expected distribution \end{itemize} \end{column}% \hfill% \begin{column}{0.48\textwidth}%.48 \begin{figure}[H] \centering \includegraphics[width=0.9\textwidth]{../prep/03Anomaly_Detection/circle.pdf} \label{fig:prep03Anomaly_Detectioncirclepdf} \end{figure} \end{column}% \hfill% \end{columns} \end{frame} %from file ../knn1//data/004knn.txt \begin{frame}[label=knn] \frametitle{knn} \begin{itemize} \item kNN algorithm can also be used for AD \item if the $k$-th closest point is further away, a sample is considered more anomalous \item $r=\frac{k}{2N\cdot pdf}$ \item Powerful method, as it can model the pdf directly \end{itemize} \end{frame} %from file ../knn1//data/005Better knn.txt \begin{frame}[label=Better knn] \frametitle{Better knn} \begin{itemize} \item The model (mostly) ignores every known sample except one \item So there are extensions \item $avg=\frac{1}{N} \sum_i knn_i(x)$ \item $wavg=\frac{1}{N} \sum_i 
\frac{knn_i(x)}{i}$ \end{itemize} \end{frame} %from file ../knn1//data/006Comparison.txt \begin{frame}[label=Comparison] %\frametitle{Comparison} \begin{tabular}{llllll} \hline Dataset & wavg & avg & 1 & 3 & 5 \\ \hline $vertebral$ & $\textbf{0.4506}$ & $\textbf{0.4506}$ & $\textbf{0.4667}$ & $\textbf{0.4667}$ & $\textbf{0.45}$ \\ ... & & & & & \\ $thyroid$ & $\textbf{0.9138}$ & $\textbf{0.9151}$ & $\textbf{0.8763}$ & $\textbf{0.9086}$ & $\textbf{0.914}$ \\ $Iris\_setosa$ & $\textbf{0.9333}$ & $\textbf{0.9333}$ & $\textbf{0.9333}$ & $\textbf{0.9}$ & $\textbf{0.9}$ \\ $breastw$ & $\textbf{0.9361}$ & $\textbf{0.9361}$ & $\textbf{0.9211}$ & $\textbf{0.9248}$ & $\textbf{0.9286}$ \\ $wine$ & $\textbf{0.95}$ & $\textbf{0.95}$ & $\textbf{0.9}$ & $\textbf{0.95}$ & $\textbf{0.95}$ \\ $pendigits$ & $\textbf{0.9487}$ & $\textbf{0.9487}$ & $\textbf{0.9391}$ & $\textbf{0.9295}$ & $\textbf{0.9359}$ \\ $segment$ & $\textbf{0.9747}$ & $\textbf{0.9747}$ & $\textbf{0.9495}$ & $\textbf{0.9545}$ & $\textbf{0.9394}$ \\ $banknote-authentication$ & $\textbf{0.9777}$ & $\textbf{0.9776}$ & $\textbf{0.9408}$ & $\textbf{0.943}$ & $\textbf{0.9583}$ \\ $vowels$ & $\textbf{0.9998}$ & $\textbf{0.9972}$ & $\textbf{0.99}$ & $\textbf{0.92}$ & $\textbf{0.93}$ \\ $Ecoli$ & $\textbf{1.0}$ & $\textbf{1.0}$ & $\textbf{0.9}$ & $\textbf{1.0}$ & $\textbf{1.0}$ \\ $$ & $$ & $$ & $$ & $$ & $$ \\ $Average$ & $\textbf{0.7528} $ & $\textbf{0.7520} $ & $0.7325 $ & $0.7229 $ & $0.7157 $ \\ \hline \end{tabular} \end{frame} %from file ../knn1//data/007What to do?.txt \begin{frame}[label=What to do?] 
\frametitle{What to do?} \begin{itemize} \item Evaluation as anomaly detector is complicated \begin{itemize} \item Requires known anomalies \end{itemize} \item $\Rightarrow$So evaluate as density estimator \begin{itemize} \item Does not require anomalies \item Allows generating infinite amounts of training data \end{itemize} \end{itemize} \end{frame} %from file ../knn1//data/008What to do?.txt \begin{frame}[label=What to do?] \frametitle{What to do?} \begin{itemize} \item Collect Extensions of the oc-knn algorithm \item Define some distance measure to a known pdf \item Generate random datapoints following the pdf \item Evaluate which algorithm finds the pdf best \end{itemize} \end{frame} %from file ../knn1//data/009Requirements.txt \begin{frame}[label=Requirements] \frametitle{Requirements} \begin{itemize} \item Knowledge of python ( sum([i for i in range(5) if i\%2]) ) \begin{itemize} \item Ideally incl.\ numpy \end{itemize} \item Basic university level Math (you could argue that $r_k \propto \frac{k}{pdf}$) \item Ideally some experience working on an SSH server \item $\Rightarrow$Good as a Bachelor Thesis \item For a Master Thesis, I would extend this a bit (Could you also find $k$?) \end{itemize} \end{frame} %from file ../knn1//data/010Normalising Flows.txt \begin{frame}[label=Normalising Flows] \frametitle{Normalising Flows} \begin{itemize} \item Deep Learning Method, in which the output is normalised \item $\int f(x) dx=1 \; \forall f(x)$ \item Can be used to estimate probability density functions \item $\Rightarrow$Thus useful for AD \item $\int f(h(x)) \|\frac{\partial x}{\partial h}\| dh=1 \; \forall h(x)$ \end{itemize} \end{frame} %from file ../knn1//data/011Graph Normalising Flows.txt \begin{frame}[label=Graph Normalising Flows] \frametitle{Graph Normalising Flows} \begin{itemize} \item How to apply this to graphs? 
\item One Paper (Liu 2019) uses two NNs: \item Autoencoder graph$\Rightarrow$vector \item NF on vector data \item which is fine, but also not really graph-specific \item No interaction between encoding and transformation \end{itemize} \end{frame} %from file ../knn1//data/012Graph Normalising Flows.txt \begin{frame}[label=Graph Normalising Flows] \frametitle{Graph Normalising Flows} \begin{itemize} \item So why not do this directly? \item $\Rightarrow$Requires differentiating a graph \item Why not use only one Network? \item Graph$\Rightarrow$Vector$\Rightarrow$pdf \item $\Rightarrow$Finds trivial solution, as $ \propto \frac{1}{\sigma_{Vector}}$ \item So regularise the standard deviation of the vector space! \begin{itemize} \item Interplay between encoding and NF \item Could also be useful for high-dimensional data \end{itemize} \end{itemize} \end{frame} %from file ../knn1//data/013Requirements.txt \begin{frame}[label=Requirements] \frametitle{Requirements} \begin{itemize} \item Proficient in python ( [i for i in range(1,N) if not [j for j in range(2,i) if not i\%j]] ) \begin{itemize} \item Ideally incl.\ numpy, tensorflow, keras \end{itemize} \item Some deep learning experience \item University level math (google Cholesky Decomposition. Why is this useful for NF?) 
\item Ideally some experience working on an SSH server \item A bit more challenging$\Rightarrow$Better as a Master thesis \item (Still, we would start very slowly, of course) \end{itemize} \end{frame} %from file ../knn1//data/014Old Thesis Sina.txt \begin{frame}[label=Old Thesis Sina] \frametitle{Sina} \begin{columns}[c] % align columns \begin{column}{0.48\textwidth}%.48 \begin{itemize} \item Isolation Forest: Different Anomaly Detection Algorithm \item Problem: Isolation Forests don't work on categorical data \item $\Rightarrow$Extend them to categorical data \end{itemize} \end{column}% \hfill% \begin{column}{0.48\textwidth}%.48 \begin{figure}[H] \centering \includegraphics[width=0.9\textwidth]{../prep/20Old_Thesis_Sina/Bildschirmfoto vom 2022-09-26 16-22-30.png} \label{fig:prep20Old_Thesis_SinaBildschirmfoto vom 2022-09-26 16-22-30png} \end{figure} \end{column}% \hfill% \end{columns} \end{frame} %from file ../knn1//data/015Old Thesis Britta.txt \begin{frame}[label=Old Thesis Britta] \frametitle{Britta} \begin{columns}[c] % align columns \begin{column}{0.58\textwidth}%.48 \begin{itemize} \item Reidentification: Find known objects in new images \item Task: Find if two images of pallet blocks are of the same pallet block \item Use AD to represent the pallet blocks \end{itemize} \end{column}% \hfill% \begin{column}{0.38\textwidth}%.48 \begin{figure}[H] \centering \includegraphics[width=0.9\textwidth]{../prep/21Old_Thesis_Britta/Bildschirmfoto vom 2022-09-26 16-23-26.png} \label{fig:prep21Old_Thesis_BrittaBildschirmfoto vom 2022-09-26 16-23-26png} \end{figure} \end{column}% \hfill% \end{columns} \end{frame} %from file ../knn1//data/016Old Thesis Hsin Ping.txt \begin{frame}[label=Old Thesis Hsin Ping] \frametitle{Hsin Ping} \begin{columns}[c] % align columns \begin{column}{0.48\textwidth}%.48 \begin{itemize} \item Ensemble: Combination of multiple models \item Task: Explain the prediction of a model using the ensemble structure \end{itemize} \end{column}% \hfill% 
\begin{column}{0.48\textwidth}%.48 \begin{figure}[H] \centering \includegraphics[width=0.9\textwidth]{../prep/22Old_Thesis_Hsin_Ping/Bildschirmfoto vom 2022-09-26 16-24-14.png} \label{fig:prep22Old_Thesis_Hsin_PingBildschirmfoto vom 2022-09-26 16-24-14png} \end{figure} \end{column}% \hfill% \end{columns} \end{frame} %from file ../knn1//data/017Old Thesis Nikitha.txt \begin{frame}[label=Old Thesis Nikitha] \frametitle{Nikitha} \begin{columns}[c] % align columns \begin{column}{0.48\textwidth}%.48 \begin{itemize} \item Task: Explore a new kind of ensemble \item Instead of many uncorrelated models, let the models interact during training \end{itemize} \end{column}% \hfill% \begin{column}{0.48\textwidth}%.48 \begin{figure}[H] \centering \includegraphics[width=0.9\textwidth]{../prep/23Old_Thesis_Nikitha/Bildschirmfoto vom 2022-09-26 16-25-06.png} \label{fig:prep23Old_Thesis_NikithaBildschirmfoto vom 2022-09-26 16-25-06png} \end{figure} \end{column}% \hfill% \end{columns} \end{frame} \begin{frame} Questions? \end{frame} \end{document}