diff --git a/papers/.gitignore b/papers/.gitignore index 2e36f16b..395205c3 100644 --- a/papers/.gitignore +++ b/papers/.gitignore @@ -6,4 +6,10 @@ *.out *.gz *.listing +*.vrb +*.uN9447 +*.snm +*.nav **/_minted-main/ +*.pygtex +*.pygstyle diff --git a/papers/eutypes-2019/tex/abstract.tex b/papers/eutypes-2019/abstract/tex/abstract.tex similarity index 100% rename from papers/eutypes-2019/tex/abstract.tex rename to papers/eutypes-2019/abstract/tex/abstract.tex diff --git a/papers/eutypes-2019/tex/bib_database.bib b/papers/eutypes-2019/abstract/tex/bib_database.bib similarity index 100% rename from papers/eutypes-2019/tex/bib_database.bib rename to papers/eutypes-2019/abstract/tex/bib_database.bib diff --git a/papers/eutypes-2019/tex/bibliography.tex b/papers/eutypes-2019/abstract/tex/bibliography.tex similarity index 100% rename from papers/eutypes-2019/tex/bibliography.tex rename to papers/eutypes-2019/abstract/tex/bibliography.tex diff --git a/papers/eutypes-2019/tex/main.pdf b/papers/eutypes-2019/abstract/tex/main.pdf similarity index 82% rename from papers/eutypes-2019/tex/main.pdf rename to papers/eutypes-2019/abstract/tex/main.pdf index ab143bd0..d2e3a123 100644 Binary files a/papers/eutypes-2019/tex/main.pdf and b/papers/eutypes-2019/abstract/tex/main.pdf differ diff --git a/papers/eutypes-2019/tex/main.tex b/papers/eutypes-2019/abstract/tex/main.tex similarity index 100% rename from papers/eutypes-2019/tex/main.tex rename to papers/eutypes-2019/abstract/tex/main.tex diff --git a/papers/eutypes-2019/tex/style.sty b/papers/eutypes-2019/abstract/tex/style.sty similarity index 100% rename from papers/eutypes-2019/tex/style.sty rename to papers/eutypes-2019/abstract/tex/style.sty diff --git a/papers/eutypes-2019/presentation/img/background.jpg b/papers/eutypes-2019/presentation/img/background.jpg new file mode 100644 index 00000000..f9e6237e Binary files /dev/null and b/papers/eutypes-2019/presentation/img/background.jpg differ diff --git 
a/papers/eutypes-2019/presentation/img/compact_blank_titlepage.jpg b/papers/eutypes-2019/presentation/img/compact_blank_titlepage.jpg new file mode 100644 index 00000000..0a68d282 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/compact_blank_titlepage.jpg differ diff --git a/papers/eutypes-2019/presentation/img/exact_length_ct.png b/papers/eutypes-2019/presentation/img/exact_length_ct.png new file mode 100644 index 00000000..42a99523 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/exact_length_ct.png differ diff --git a/papers/eutypes-2019/presentation/img/exact_length_rt.png b/papers/eutypes-2019/presentation/img/exact_length_rt.png new file mode 100644 index 00000000..1e3f546b Binary files /dev/null and b/papers/eutypes-2019/presentation/img/exact_length_rt.png differ diff --git a/papers/eutypes-2019/presentation/img/hpt-boq.png b/papers/eutypes-2019/presentation/img/hpt-boq.png new file mode 100644 index 00000000..ba502c46 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/hpt-boq.png differ diff --git a/papers/eutypes-2019/presentation/img/length_ct.png b/papers/eutypes-2019/presentation/img/length_ct.png new file mode 100644 index 00000000..704b791c Binary files /dev/null and b/papers/eutypes-2019/presentation/img/length_ct.png differ diff --git a/papers/eutypes-2019/presentation/img/length_rt.png b/papers/eutypes-2019/presentation/img/length_rt.png new file mode 100644 index 00000000..e2da8c4a Binary files /dev/null and b/papers/eutypes-2019/presentation/img/length_rt.png differ diff --git a/papers/eutypes-2019/presentation/img/reverse_ct.png b/papers/eutypes-2019/presentation/img/reverse_ct.png new file mode 100644 index 00000000..b993a553 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/reverse_ct.png differ diff --git a/papers/eutypes-2019/presentation/img/reverse_rt.png b/papers/eutypes-2019/presentation/img/reverse_rt.png new file mode 100644 index 00000000..8a6e4a47 Binary files 
/dev/null and b/papers/eutypes-2019/presentation/img/reverse_rt.png differ diff --git a/papers/eutypes-2019/presentation/img/section_head.jpg b/papers/eutypes-2019/presentation/img/section_head.jpg new file mode 100644 index 00000000..db546997 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/section_head.jpg differ diff --git a/papers/eutypes-2019/presentation/img/title.jpg b/papers/eutypes-2019/presentation/img/title.jpg new file mode 100644 index 00000000..1cb14754 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/title.jpg differ diff --git a/papers/eutypes-2019/presentation/img/tyfuns_ct.png b/papers/eutypes-2019/presentation/img/tyfuns_ct.png new file mode 100644 index 00000000..bf4c9ba0 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/tyfuns_ct.png differ diff --git a/papers/eutypes-2019/presentation/img/tyfuns_rt.png b/papers/eutypes-2019/presentation/img/tyfuns_rt.png new file mode 100644 index 00000000..cc9a3e61 Binary files /dev/null and b/papers/eutypes-2019/presentation/img/tyfuns_rt.png differ diff --git a/papers/eutypes-2019/presentation/prez.notes b/papers/eutypes-2019/presentation/prez.notes new file mode 100644 index 00000000..be06c3ce --- /dev/null +++ b/papers/eutypes-2019/presentation/prez.notes @@ -0,0 +1,83 @@ +Intro to GRIN: + - the problem (small functions, laziness) + - GRIN is the solution (interprocedural, whole, IR) + + - store, fetch, update + - eval + - (other restrictions ...) + + - 5-6 (*) + + - simple code generation + - many small transformations + - HPT is the core + + - 6-7 + +Extensions: + - LLVM (sum simple?) 
+ - new HPT: + - original: for node info + - then: node info with simple type info (for LLVM) + - finally: type inference (polymorph primops) + + - 8-9 -1 (less HPT) + + - Dead Data Elimination + +Dead Data Elimination: + - first by Remi Turk for Agda + - motivational example: (*) + - length (List -> Nat) + - other applications: + - Map -> Set + - type class dictionaries + - Vector type index (EUTypes) + + - 11-12 + + - what else do we need? + - CBy (extended HPT, example) (*) + - LVA (standard) + - producer grouping (graph example) (*) + - transformations + + - 14-15 + +Results: + - only interpreted results + - pipeline setup + - diagrams + + - 18 - 19 + +Conclusions: + - the optimizer works well: + - #stores, #fetches, #funcalls and #cases significantly reduced + - code structure closer to imperative style + + - DDE is: + - a bit costly + - kinda specific, but can work wonders + - can trigger other transformations + + - 20 + + +Q&A: + - Why do we need Lambda? (closure conversion + lambda lifting) + cc: find free variables + ll: convert lambdas to top level functions + + - eval inlining ~?~ STG + eval/apply -> GRIN with indirect calls (funptr) + static analysis + - analysis cost: compiled abstract interpretation + + +no stores & no fetches --> in ideal case, everything can be put into registers (in worst onto the stack) + + +Notes: + - upto example (with head?) 
+ - LLVM code example + - opt list + - max 25 slides \ No newline at end of file diff --git a/papers/eutypes-2019/presentation/template.tex b/papers/eutypes-2019/presentation/template.tex new file mode 100644 index 00000000..44f7e350 --- /dev/null +++ b/papers/eutypes-2019/presentation/template.tex @@ -0,0 +1,59 @@ +\documentclass[bigger]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} + +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\title{\bf Sample title} +\author{Anonymous} +\institute{Eötvös Loránd University (ELTE), \\ Budapest, Hungary} +\date{2018.} + +\begin{document} + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\frame{\vspace{15mm}\titlepage} +} + +\begin{frame}{Introduction} +\begin{itemize} +\item 1 +\item 2 +\item 3 +\end{itemize} +\end{frame} + +% this slide need not be used in the presentation, but must be +% present when you archive your talk + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\begin{frame}{} + +\bigskip\bigskip\bigskip + +{\bf\Huge\color{white} THANK YOU} + +\bigskip + +{\bf\Huge\color{white} FOR YOUR} + +\bigskip + +{\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +\end{document} diff --git a/papers/eutypes-2019/presentation/tex/main.pdf b/papers/eutypes-2019/presentation/tex/main.pdf new file mode 100644 index 00000000..bdbeec7d Binary files /dev/null and b/papers/eutypes-2019/presentation/tex/main.pdf differ diff --git a/papers/eutypes-2019/presentation/tex/main.tex b/papers/eutypes-2019/presentation/tex/main.tex new file mode 100644 index 00000000..140f54d8 --- /dev/null +++ b/papers/eutypes-2019/presentation/tex/main.tex @@ -0,0 +1,717 @@ + + +\documentclass[bigger]{beamer} + 
+\usepackage{style} + + +\title[GRIN] %optional +{GRIN: Dead data elimination} + +\subtitle{in the context of dependently typed languages} + +\author[P. Podlovics, Cs. Hruska ] % (optional, for multiple authors) +{Péter Podlovics, Csaba Hruska, Andor Pénzes} + +\institute[ELTE] % (optional) +{ + Eötvös Loránd University (ELTE), \\ Budapest, Hungary +} + +\date{EUTypes-2019} % (optional) + + + +\begin{document} + +{ + \usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% + \frame{\vspace{15mm}\titlepage} +} + +\begin{frame} + \frametitle{Overview} + \tableofcontents +\end{frame} + + +\section{Introduction} + +\begin{frame}[fragile] + \frametitle{Why functional?} + + \begin{vfitemize} + \item Declarativeness + \begin{itemize} + \item[pro:] can program on a higher abstraction level + \end{itemize} + \item Composability\\ + \begin{itemize} + \item[pro:] can easily piece together smaller programs + \item[con:] results in a lot of function calls + \end{itemize} + \item Functions are first class citizens + \begin{itemize} + \item[pro:] higher order functions + \item[con:] unknown function calls + \end{itemize} + \end{vfitemize} + +\end{frame} + + +\begin{frame} +\frametitle{Graph Reduction Intermediate Notation} + +\begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.5cm and 1.5cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [draw=black] (haskell) {Haskell}; + \node [draw=black] (idris) [left =of haskell] {Idris}; + \node [draw=black] (agda) [right =of haskell] {Agda}; + \node [draw=black] (grin) [below =of haskell] {GRIN}; + \node [draw=black] (mc) [below =of grin] {Machine Code}; + + \path[-{Stealth[scale=1.5]}] + (idris) edge [] (grin) + (haskell) edge [] (grin) + (agda) edge [] (grin) + (grin) edge [] (mc); + + + \end{tikzpicture} + \end{adjustbox} + \label{grin-backend} +\end{figure} +\end{frame} + + +\begin{frame}[fragile] 
+\frametitle{Front end code} + +\begin{minipage}{0.35\textwidth} + + \begin{haskellcode} + main = sum (upto 0 10) + + upto n m + | n > m = [] + | otherwise = n : upto (n+1) m + + sum [] = 0 + sum (x:xs) = x + sum xs + \end{haskellcode} +\end{minipage} +\hfill +\pause +\begin{minipage}{0.4\textwidth} + \vspace{2cm} + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.3cm and 1cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [shape=ellipse,draw=black] (main) {main}; + \node [shape=ellipse,draw=black] (eval) [below =of main] {eval}; + \node [shape=ellipse,draw=black] (sum) [below left =of eval] {sum}; + \node [shape=ellipse,draw=black] (upto) [below right =of eval] {upto}; + + \path[-{Stealth[scale=1.5]}] + (main) edge [] (eval) + (eval) edge [bend left] (sum) + (eval) edge [bend right] (upto) + (sum) edge [bend left] (eval) + (upto) edge [bend right] (eval); + + + \end{tikzpicture} + \end{adjustbox} + \label{control-flow-lazy} + \end{figure} +\end{minipage} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{GRIN code} + +\begin{minipage}{0.4\textwidth} + + \begin{haskellcode} + grinMain = + t1 <- store (CInt 1) + t2 <- store (CInt 10) + t3 <- store (Fupto t1 t2) + t4 <- store (Fsum t3) + (CInt r) <- eval t4 + _prim_int_print r + \end{haskellcode} +\end{minipage} +\hfill +\begin{minipage}{0.48\textwidth} + \vspace{1cm} + \begin{haskellcode} + eval p = + v <- fetch p + case v of + (CInt n) -> pure v + (CNil) -> pure v + (CCons y ys) -> pure v + (Fupto a b) -> + zs <- upto a b + update p zs + pure zs + (Fsum c) -> + s <- sum c + update p s + pure s + \end{haskellcode} +\end{minipage} + + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Transformation machinery} + + \begin{vfitemize} + + \item Inline calls to \mintinline{haskell}{eval} + \item Run dataflow analyses: + \begin{itemize} + \item Heap points-to analysis + \item Sharing 
analysis + \end{itemize} + \item Run transformations until we reach a fixed-point: + \begin{itemize} + \item Sparse Case Optimization + \item Common Subexpression Elimination + \item Generalized Unboxing + \item etc \dots + \end{itemize} + + \end{vfitemize} + + +\end{frame} + + +\section{Extensions} + +\begin{frame}[fragile] +\frametitle{Extending Heap points-to} + + \vspace{1cm} + \begin{minipage}{\textwidth} + \begin{figure} + \includegraphics[scale=0.3]{hpt-boq.png} + \end{figure} + \end{minipage} + \vfill + \pause + \begin{minipage}{\textwidth} + \begin{figure} + $BAS \in \{ \text{Int64}, \text{Float}, \text{Bool}, \text{String}, \text{Char} \}$ + \end{figure} + \end{minipage} + \vfill + \pause + \begin{center} + \begin{minipage}{0.8\textwidth} + % real type would be: a -> State# s -> (# State# s, MutVar# s a #) + \begin{haskellcode} + indexArray# :: Array# a -> Int# -> (# a #) + newMutVar# :: a -> s -> (# s, MutVar# s a #) + \end{haskellcode} + \end{minipage} + \end{center} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{LLVM back end} + + \hspace{-4cm} + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{haskell} + grinMain = + t1 <- store (CInt 1) + t2 <- store (CInt 10) + t3 <- store (Fupto t1 t2) + t4 <- store (Fsum t3) + (CInt r') <- eval t4 + _prim_int_print r' + + upto m n = + (CInt m') <- eval m + (CInt n') <- eval n + b' <- _prim_int_gt m' n' + case b' of + #True -> pure (CNil) + + sum l = ... + + eval p = ... 
+ \end{minted} + \end{minipage} + \hspace{1.8cm} + \pause + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{haskell} + grinMain = + n1 <- sum 0 1 10 + _prim_int_print n1 + + sum s lo hi = + b <- _prim_int_gt lo hi + if b then + pure s + else + lo' <- _prim_int_add lo 1 + s' <- _prim_int_add s lo + sum s' lo' hi + + \end{minted} + \end{minipage} + \hspace{0.5cm} + \pause + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{asm} + grinMain: + # BB#0: + movabsq $55, %rdi + jmp _prim_int_print + \end{minted} + \end{minipage} + +\end{frame} +%$ + +\section{Dead Data Elimination} + +\begin{frame}[fragile] +\frametitle{Dead data elimination I.} + +\begin{center} + \begin{minipage}{0.30\textwidth} + \begin{haskellcode} + length : List a -> Nat + length Nil = Z + length (Cons x xs) + = S (length xs) + \end{haskellcode} + \end{minipage} + \hspace{1cm} + $\xRightarrow{\text{DDE}}$ + \hfill + \begin{minipage}{0.5\textwidth} + \begin{haskellcode} + length p = + xs <- fetch p + case xs of + (Cons ys) -> + l1 <- length ys + l2 <- _prim_int_add l1 1 + pure l2 + (Nil) -> + pure 0 + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Dead data elimination II.} + +\begin{center} + \begin{minipage}{0.85\textwidth} + \begin{haskellcode} + data Bin : Nat -> Type where + N : Bin 0 + O : {n : Nat} -> Bin n -> Bin (2*n + 0) + I : {n : Nat} -> Bin n -> Bin (2*n + 1) + \end{haskellcode} + \vspace{0.5cm} + \pause + \begin{haskellcode} + binToNat : Bin n -> Nat + binToNat N = 0 + binToNat (O {n} _) = 2*n + binToNat (I {n} _) = 2*n + 1 + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Applications} + + \begin{vfitemize} + \item Map $\rightarrow$ Set + \item Type class dictionaries + \item Type erasure for dependently typed languages + \end{vfitemize} + +\end{frame} + +\begin{frame} +\frametitle{What do we need?} + + \begin{vfitemize} + \item 
Producers \& consumers + \item Detect dead fields + \item Connect consumers to producer + \item Remove or transform dead fields + \end{vfitemize} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Created-by} + +\begin{center} + \begin{minipage}{0.50\textwidth} + \begin{haskellcode} + null xs = + y <- case xs of + (CNil) -> + a <- pure (CTrue) + pure a + (CCons z zs) -> + b <- pure (CFalse) + pure b + pure y + \end{haskellcode} + \end{minipage} + \hfill + \begin{minipage}{0.475\textwidth} + \begin{tcolorbox}[tab2,tabularx={l|r}] + Var & Producers \\ + \hline\hline + \pilcode{xs} & $CNil[\dots], CCons[\dots]$ \\\hline + \pilcode{a} & $CTrue[\pilcode{a}]$ \\\hline + \pilcode{b} & $CFalse[\pilcode{b}]$ \\\hline + \pilcode{y} & $CTrue[\pilcode{a}], CFalse[\pilcode{b}]$ \\ + \end{tcolorbox} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Producers and consumers} + +\begin{figure}[h] +\centering +\begin{adjustbox}{scale = 1.3} + \begin{tikzpicture}[ node distance = 1cm and 2cm, on grid ] + + \node<1> [shape=circle,draw=black] (P1) {$P_1$}; + \node<1> [shape=circle,draw=black] (P2) [right =of P1] {$P_2$}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node<1> [shape=circle,draw=black] (C2) [below =of Middle] {$C_2$}; + \node<1> [shape=circle,draw=black] (C1) [left =of C2] {$C_1$}; + \node<1> [shape=circle,draw=black] (C3) [right =of C2] {$C_3$}; + + \path<1>[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge [] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + \pause + + \node<2,3,4> [shape=circle,draw=black] (P1) {\pilcode{upto}}; + \node [shape=circle,draw=black] (P2) [right =of P1] {\pilcode{upto}}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node<2> [shape=circle,draw=black] (C2) [below =of Middle] {\pilcode{len}}; + \node<2> [shape=circle,draw=black] (C1) [left =of C2] {\pilcode{len}}; + \node<2,3,4,5> [shape=circle,draw=black] (C3) [right =of C2] {\pilcode{sum}}; + + \path[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge 
[] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + \pause + + \node<3> [shape=circle,draw=black,fill=green] (C2) [below =of Middle] {\pilcode{len}}; + \node<3> [shape=circle,draw=black,fill=green] (C1) [left =of C2] {\pilcode{len}}; + \node<3> [shape=circle,draw=black,fill=red] (C3) [right =of C2] {\pilcode{sum}}; + + \pause + + \node<4,5,6,7,8,9> [shape=circle,draw=black,dashed] (C2) [below =of Middle] {\pilcode{len}}; + \node<4,5,6,7,8,9> [shape=circle,draw=black,dashed] (C1) [left =of C2] {\pilcode{len}}; + + \pause + + \node<5,6,7,8,9> [shape=circle,draw=black,dashed] (P1) {\pilcode{upto}}; + + \pause + + \node<6,7,8,9> [shape=circle,draw=black,fill=lightgray] (C3) [right =of C2] {\pilcode{sum}}; + + \pause + + \node<7,8,9> [shape=circle,draw=black,fill=lightgray] (P2) [right =of P1] {\pilcode{upto}}; + + \pause + + \node<8> [shape=circle,draw=black,dashed,fill=lightgray] (C2) [below =of Middle] {\pilcode{len}}; + + \pause + + \node<9> [shape=circle,draw=black,dashed,fill=lightgray] (C2) [below =of Middle] {\pilcode{len}\Lightning}; + + \pause + + % first solution is not doing anything + + \node<10> [shape=circle,draw=black,fill=lightgray] (P1) {\pilcode{upto}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (P2) [right =of P1] {\pilcode{upto}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (C2) [below =of Middle] {\pilcode{len}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (C1) [left =of C2] {\pilcode{len}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (C3) [right =of C2] {\pilcode{sum}}; + + \pause + + % second solution is to keep each C & P's structure as it is, but dummify P1 + + \node<11> [shape=circle,draw=black,fill=yellow] (P1) {\pilcode{upto}}; + + \pause + + % third solution is to restructure C2, but keep the original pattern as well (code duplication, needs IR improvement) + + \node<12> [shape=circle,draw=black,dashed] (P1) {\pilcode{upto}}; + \node<12> [shape=circle,draw=black,fill=lightgray] (P2) [right 
=of P1] {\pilcode{upto}}; + + \node<12> [shape=circle,draw=black,pattern=north east lines, dashed] (C2) [below =of Middle] {\pilcode{len}}; + \node<12> [shape=circle,draw=black,dashed] (C1) [left =of C2] {\pilcode{len}}; + \node<12> [shape=circle,draw=black,fill=lightgray] (C3) [right =of C2] {\pilcode{sum}}; + + + + + \end{tikzpicture} +\end{adjustbox} +\label{fig:producers-and-consumers} +\end{figure} + +\end{frame} + + + +\section{Results} + +\begin{frame}[fragile] +\frametitle{Setup} + + \vspace{1.5cm} + \begin{vfitemize} + \item Small Idris code snippets from: \\ + \textit{Type-driven Development with Idris} by Edwin Brady + \item Only interpreted code + \item Compile- \& runtime measurements + \item Pipeline setup: + \end{vfitemize} + + \begin{figure} + \begin{adjustbox}{scale = 1} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + %\hspace{-1cm} + \begin{tikzpicture}[ node distance = 1.5cm and 3cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [draw=black] (cg) {Code gen.}; + \node [draw=black] (ro1) [right =of cg] {Regular Opts.}; + \node [draw=black] (dde) [right =2.5cm of ro1] {DDE}; + \node [draw=black] (ro2) [right =2.5cm of dde] {Regular Opts.}; + + \path[-{Stealth[scale=1.5]}] + (cg) edge [] (ro1) + (ro1) edge [loop] (ro1) + (ro1) edge [] (dde) + (dde) edge [] (ro2) + (ro2) edge [loop] (ro2); + + + \end{tikzpicture} + \end{adjustbox} + \label{fig:-measurement-pipeline} + \end{figure} + +\end{frame} + + + +\begin{frame}[fragile] +\frametitle{Length} + % real example + + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{length_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{length_ct.png}} + \end{minipage} + \end{figure} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Exact length} + % no stores & no fetches! 
(Maybe transformed) + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{exact_length_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{exact_length_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Reverse} + % interesting example, but no DDE + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{reverse_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{reverse_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Type level functions} + % caveat + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{tyfuns_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{tyfuns_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Conclusions} + \begin{vfitemize} + \item The optimizer works well: + \begin{itemize} + \item the number of stores, fetches, function calls and pattern matches significantly decreased + \item the structure of the code resembles that of an imperative language + \end{itemize} + \item Dead Data Elimination: + \begin{itemize} + \item is a bit costly + \item is a specific optimization + \item can completely transform data structures + \item can trigger further transformations + \end{itemize} + \end{vfitemize} +\end{frame} + + +{ + \usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% + \begin{frame}{} + + \bigskip\bigskip\bigskip + + {\bf\Huge\color{white} THANK YOU} + + \bigskip + + {\bf\Huge\color{white} FOR YOUR} + + \bigskip + + {\bf\Huge\color{white} ATTENTION!} 
+ +\end{frame} +} + +% Q&A + +\begin{frame}[fragile] +\frametitle{Sparse case optimization} + +\begin{center} + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + v <- eval l + case v of + CNil -> + CCons x xs -> + \end{haskellcode} + \end{minipage} + $\xRightarrow{v \in \{ \text{CCons}\}}$ + \hfill + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + v <- eval l + case v of + CCons x xs -> + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Compiled data flow analysis} + +\begin{vfitemize} + \item Analyzing the syntax tree has an interpretation overhead + \item We can work around this by ``compiling'' our analysis into an executable program + \item The compiled abstract program is independent of the AST + \item It can be executed in a different context (i.e., by another program or on a GPU) + \item After being run (iteratively), it produces the result of the given analysis +\end{vfitemize} +\end{frame} + + + +\end{document} + diff --git a/papers/eutypes-2019/presentation/tex/style.sty b/papers/eutypes-2019/presentation/tex/style.sty new file mode 100644 index 00000000..430c4a3b --- /dev/null +++ b/papers/eutypes-2019/presentation/tex/style.sty @@ -0,0 +1,249 @@ +\ProvidesPackage{style} + +\frenchspacing +\setcounter{tocdepth}{1} + +\setbeamertemplate{footline}[miniframes theme no subsection] +\setbeamertemplate{itemize items}[ball] + +\usepackage[toc,page]{appendix} +\usepackage{hyperref} +\usepackage{float} +\usepackage{subcaption} +\usepackage{cite} +\usepackage{url} +\usepackage{caption} +\usepackage{graphicx} +\graphicspath{ {../img/} } + +%\usepackage{enumitem} +%\setlist{nosep} + +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{etoolbox} +\usepackage{adjustbox} +\usepackage{latexsym,amssymb,amsmath,mathtools} + +\usepackage{marvosym} +% bugfix for marvosym (must be after include) +\renewcommand{\mvchr}[1]{\mbox{\mvs\symbol{#1}}} + +\usepackage{algorithm} +\usepackage{algorithmicx} 
+\usepackage{algpseudocode} + +\DeclarePairedDelimiter\set\{\} + +\hypersetup{% + colorlinks=true,% hyperlinks will be coloured + allcolors=blue,% hyperlink text will be blue + linkcolor= +} + +\usepackage{minted} +\usepackage{xcolor} +\usepackage{listings} +\usepackage{lstautogobble} +\definecolor{identifierColor}{rgb}{0.65,0.16,0.16} +\definecolor{keywordColor}{rgb}{0.65,0.20,0.90} +\lstnewenvironment{code} +{ \lstset + { language = Haskell + , basicstyle = \small\ttfamily + , breaklines = true + , backgroundcolor = \color{gray!15} + , frame = single + , autogobble = true + , xleftmargin = 0.1cm + , xrightmargin = 0.2cm + %, identifierstyle = \color{gray} + , keywordstyle = \color{violet} + , morekeywords = {} + , escapechar = \% + } +} +{} + +\PassOptionsToPackage{usenames,dvipsnames,svgnames}{xcolor} +\usepackage{tikz} +\usetikzlibrary{arrows,arrows.meta,shapes,positioning,patterns,automata,calc} + +\usepackage{pgfplots} +\usepackage{tcolorbox} +\usepackage{tabularx} +\usepackage{array} +\usepackage{zref-savepos} +\usepackage{diagbox} +\usepackage{colortbl} +\tcbuselibrary{skins} +\tcbuselibrary{minted} + +\newcolumntype{Y}{>{\raggedleft\arraybackslash}X} +\tcbset +{ tab2/.style = + { enhanced + , fonttitle=\bfseries + , fontupper=\normalsize\sffamily + , colback = gray!5!white + , colframe = gray!75!black + , colbacktitle=yellow!40!white + , coltitle=black,center title + } + , hbox +} + +\newminted{haskell}{autogobble} + +\newtcblisting{haskell} +{ listing engine = minted + , minted style = colorful + , minted language = Haskell + , minted options = { fontsize = \small + , breaklines + , autogobble + , linenos + , numbersep = 3mm + , escapeinside = \%\% + } + , colback = gray!5!white + , colframe = gray!75!black + , listing only + , left = 5mm + , enhanced + , overlay = { \begin{tcbclipinterior} + \fill[gray!80!blue!20!white] (frame.south west) rectangle ([xshift=5mm]frame.north west); + \end{tcbclipinterior} + } +} + + +\newtcblisting{oneLineHaskell} +{ 
listing engine = minted + , minted style = colorful + , minted language = Haskell + , minted options = { fontsize = \normalsize + , breaklines + , autogobble + , numbersep = 3mm + , escapeinside = \%\% + } + , colback = gray!5!white + , colframe = gray!75!black + , listing only + , left = 2mm + , top = 1mm + , bottom = 1mm + , enhanced +} + +\colorlet{lightgreen}{green!50!white} +\colorlet{lightblue}{blue!40!white} +\colorlet{lightyellow}{yellow!50!white} +\colorlet{lightred}{red!40!white} + +\newcommand*{\paper}{thesis} +\newcommand*{\ext}[1]{\texttt{#1}} +\newcommand*{\chk}[1]{\texttt{#1}} +\newcommand*{\lvar}[1]{\textit{#1}} +\newcommand*{\ilcode}[1]{\mintinline{Haskell}{#1}} % inline code +\newcommand*{\pilcode}[1]{\texttt{#1}} % plain inline code + +% NOTE: This command needs styRectDef to be defined locally +\newcommand*{\tikzcustom}[0] +{ + % \tikzset{styRectDef/.style = {rectangle, rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}} + \tikzset{styRectGn/.style = {styRectDef, draw=green, fill=green!20}} + \tikzset{styRectBl/.style = {styRectDef, draw=cyan, fill=cyan!40}} + \tikzset{styRectGy/.style = {styRectDef, draw=gray, fill=gray!17}} + \tikzset{styConn/.style = {very thick, ->, -latex, shorten <=1mm, shorten >=1mm}} + + \tikzset{styAnnotDef/.style = {rectangle, rounded corners, draw=black, inner xsep=2mm, inner ysep=1mm}} + \tikzset{styLabel/.style = {styAnnotDef, draw=black, fill=gray!10}} + +} + +\newcounter{NoTableEntry} +\renewcommand*{\theNoTableEntry}{NTE-\the\value{NoTableEntry}} + +\newcommand*{\notableentry}{% + \multicolumn{1}{@{}c@{}|}{% + \stepcounter{NoTableEntry}% + \vadjust pre{\zsavepos{\theNoTableEntry t}}% top + \vadjust{\zsavepos{\theNoTableEntry b}}% bottom + \zsavepos{\theNoTableEntry l}% left + \hspace{0pt plus 1filll}% + \zsavepos{\theNoTableEntry r}% right + \tikz[overlay]{% + \draw[red] + let + \n{llx}={\zposx{\theNoTableEntry l}sp-\zposx{\theNoTableEntry r}sp}, + \n{urx}={0}, + \n{lly}={\zposy{\theNoTableEntry 
b}sp-\zposy{\theNoTableEntry r}sp}, + \n{ury}={\zposy{\theNoTableEntry t}sp-\zposy{\theNoTableEntry r}sp} + in + (\n{llx}, \n{lly}) -- (\n{urx}, \n{ury}) + (\n{llx}, \n{ury}) -- (\n{urx}, \n{lly}) + ; + }% + }% +} + +\makeatletter +\newcommand{\captionabove}[2][] +{ + \vskip-\abovecaptionskip + \vskip+\belowcaptionskip + \ifx\@nnil#1\@nnil + \caption{#2}% + \else + \caption[#1]{#2}% + \fi + \vskip+\abovecaptionskip + \vskip-\belowcaptionskip +} + +% automatic period at the end of footnote +\makeatletter% +\long\def\@makefntext#1{% + \parindent 1em\noindent \hb@xt@ 1.8em{\hss\@makefnmark}#1.} +\makeatother + +\newenvironment{vfitemize} +{ \begin{itemize} % + \let\olditem\item % + \renewcommand\item{\vfill\olditem} +} % +{\end{itemize}} + + + + +% EFOP template stuff +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\AtBeginSection[] +{ + { + \usebackgroundtemplate% + {% + \includegraphics[width=\paperwidth,height=\paperheight]{section_head.jpg}% + } + + \begin{frame} + \centering + \color{white}\Huge\insertsectionhead + + \end{frame} + } + +} \ No newline at end of file diff --git a/papers/eutypes-2019/presentation/tex/template.tex b/papers/eutypes-2019/presentation/tex/template.tex new file mode 100644 index 00000000..44f7e350 --- /dev/null +++ b/papers/eutypes-2019/presentation/tex/template.tex @@ -0,0 +1,59 @@ +\documentclass[bigger]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} + +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\title{\bf Sample 
title} +\author{Anonymous} +\institute{Eötvös Loránd University (ELTE), \\ Budapest, Hungary} +\date{2018.} + +\begin{document} + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\frame{\vspace{15mm}\titlepage} +} + +\begin{frame}{Introduction} +\begin{itemize} +\item 1 +\item 2 +\item 3 +\end{itemize} +\end{frame} + +% this slide need not be used in the presentation, but must be +% present when you archive your talk + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\begin{frame}{} + +\bigskip\bigskip\bigskip + +{\bf\Huge\color{white} THANK YOU} + +\bigskip + +{\bf\Huge\color{white} FOR YOUR} + +\bigskip + +{\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +\end{document} diff --git a/papers/grin-benchmarks.tar.gz b/papers/grin-benchmarks.tar.gz deleted file mode 100644 index f5418a93..00000000 Binary files a/papers/grin-benchmarks.tar.gz and /dev/null differ diff --git a/papers/mgs-2019/presentation/img/background.jpg b/papers/mgs-2019/presentation/img/background.jpg new file mode 100644 index 00000000..f9e6237e Binary files /dev/null and b/papers/mgs-2019/presentation/img/background.jpg differ diff --git a/papers/mgs-2019/presentation/img/compact_blank_titlepage.jpg b/papers/mgs-2019/presentation/img/compact_blank_titlepage.jpg new file mode 100644 index 00000000..0a68d282 Binary files /dev/null and b/papers/mgs-2019/presentation/img/compact_blank_titlepage.jpg differ diff --git a/papers/mgs-2019/presentation/img/exact_length_ct.png b/papers/mgs-2019/presentation/img/exact_length_ct.png new file mode 100644 index 00000000..42a99523 Binary files /dev/null and b/papers/mgs-2019/presentation/img/exact_length_ct.png differ diff --git a/papers/mgs-2019/presentation/img/exact_length_rt.png b/papers/mgs-2019/presentation/img/exact_length_rt.png new file mode 100644 index 00000000..1e3f546b Binary files /dev/null and b/papers/mgs-2019/presentation/img/exact_length_rt.png differ diff --git
a/papers/mgs-2019/presentation/img/hpt-boq.png b/papers/mgs-2019/presentation/img/hpt-boq.png new file mode 100644 index 00000000..ba502c46 Binary files /dev/null and b/papers/mgs-2019/presentation/img/hpt-boq.png differ diff --git a/papers/mgs-2019/presentation/img/length_ct.png b/papers/mgs-2019/presentation/img/length_ct.png new file mode 100644 index 00000000..704b791c Binary files /dev/null and b/papers/mgs-2019/presentation/img/length_ct.png differ diff --git a/papers/mgs-2019/presentation/img/length_rt.png b/papers/mgs-2019/presentation/img/length_rt.png new file mode 100644 index 00000000..e2da8c4a Binary files /dev/null and b/papers/mgs-2019/presentation/img/length_rt.png differ diff --git a/papers/mgs-2019/presentation/img/reverse_ct.png b/papers/mgs-2019/presentation/img/reverse_ct.png new file mode 100644 index 00000000..b993a553 Binary files /dev/null and b/papers/mgs-2019/presentation/img/reverse_ct.png differ diff --git a/papers/mgs-2019/presentation/img/reverse_rt.png b/papers/mgs-2019/presentation/img/reverse_rt.png new file mode 100644 index 00000000..8a6e4a47 Binary files /dev/null and b/papers/mgs-2019/presentation/img/reverse_rt.png differ diff --git a/papers/mgs-2019/presentation/img/section_head.jpg b/papers/mgs-2019/presentation/img/section_head.jpg new file mode 100644 index 00000000..db546997 Binary files /dev/null and b/papers/mgs-2019/presentation/img/section_head.jpg differ diff --git a/papers/mgs-2019/presentation/img/title.jpg b/papers/mgs-2019/presentation/img/title.jpg new file mode 100644 index 00000000..1cb14754 Binary files /dev/null and b/papers/mgs-2019/presentation/img/title.jpg differ diff --git a/papers/mgs-2019/presentation/img/tyfuns_ct.png b/papers/mgs-2019/presentation/img/tyfuns_ct.png new file mode 100644 index 00000000..bf4c9ba0 Binary files /dev/null and b/papers/mgs-2019/presentation/img/tyfuns_ct.png differ diff --git a/papers/mgs-2019/presentation/img/tyfuns_rt.png b/papers/mgs-2019/presentation/img/tyfuns_rt.png 
new file mode 100644 index 00000000..cc9a3e61 Binary files /dev/null and b/papers/mgs-2019/presentation/img/tyfuns_rt.png differ diff --git a/papers/mgs-2019/presentation/prez.notes b/papers/mgs-2019/presentation/prez.notes new file mode 100644 index 00000000..be06c3ce --- /dev/null +++ b/papers/mgs-2019/presentation/prez.notes @@ -0,0 +1,83 @@ +Intro to GRIN: + - the problem (small functions, laziness) + - GRIN is the solution (interprocedural, whole, IR) + + - store, fetch, update + - eval + - (other restrictions ...) + + - 5-6 (*) + + - simple code generation + - many small transformations + - HPT is the core + + - 6-7 + +Extensions: + - LLVM (sum simple?) + - new HPT: + - original: for node info + - then: node info with simple type info (for LLVM) + - finally: type inference (polymorph primops) + + - 8-9 -1 (less HPT) + + - Dead Data Elimination + +Dead Data Elimination: + - first by Remi Turk for Agda + - motivational example: (*) + - length (List -> Nat) + - other applications: + - Map -> Set + - type class dictionaries + - Vector type index (EUTypes) + + - 11-12 + + - what else do we need? + - CBy (extended HPT, example) (*) + - LVA (standard) + - producer grouping (graph example) (*) + - transformations + + - 14-15 + +Results: + - only interpreted results + - pipeline setup + - diagrams + + - 18 - 19 + +Conclusions: + - the optimizer works well: + - #stores, #fetches, #funcalls and #cases significantly reduced + - code structure closer to imperative style + + - DDE is: + - a bit costly + - kinda specific, but can work wonders + - can trigger other transformations + + - 20 + + +Q&A: + - Why do we need Lambda? 
(closure conversion + lambda lifting) + cc: find free variables + ll: convert lambdas to top level functions + + - eval inlining ~?~ STG + eval/apply -> GRIN with indirect calls (funptr) + static analysis + - analysis cost: compiled abstract interpretation + + +no stores & no fetches --> in the ideal case, everything can be put into registers (in the worst case, onto the stack) + + +Notes: + - upto example (with head?) + - LLVM code example + - opt list + - max 25 slides \ No newline at end of file diff --git a/papers/mgs-2019/presentation/template.tex b/papers/mgs-2019/presentation/template.tex new file mode 100644 index 00000000..44f7e350 --- /dev/null +++ b/papers/mgs-2019/presentation/template.tex @@ -0,0 +1,59 @@ +\documentclass[bigger]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} + +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\title{\bf Sample title} +\author{Anonymous} +\institute{Eötvös Loránd University (ELTE), \\ Budapest, Hungary} +\date{2018.} + +\begin{document} + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\frame{\vspace{15mm}\titlepage} +} + +\begin{frame}{Introduction} +\begin{itemize} +\item 1 +\item 2 +\item 3 +\end{itemize} +\end{frame} + +% this slide need not be used in the presentation, but must be +% present when you archive your talk + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\begin{frame}{} + +\bigskip\bigskip\bigskip + +{\bf\Huge\color{white} THANK YOU} + +\bigskip + +{\bf\Huge\color{white} FOR YOUR} + +\bigskip + +{\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +\end{document} diff --git a/papers/mgs-2019/presentation/tex/idris-compilation-pipeline.tex
b/papers/mgs-2019/presentation/tex/idris-compilation-pipeline.tex new file mode 100644 index 00000000..ea8525cb --- /dev/null +++ b/papers/mgs-2019/presentation/tex/idris-compilation-pipeline.tex @@ -0,0 +1,31 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + \newlength{\vDist} + \newlength{\hDist} + \begin{tikzpicture}[align=center, draw=black, scale=1] + + % add styles + \tikzset{styRectDef/.style = {rectangle, rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}} + \tikzcustom{} + + % set distances + \setlength{\vDist}{0.75cm} + \setlength{\hDist}{1.75cm} + + % nodes + \node [styRectGn] (grin-cg) {GRIN code gen.}; + \node [styRectBl, right=\hDist of grin-cg] (dde) {DDE}; + \node [styRectBl, above=\vDist of dde] (reg-opts-1) {Regular opts.}; + \node [styRectBl, below=\vDist of dde] (reg-opts-2) {Regular opts.}; + \node [styRectGn, right=\hDist of dde] (bin-gen) {Binary generation}; + + \node [styLabel] (iteratively1) at ([shift={(1.25,0.45)}]reg-opts-1) {iteratively}; + \node [styLabel] (iteratively2) at ([shift={(1.25,0.45)}]reg-opts-2) {iteratively}; + + % connections + \draw[styConn, bend left] (grin-cg) to (reg-opts-1); + \draw[styConn] (reg-opts-1) to (dde); + \draw[styConn] (dde) to (reg-opts-2); + \draw[styConn, bend right] (reg-opts-2) to (bin-gen); + \end{tikzpicture} +\end{document} \ No newline at end of file diff --git a/papers/mgs-2019/presentation/tex/main.pdf b/papers/mgs-2019/presentation/tex/main.pdf new file mode 100644 index 00000000..800b7ef7 Binary files /dev/null and b/papers/mgs-2019/presentation/tex/main.pdf differ diff --git a/papers/mgs-2019/presentation/tex/main.tex b/papers/mgs-2019/presentation/tex/main.tex new file mode 100644 index 00000000..26e8bd96 --- /dev/null +++ b/papers/mgs-2019/presentation/tex/main.tex @@ -0,0 +1,777 @@ + + +\documentclass[bigger]{beamer} + +\usepackage{style} +\usepackage{subfiles} + + +\title[GRIN] %optional +{A modern look at GRIN} + +\subtitle{an optimizing functional language back end} 
+ +\author[P. Podlovics, Cs. Hruska, Andor Pénzes ] % (optional, for multiple authors) +{Péter Podlovics, Csaba Hruska, Andor Pénzes} + +\institute[ELTE] % (optional) +{ + Eötvös Loránd University (ELTE), \\ Budapest, Hungary +} + +\date{MGS-2019} % (optional) + + + +\begin{document} + +{ + \usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% + \frame{\vspace{15mm}\titlepage} +} + +\begin{frame} + \frametitle{Overview} + \tableofcontents +\end{frame} + + +\section{Introduction} + +\begin{frame}[fragile] + \frametitle{Why functional?} + + \begin{vfitemize} + \item Declarativeness + \begin{itemize} + \item[pro:] can program on a higher abstraction level + \end{itemize} + \item Composability\\ + \begin{itemize} + \item[pro:] can easily piece together smaller programs + \item[con:] results in a lot of function calls + \end{itemize} + \item Functions are first class citizens + \begin{itemize} + \item[pro:] higher order functions + \item[con:] unknown function calls + \end{itemize} + \end{vfitemize} + +\end{frame} + + +\begin{frame} +\frametitle{Graph Reduction Intermediate Notation} + +\begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.5cm and 1.5cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [draw=black] (haskell) {Haskell}; + \node [draw=black] (idris) [left =of haskell] {Idris}; + \node [draw=black] (agda) [right =of haskell] {Agda}; + \node [draw=black] (grin) [below =of haskell] {GRIN}; + \node [draw=black] (mc) [below =of grin] {Machine Code}; + + \path[-{Stealth[scale=1.5]}] + (idris) edge [] (grin) + (haskell) edge [] (grin) + (agda) edge [] (grin) + (grin) edge [] (mc); + + + \end{tikzpicture} + \end{adjustbox} + \label{grin-backend} +\end{figure} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Front end code} + +\begin{minipage}{0.35\textwidth} + + \begin{haskellcode} + main = sum (upto 0 10) + + upto n m + 
| n > m = [] + | otherwise = n : upto (n+1) m + + sum [] = 0 + sum (x:xs) = x + sum xs + \end{haskellcode} +\end{minipage} +\hfill +\pause +\begin{minipage}{0.4\textwidth} + \vspace{2cm} + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.3cm and 1cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [shape=ellipse,draw=black] (main) {main}; + \node [shape=ellipse,draw=black] (eval) [below =of main] {eval}; + \node [shape=ellipse,draw=black] (sum) [below left =of eval] {sum}; + \node [shape=ellipse,draw=black] (upto) [below right =of eval] {upto}; + + \path[-{Stealth[scale=1.5]}] + (main) edge [] (eval) + (eval) edge [bend left] (sum) + (eval) edge [bend right] (upto) + (sum) edge [bend left] (eval) + (upto) edge [bend right] (eval); + + + \end{tikzpicture} + \end{adjustbox} + \label{control-flow-lazy} + \end{figure} +\end{minipage} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{GRIN code} + +\begin{minipage}{0.4\textwidth} + + \begin{haskellcode} + grinMain = + t1 <- store (CInt 1) + t2 <- store (CInt 10) + t3 <- store (Fupto t1 t2) + t4 <- store (Fsum t3) + (CInt r) <- eval t4 + _prim_int_print r + \end{haskellcode} +\end{minipage} +\hfill +\begin{minipage}{0.48\textwidth} + \vspace{1cm} + \begin{haskellcode} + eval p = + v <- fetch p + case v of + (CInt n) -> pure v + (CNil) -> pure v + (CCons y ys) -> pure v + (Fupto a b) -> + zs <- upto a b + update p zs + pure zs + (Fsum c) -> + s <- sum c + update p s + pure s + \end{haskellcode} +\end{minipage} + + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Transformation machinery} + + \begin{vfitemize} + + \item Inline calls to \mintinline{haskell}{eval} + \item Run dataflow analyses: + \begin{itemize} + \item Heap points-to analysis + \item Sharing analysis + \end{itemize} + \item Run transformations until we reach a fixed-point: + \begin{itemize} + \item Sparse Case Optimization + 
\item Common Subexpression Elimination + \item Generalized Unboxing + \item etc \dots + \end{itemize} + + \end{vfitemize} + + +\end{frame} + + +\section{Extensions} + +\begin{frame}[fragile] +\frametitle{Extending Heap points-to} + + \vspace{1cm} + \begin{minipage}{\textwidth} + \begin{figure} + \includegraphics[scale=0.3]{hpt-boq.png} + \end{figure} + \end{minipage} + \vfill + \pause + \begin{minipage}{\textwidth} + \begin{figure} + $BAS \in \{ \text{Int64}, \text{Float}, \text{Bool}, \text{String}, \text{Char} \}$ + \end{figure} + \end{minipage} + \vfill + \pause + \begin{center} + \begin{minipage}{0.8\textwidth} + % real type would be: a -> State# s -> (# State# s, MutVar# s a #) + \begin{haskellcode} + indexArray# :: Array# a -> Int# -> (# a #) + newMutVar# :: a -> s -> (# s, MutVar# s a #) + \end{haskellcode} + \end{minipage} + \end{center} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{LLVM back end} + + \hspace{-4cm} + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{haskell} + grinMain = + t1 <- store (CInt 1) + t2 <- store (CInt 10) + t3 <- store (Fupto t1 t2) + t4 <- store (Fsum t3) + (CInt r') <- eval t4 + _prim_int_print r' + + upto m n = + (CInt m') <- eval m + (CInt n') <- eval n + b' <- _prim_int_gt m' n' + case b' of + #True -> pure (CNil) + + sum l = ... + + eval p = ... 
+ \end{minted} + \end{minipage} + \hspace{1.8cm} + \pause + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{haskell} + grinMain = + n1 <- sum 0 1 10 + _prim_int_print n1 + + sum s lo hi = + b <- _prim_int_gt lo hi + if b then + pure s + else + lo' <- _prim_int_add lo 1 + s' <- _prim_int_add s lo + sum s' lo' hi + + \end{minted} + \end{minipage} + \hspace{0.5cm} + \pause + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{asm} + grinMain: + # BB#0: + movabsq $55, %rdi + jmp _prim_int_print + \end{minted} + \end{minipage} + +\end{frame} +%$ + +\section{Dead Data Elimination} + +\begin{frame}[fragile] +\frametitle{Dead data elimination I.} + +\begin{center} + \begin{minipage}{0.30\textwidth} + \begin{haskellcode} + length : List a -> Nat + length Nil = Z + length (Cons x xs) + = S (length xs) + \end{haskellcode} + \end{minipage} + \hspace{1cm} + $\xRightarrow{\text{DDE}}$ + \hfill + \begin{minipage}{0.5\textwidth} + \begin{haskellcode} + length p = + xs <- fetch p + case xs of + (Cons ys) -> + l1 <- length ys + l2 <- _prim_int_add l1 1 + pure l2 + (Nil) -> + pure 0 + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Dead data elimination II.} + +\begin{center} + \begin{minipage}{0.85\textwidth} + \begin{haskellcode} + data Bin : Nat -> Type where + N : Bin 0 + O : {n : Nat} -> Bin n -> Bin (2*n + 0) + I : {n : Nat} -> Bin n -> Bin (2*n + 1) + \end{haskellcode} + \vspace{0.5cm} + \pause + \begin{haskellcode} + binToNat : Bin n -> Nat + binToNat N = 0 + binToNat (O {n} _) = 2*n + binToNat (I {n} _) = 2*n + 1 + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Applications} + + \begin{vfitemize} + \item Map $\rightarrow$ Set + \item Type class dictionaries + \item Type erasure for dependently typed languages + \end{vfitemize} + +\end{frame} + +\begin{frame} +\frametitle{What do we need?} + + \begin{vfitemize} + \item 
Producers \& consumers + \item Detect dead fields + \item Connect consumers to producer + \item Remove or transform dead fields + \end{vfitemize} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Created-by} + +\begin{center} + \begin{minipage}{0.50\textwidth} + \begin{haskellcode} + null xs = + y <- case xs of + (CNil) -> + a <- pure (CTrue) + pure a + (CCons z zs) -> + b <- pure (CFalse) + pure b + pure y + \end{haskellcode} + \end{minipage} + \hfill + \begin{minipage}{0.475\textwidth} + \begin{tcolorbox}[tab2,tabularx={l|r}] + Var & Producers \\ + \hline\hline + \pilcode{xs} & $CNil[\dots], CCons[\dots]$ \\\hline + \pilcode{a} & $CTrue[\pilcode{a}]$ \\\hline + \pilcode{b} & $CFalse[\pilcode{b}]$ \\\hline + \pilcode{y} & $CTrue[\pilcode{a}], CFalse[\pilcode{b}]$ \\ + \end{tcolorbox} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Producers and consumers} + +\begin{figure}[h] +\centering +\begin{adjustbox}{scale = 1.3} + \begin{tikzpicture}[ node distance = 1cm and 2cm, on grid ] + + \node<1> [shape=circle,draw=black] (P1) {$P_1$}; + \node<1> [shape=circle,draw=black] (P2) [right =of P1] {$P_2$}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node<1> [shape=circle,draw=black] (C2) [below =of Middle] {$C_2$}; + \node<1> [shape=circle,draw=black] (C1) [left =of C2] {$C_1$}; + \node<1> [shape=circle,draw=black] (C3) [right =of C2] {$C_3$}; + + \path<1>[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge [] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + \pause + + \node<2,3,4> [shape=circle,draw=black] (P1) {\pilcode{upto}}; + \node [shape=circle,draw=black] (P2) [right =of P1] {\pilcode{upto}}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node<2> [shape=circle,draw=black] (C2) [below =of Middle] {\pilcode{len}}; + \node<2> [shape=circle,draw=black] (C1) [left =of C2] {\pilcode{len}}; + \node<2,3,4,5> [shape=circle,draw=black] (C3) [right =of C2] {\pilcode{sum}}; + + \path[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge 
[] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + \pause + + \node<3> [shape=circle,draw=black,fill=green] (C2) [below =of Middle] {\pilcode{len}}; + \node<3> [shape=circle,draw=black,fill=green] (C1) [left =of C2] {\pilcode{len}}; + \node<3> [shape=circle,draw=black,fill=red] (C3) [right =of C2] {\pilcode{sum}}; + + \pause + + \node<4,5,6,7,8,9> [shape=circle,draw=black,dashed] (C2) [below =of Middle] {\pilcode{len}}; + \node<4,5,6,7,8,9> [shape=circle,draw=black,dashed] (C1) [left =of C2] {\pilcode{len}}; + + \pause + + \node<5,6,7,8,9> [shape=circle,draw=black,dashed] (P1) {\pilcode{upto}}; + + \pause + + \node<6,7,8,9> [shape=circle,draw=black,fill=lightgray] (C3) [right =of C2] {\pilcode{sum}}; + + \pause + + \node<7,8,9> [shape=circle,draw=black,fill=lightgray] (P2) [right =of P1] {\pilcode{upto}}; + + \pause + + \node<8> [shape=circle,draw=black,dashed,fill=lightgray] (C2) [below =of Middle] {\pilcode{len}}; + + \pause + + \node<9> [shape=circle,draw=black,dashed,fill=lightgray] (C2) [below =of Middle] {\pilcode{len}\Lightning}; + + \pause + + % first solution is not doing anything + + \node<10> [shape=circle,draw=black,fill=lightgray] (P1) {\pilcode{upto}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (P2) [right =of P1] {\pilcode{upto}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (C2) [below =of Middle] {\pilcode{len}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (C1) [left =of C2] {\pilcode{len}}; + \node<10,11> [shape=circle,draw=black,fill=lightgray] (C3) [right =of C2] {\pilcode{sum}}; + + \pause + + % second solution is to keep each C & P's structure as it is, but dummify P1 + + \node<11> [shape=circle,draw=black,fill=yellow] (P1) {\pilcode{upto}}; + + \pause + + % third solution is to restructure C2, but keep the original pattern as well (code duplication, needs IR improvement) + + \node<12> [shape=circle,draw=black,dashed] (P1) {\pilcode{upto}}; + \node<12> [shape=circle,draw=black,fill=lightgray] (P2) [right 
=of P1] {\pilcode{upto}}; + + \node<12> [shape=circle,draw=black,pattern=north east lines, dashed] (C2) [below =of Middle] {\pilcode{len}}; + \node<12> [shape=circle,draw=black,dashed] (C1) [left =of C2] {\pilcode{len}}; + \node<12> [shape=circle,draw=black,fill=lightgray] (C3) [right =of C2] {\pilcode{sum}}; + + + + + \end{tikzpicture} +\end{adjustbox} +\label{fig:producers-and-consumers} +\end{figure} + +\end{frame} + + + +\section{Results} + +\begin{frame}[fragile] +\frametitle{Setup} + + \vspace{1.5cm} + \begin{vfitemize} + \item Small Idris code snippets from: \\ + \textit{Type-driven Development with Idris} by Edwin Brady + \item Both interpreted GRIN code and executed binaries + \item Compile- \& runtime measurements + \end{vfitemize} + + \vspace{-0.5cm} + \begin{figure}[H] + \centering + \begin{adjustbox}{scale = 0.75} + \subfile{idris-compilation-pipeline} + \end{adjustbox} + \end{figure} + +\end{frame} + + + +\begin{frame}[fragile] +\frametitle{Length - GRIN statistics} + % real example + + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{length_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{length_ct.png}} + \end{minipage} + \end{figure} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Length - CPU binary statistics} + + \begin{center} + \begin{minipage}{0.95\linewidth} + \label{table:length-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 23928 & 769588 & 212567 & 233305 \\\hline + \pilcode{normal-O3} & 23928 & 550065 & 160252 & 170202 \\\hline + \pilcode{regular-opt} & 19832 & 257397 & 14848 & 45499 \\\hline + \pilcode{dde-O0} & 15736 & 256062 & 14243 & 45083 \\\hline + \pilcode{dde-O3} & 15736 & 284970 & 33929 & 54555 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + +\end{frame} + 
+\begin{frame}[fragile] +\frametitle{Exact length - GRIN statistics} + % no stores & no fetches! (Maybe transformed) + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{exact_length_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{exact_length_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Exact length - CPU binary statistics} + + \begin{center} + \begin{minipage}{0.9\linewidth} + \label{table:exact-length-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 18800 & 188469 & 14852 & 46566 \\\hline + \pilcode{normal-O3} & 14704 & 187380 & 14621 & 46233 \\\hline + \pilcode{regular-opt} & 10608 & 183560 & 13462 & 45214 \\\hline + \pilcode{dde-O0} & 10608 & 183413 & 13431 & 45189 \\\hline + \pilcode{dde-O3} & 10608 & 183322 & 13430 & 44226 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Type level functions - GRIN statistics} + % caveat + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{tyfuns_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{tyfuns_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Type level functions - CPU binary statistics} + + \begin{center} + \begin{minipage}{0.9\linewidth} + \label{table:tyfuns-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 65128 & 383012 & 49191 & 86754 \\\hline + \pilcode{normal-O3} & 69224 & 377165 & 47556 & 84156 \\\hline + \pilcode{regular-opt} & 36456 & 312122 & 34340 
& 71162 \\\hline + \pilcode{dde-O0} & 32360 & 312075 & 34331 & 70530 \\\hline + \pilcode{dde-O3} & 28264 & 309822 & 33943 & 70386 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Reverse - GRIN statistics} +% interesting example, but no DDE +\begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{reverse_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{reverse_ct.png}} + \end{minipage} +\end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Reverse - CPU binary statistics} + +\begin{center} +\begin{minipage}{0.96\linewidth} + \label{table:reverse-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 27112 & 240983 & 25018 & 58253 \\\hline + \pilcode{normal-O3} & 31208 & 236570 & 23808 & 56617 \\\hline + \pilcode{regular-opt-O0} & 14824 & 222085 & 19757 & 53125 \\\hline + \pilcode{regular-opt-O3} & 14824 & 220837 & 19599 & 52827 \\ + \end{tcolorbox} +\end{minipage} +\end{center} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Conclusions} + \begin{vfitemize} + \item Dead Data Elimination: + \begin{itemize} + \item is demanding on resources + \item can completely transform data structures + \item can trigger further transformations + \item can considerably reduce binary size + \end{itemize} + \item Regular optimizations: + \begin{itemize} + \item GRIN works well for dependently-typed languages as well + \item the optimized GRIN code is significantly more efficient + \item the GRIN optimizations are orthogonal to the LLVM optimizations + \end{itemize} + \end{vfitemize} +\end{frame} + + +{ + \usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% + \begin{frame}{} + + \bigskip\bigskip\bigskip + + {\bf\Huge\color{white} THANK YOU} + + 
\bigskip + + {\bf\Huge\color{white} FOR YOUR} + + \bigskip + + {\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +% Q&A + +\begin{frame}[fragile] +\frametitle{Sparse case optimization} + +\begin{center} + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + v <- eval l + case v of + CNil -> + CCons x xs -> + \end{haskellcode} + \end{minipage} + $\xRightarrow{v \in \{ \text{CCons}\}}$ + \hfill + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + v <- eval l + case v of + CCons x xs -> + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Compiled data flow analysis} + +\begin{vfitemize} + \item Analyzing the syntax tree has an interpretation overhead + \item We can work around this by "compiling" our analysis into an executable program + \item The compiled abstract program is independent of the AST + \item It can be executed in a different context (ie.: by another program or on GPU) + \item After run (iteratively), it produces the result of the given analysis +\end{vfitemize} +\end{frame} + + + +\end{document} + diff --git a/papers/mgs-2019/presentation/tex/style.sty b/papers/mgs-2019/presentation/tex/style.sty new file mode 100644 index 00000000..430c4a3b --- /dev/null +++ b/papers/mgs-2019/presentation/tex/style.sty @@ -0,0 +1,249 @@ +\ProvidesPackage{style} + +\frenchspacing +\setcounter{tocdepth}{1} + +\setbeamertemplate{footline}[miniframes theme no subsection] +\setbeamertemplate{itemize items}[ball] + +\usepackage[toc,page]{appendix} +\usepackage{hyperref} +\usepackage{float} +\usepackage{subcaption} +\usepackage{cite} +\usepackage{url} +\usepackage{caption} +\usepackage{graphicx} +\graphicspath{ {../img/} } + +%\usepackage{enumitem} +%\setlist{nosep} + +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{etoolbox} +\usepackage{adjustbox} +\usepackage{latexsym,amssymb,amsmath,mathtools} + +\usepackage{marvosym} +% bugfix for marvosym (must be after include) 
+\renewcommand{\mvchr}[1]{\mbox{\mvs\symbol{#1}}} + +\usepackage{algorithm} +\usepackage{algorithmicx} +\usepackage{algpseudocode} + +\DeclarePairedDelimiter\set\{\} + +\hypersetup{% + colorlinks=true,% hyperlinks will be coloured + allcolors=blue,% all hyperlink colours are set to blue + linkcolor= +} + +\usepackage{minted} +\usepackage{xcolor} +\usepackage{listings} +\usepackage{lstautogobble} +\definecolor{identifierColor}{rgb}{0.65,0.16,0.16} +\definecolor{keywordColor}{rgb}{0.65,0.20,0.90} +\lstnewenvironment{code} +{ \lstset + { language = Haskell + , basicstyle = \small\ttfamily + , breaklines = true + , backgroundcolor = \color{gray!15} + , frame = single + , autogobble = true + , xleftmargin = 0.1cm + , xrightmargin = 0.2cm + %, identifierstyle = \color{gray} + , keywordstyle = \color{violet} + , morekeywords = {} + , escapechar = \% + } +} +{} + +\PassOptionsToPackage{usenames,dvipsnames,svgnames}{xcolor} +\usepackage{tikz} +\usetikzlibrary{arrows,arrows.meta,shapes,positioning,patterns,automata,calc} + +\usepackage{pgfplots} +\usepackage{tcolorbox} +\usepackage{tabularx} +\usepackage{array} +\usepackage{zref-savepos} +\usepackage{diagbox} +\usepackage{colortbl} +\tcbuselibrary{skins} +\tcbuselibrary{minted} + +\newcolumntype{Y}{>{\raggedleft\arraybackslash}X} +\tcbset +{ tab2/.style = + { enhanced + , fonttitle=\bfseries + , fontupper=\normalsize\sffamily + , colback = gray!5!white + , colframe = gray!75!black + , colbacktitle=yellow!40!white + , coltitle=black,center title + } + , hbox +} + +\newminted{haskell}{autogobble} + +\newtcblisting{haskell} +{ listing engine = minted + , minted style = colorful + , minted language = Haskell + , minted options = { fontsize = \small + , breaklines + , autogobble + , linenos + , numbersep = 3mm + , escapeinside = \%\% + } + , colback = gray!5!white + , colframe = gray!75!black + , listing only + , left = 5mm + , enhanced + , overlay = { \begin{tcbclipinterior} + \fill[gray!80!blue!20!white] (frame.south west) rectangle
([xshift=5mm]frame.north west); + \end{tcbclipinterior} + } +} + + +\newtcblisting{oneLineHaskell} +{ listing engine = minted + , minted style = colorful + , minted language = Haskell + , minted options = { fontsize = \normalsize + , breaklines + , autogobble + , numbersep = 3mm + , escapeinside = \%\% + } + , colback = gray!5!white + , colframe = gray!75!black + , listing only + , left = 2mm + , top = 1mm + , bottom = 1mm + , enhanced +} + +\colorlet{lightgreen}{green!50!white} +\colorlet{lightblue}{blue!40!white} +\colorlet{lightyellow}{yellow!50!white} +\colorlet{lightred}{red!40!white} + +\newcommand*{\paper}{thesis} +\newcommand*{\ext}[1]{\texttt{#1}} +\newcommand*{\chk}[1]{\texttt{#1}} +\newcommand*{\lvar}[1]{\textit{#1}} +\newcommand*{\ilcode}[1]{\mintinline{Haskell}{#1}} % inline code +\newcommand*{\pilcode}[1]{\texttt{#1}} % plain inline code + +% NOTE: This command need styRectDef to be defined locally +\newcommand*{\tikzcustom}[0] +{ + % \tikzset{styRectDef/.style = {rectangle, rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}} + \tikzset{styRectGn/.style = {styRectDef, draw=green, fill=green!20}} + \tikzset{styRectBl/.style = {styRectDef, draw=cyan, fill=cyan!40}} + \tikzset{styRectGy/.style = {styRectDef, draw=gray, fill=gray!17}} + \tikzset{styConn/.style = {very thick, ->, -latex, shorten <=1mm, shorten >=1mm}} + + \tikzset{styAnnotDef/.style = {rectangle, rounded corners, draw=black, inner xsep=2mm, inner ysep=1mm}} + \tikzset{styLabel/.style = {styAnnotDef, draw=black, fill=gray!10}} + +} + +\newcounter{NoTableEntry} +\renewcommand*{\theNoTableEntry}{NTE-\the\value{NoTableEntry}} + +\newcommand*{\notableentry}{% + \multicolumn{1}{@{}c@{}|}{% + \stepcounter{NoTableEntry}% + \vadjust pre{\zsavepos{\theNoTableEntry t}}% top + \vadjust{\zsavepos{\theNoTableEntry b}}% bottom + \zsavepos{\theNoTableEntry l}% left + \hspace{0pt plus 1filll}% + \zsavepos{\theNoTableEntry r}% right + \tikz[overlay]{% + \draw[red] + let + 
\n{llx}={\zposx{\theNoTableEntry l}sp-\zposx{\theNoTableEntry r}sp}, + \n{urx}={0}, + \n{lly}={\zposy{\theNoTableEntry b}sp-\zposy{\theNoTableEntry r}sp}, + \n{ury}={\zposy{\theNoTableEntry t}sp-\zposy{\theNoTableEntry r}sp} + in + (\n{llx}, \n{lly}) -- (\n{urx}, \n{ury}) + (\n{llx}, \n{ury}) -- (\n{urx}, \n{lly}) + ; + }% + }% +} + +\makeatletter +\newcommand{\captionabove}[2][] +{ + \vskip-\abovecaptionskip + \vskip+\belowcaptionskip + \ifx\@nnil#1\@nnil + \caption{#2}% + \else + \caption[#1]{#2}% + \fi + \vskip+\abovecaptionskip + \vskip-\belowcaptionskip +} + +% automatic period at the end of footnote +\makeatletter% +\long\def\@makefntext#1{% + \parindent 1em\noindent \hb@xt@ 1.8em{\hss\@makefnmark}#1.} +\makeatother + +\newenvironment{vfitemize} +{ \begin{itemize} % + \let\olditem\item % + \renewcommand\item{\vfill\olditem} +} % +{\end{itemize}} + + + + +% EFOP template stuff +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\AtBeginSection[] +{ + { + \usebackgroundtemplate% + {% + \includegraphics[width=\paperwidth,height=\paperheight]{section_head.jpg}% + } + + \begin{frame} + \centering + \color{white}\Huge\insertsectionhead + + \end{frame} + } + +} \ No newline at end of file diff --git a/papers/mgs-2019/presentation/tex/template.tex b/papers/mgs-2019/presentation/tex/template.tex new file mode 100644 index 00000000..44f7e350 --- /dev/null +++ b/papers/mgs-2019/presentation/tex/template.tex @@ -0,0 +1,59 @@ +\documentclass[bigger]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} + +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} 
+\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\title{\bf Sample title} +\author{Anonymous} +\institute{Eötvös Loránd University (ELTE), \\ Budapest, Hungary} +\date{2018.} + +\begin{document} + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\frame{\vspace{15mm}\titlepage} +} + +\begin{frame}{Introduction} +\begin{itemize} +\item 1 +\item 2 +\item 3 +\end{itemize} +\end{frame} + +% this slide need not be used in the presentation, but must be +% present when you archieve your talk + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\begin{frame}{} + +\bigskip\bigskip\bigskip + +{\bf\Huge\color{white} THANK YOU} + +\bigskip + +{\bf\Huge\color{white} FOR YOUR} + +\bigskip + +{\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +\end{document} diff --git a/papers/stcs-2019/article/img/exact-length-compile-time.png b/papers/stcs-2019/article/img/exact-length-compile-time.png new file mode 100644 index 00000000..ac867588 Binary files /dev/null and b/papers/stcs-2019/article/img/exact-length-compile-time.png differ diff --git a/papers/stcs-2019/article/img/exact-length-runtime.png b/papers/stcs-2019/article/img/exact-length-runtime.png new file mode 100644 index 00000000..e4c112c5 Binary files /dev/null and b/papers/stcs-2019/article/img/exact-length-runtime.png differ diff --git a/papers/stcs-2019/article/img/length-compile-time.png b/papers/stcs-2019/article/img/length-compile-time.png new file mode 100644 index 00000000..0223c641 Binary files /dev/null and b/papers/stcs-2019/article/img/length-compile-time.png differ diff --git a/papers/stcs-2019/article/img/length-runtime.png b/papers/stcs-2019/article/img/length-runtime.png new file mode 100644 index 00000000..0c030a89 Binary files /dev/null and b/papers/stcs-2019/article/img/length-runtime.png differ diff --git a/papers/stcs-2019/article/img/reverse-compile-time.png 
b/papers/stcs-2019/article/img/reverse-compile-time.png new file mode 100644 index 00000000..29b026be Binary files /dev/null and b/papers/stcs-2019/article/img/reverse-compile-time.png differ diff --git a/papers/stcs-2019/article/img/reverse-runtime.png b/papers/stcs-2019/article/img/reverse-runtime.png new file mode 100644 index 00000000..cafcd730 Binary files /dev/null and b/papers/stcs-2019/article/img/reverse-runtime.png differ diff --git a/papers/stcs-2019/article/img/tyfuns-compile-time.png b/papers/stcs-2019/article/img/tyfuns-compile-time.png new file mode 100644 index 00000000..aae27057 Binary files /dev/null and b/papers/stcs-2019/article/img/tyfuns-compile-time.png differ diff --git a/papers/stcs-2019/article/img/tyfuns-runtime.png b/papers/stcs-2019/article/img/tyfuns-runtime.png new file mode 100644 index 00000000..48a337ff Binary files /dev/null and b/papers/stcs-2019/article/img/tyfuns-runtime.png differ diff --git a/papers/stcs-2019/article/related-work.notes b/papers/stcs-2019/article/related-work.notes new file mode 100644 index 00000000..2fd478d1 --- /dev/null +++ b/papers/stcs-2019/article/related-work.notes @@ -0,0 +1,109 @@ +GRIN: + - Boq: + - HPT ~ Andersen + - UHC + - goal: + - complete compiler + - for teaching & research + - uses GRIN "just as a backend" + - AGs + - LLVM codegen, .Net, JVM, JS + - has its own front end + - synchronous exceptions (throw + catch) + - new AST constructs (Boquist GRIN with try/catch monadic operations) + - bad GC + - JHC (Meacham GRIN) + - some modifications to GRIN: + - mutable variables + - some type system (from front end- PTS) + - memory regions instead of arity raising (put node onto STACK instead of HEAP) + - IO exceptions - only throw ~ halt + - goal: efficient Haskell code + - without runtime (just some system calls) + - Haskell -> GRIN -> C + - compact GRIN code + - has its own front end + - typed GRIN + - probably for GC + - pure type system + - more general than the available type systems back then 
+ - some optimizations are done on this level + - AJHC + - forked then remerged + - NetBSD audio driver + - formal methods in systems programming + - JHC had the least amount of unknown symbols + - LHC (David Himmelstrup) + - Bedrock (JHC Grin-like language) + - LLVM codegen + + +GHC: + - multi-threading + - async exceptions + - incremental compilation + - language extensions (FE) + - STM (software transactional memory) - monadic API + +Whole program opt: + - Intel + - goal: + - highly optimized code generation: (e.g.: vectorization) + - for highly demanding computations (numerical algorithms) + - has no FE (uses GHC), receives external core from GHC + - some extra work required see Notes + - two parts: HRC (front end), FLRC (optimizer) [written in MLton] + - FLRC (Functional Language Research Compiler): + - general BE for (strict) functional languages + - has pointers + - no whole program defunctionalization (ptrs instead of eval) + - has basic blocks + - sync exceptions + - multi-threading + - long running research project: optimizing compiler for functional languages (one of them FE is HRC) + + - MLton: + - goal: + - compiler for standard ML + - efficiency + - own FE (why?) + - whole program optimization + - defunctorization: + - expose types hidden by functors (parameterized modules) + - expose function calls across modules + - IR: + - has many different IRs (each can explicitly express a certain feature, making the analysis easier) + - basic block + - 0CFA ~ similar to defunc. 
(language can be higher order) (follows closures instead of tags) - on which IR + - contification: + - functions used as continuations turned into jumps + - tries to transform recursive calls into tail calls + + +"Our GRIN": + - SSA + + +Notes: + - core vs STG: + - core has some invariants (e.g.: [RealWorld] token chain) + - STG is for codegen (ctors/primops are saturated) + - denotational semantics ~ execution of lambda calculus + - operational semantics ~ how to execute STG like an imperative language (funcalls to sequences) + - C-- (like LLVM before LLVM) + - imperative + - to their own machine code or LLVM + + +Haskell: + - for humans with sugar +Core: + - like Haskell but wo sugars + - has type info +STG: + - no type lambda/type app + - only runtime-relevant info + - explicit laziness in OPERATIONAL semantics +C--: + - very close to machine + - less precise semantics (e.g.: only WORDs) (only "implicit" semantics) diff --git a/papers/stcs-2019/article/tex/.gitignore b/papers/stcs-2019/article/tex/.gitignore new file mode 100644 index 00000000..89ca51ee --- /dev/null +++ b/papers/stcs-2019/article/tex/.gitignore @@ -0,0 +1,9 @@ +*.toc +*.aux +*.bbl +*.blg +*.log +*.out +*.gz +*.listing +_minted-main/ diff --git a/papers/stcs-2019/article/tex/abstract.tex b/papers/stcs-2019/article/tex/abstract.tex new file mode 100644 index 00000000..43ac807d --- /dev/null +++ b/papers/stcs-2019/article/tex/abstract.tex @@ -0,0 +1,14 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + \begin{abstract} + + GRIN is short for Graph Reduction Intermediate Notation, a modern back end for lazy functional languages. Most of the currently available compilers for such languages share a common flaw: they can only optimize programs on a per-module basis. The GRIN framework allows for interprocedural whole program analysis, enabling optimizing code transformations across functions and modules as well. 
+ + Some implementations of GRIN already exist, but most of them were developed only for experimentation purposes. Thus, they either compromise on low level efficiency or contain ad hoc modifications compared to the original specification. + + Our goal is to provide a full-fledged implementation of GRIN by combining the currently available best technologies like LLVM, and evaluate the framework's effectiveness by measuring how the optimizer improves the performance of certain programs. We also present some improvements to the already existing components of the framework. Some of these improvements include a typed representation for the intermediate language and an interprocedural program optimization, the dead data elimination. + + \keywords{grin \and compiler \and whole program optimization \and intermediate representation \and dead code elimination} + + \end{abstract} +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/bib_database.bib b/papers/stcs-2019/article/tex/bib_database.bib new file mode 100644 index 00000000..eedd70c5 --- /dev/null +++ b/papers/stcs-2019/article/tex/bib_database.bib @@ -0,0 +1,408 @@ +@phdthesis +{ + boquist-phd, + author = {Urban Boquist}, + school = {{Chalmers University of Technology and Göteborg University}}, + title ={{Code Optimisation Techniques for Lazy Functional Languages}}, + year = {1999}, + isbn = {91-7197-792-9} +} + +@inproceedings +{ + boquist-grin, + author = {Boquist, Urban and Johnsson, Thomas}, + title = {{The GRIN Project: A Highly Optimising Back End for Lazy Functional Languages}}, + booktitle = {{Selected Papers from the 8th International Workshop on Implementation of Functional Languages}}, + series = {IFL '96}, + year = {1997}, + isbn = {3-540-63237-9}, + pages = {58--84}, + numpages = {27}, + url = {http://dl.acm.org/citation.cfm?id=647975.743083}, + acmid = {743083}, + publisher = {{Springer-Verlag}}, + address = {Berlin, Heidelberg} +} + +@mastersthesis +{ + remi-masters, + 
author = {Remi Turk}, + school = {{Universiteit van Amsterdam}}, + title ={{A modern back-end for a dependently typed language}}, + year = {2010} +} + +@article +{ + hbc, + title={{Haskell B. user manual}}, + author={{Augustsson, Lennart}}, + journal={{Programming methodology group report, Dept. of Comp. Sci, Chalmers Univ. of Technology, G{\"o}teborg, Sweden}}, + year={1992} +} + + +@inproceedings +{ + uhc, + author = {{Dijkstra, Atze and Fokker, Jeroen and Swierstra, S. Doaitse}}, + title = {{The Architecture of the Utrecht Haskell Compiler}}, + booktitle = {{Proceedings of the 2Nd ACM SIGPLAN Symposium on Haskell}}, + series = {Haskell '09}, + year = {2009}, + isbn = {978-1-60558-508-6}, + location = {Edinburgh, Scotland}, + pages = {93--104}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/1596638.1596650}, + doi = {10.1145/1596638.1596650}, + acmid = {1596650}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {aspect orientation, attribute grammar, compiler architecture, haskell}, +} + +@article +{ + hrc, + author = {{Liu, Hai and Glew, Neal and Petersen, Leaf and Anderson, Todd A.}}, + title = {{The Intel Labs Haskell Research Compiler}}, + journal = {{SIGPLAN Not.}}, + issue_date = {December 2013}, + volume = {48}, + number = {12}, + month = sep, + year = {2013}, + issn = {0362-1340}, + pages = {105--116}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/2578854.2503779}, + doi = {10.1145/2578854.2503779}, + acmid = {2503779}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {compiler optimization, functional language compiler, haskell}, +} + +@article +{ + hrc-simd, + author = {{Petersen, Leaf and Orchard, Dominic and Glew, Neal}}, + title = {{Automatic SIMD Vectorization for Haskell}}, + journal = {{SIGPLAN Not.}}, + issue_date = {September 2013}, + volume = {48}, + number = {9}, + month = sep, + year = {2013}, + issn = {0362-1340}, + pages = {25--36}, + numpages = {12}, + url = 
{http://doi.acm.org/10.1145/2544174.2500605}, + doi = {10.1145/2544174.2500605}, + acmid = {2500605}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {compiler optimization, functional languages, haskell, simd, vectorization}, +} + +@inproceedings +{ + haskell-gap, + author = {Petersen, Leaf and Anderson, Todd A. and Liu, Hai and Glew, Neal}, + title = {{Measuring the Haskell Gap}}, + booktitle = {{Proceedings of the 25th Symposium on Implementation and Application of Functional Languages}}, + series = {IFL '13}, + year = {2014}, + isbn = {978-1-4503-2988-0}, + location = {Nijmegen, Netherlands}, + pages = {61:61--61:72}, + articleno = {61}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/2620678.2620685}, + doi = {10.1145/2620678.2620685}, + acmid = {2620685}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@techreport +{ + mlton-llvm, + author = {Brian Andrew Leibig}, + title = {{An LLVM Back-end for MLton}}, + year = {2013}, + url = {https://www.cs.rit.edu/~mtf/student-resources/20124_leibig_msproject.pdf}, + note = {A Project Report Submitted in Partial Fulfillment of the Requirements for the Degree of Master of Science in Computer Science}, + institution = {Department of Computer Science, B. 
Thomas Golisano College of Computing and Information Sciences}, +} + +@article +{ + contification, + author = {Fluet, Matthew and Weeks, Stephen}, + title = {{Contification Using Dominators}}, + journal = {{SIGPLAN Not.}}, + issue_date = {October 2001}, + volume = {36}, + number = {10}, + month = oct, + year = {2001}, + issn = {0362-1340}, + pages = {2--13}, + numpages = {12}, + url = {http://doi.acm.org/10.1145/507669.507639}, + doi = {10.1145/507669.507639}, + acmid = {507639}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@inproceedings +{ + mlton, + author = {Weeks, Stephen}, + title = {{Whole-program Compilation in MLton}}, + booktitle = {{Proceedings of the 2006 Workshop on ML}}, + series = {ML '06}, + year = {2006}, + isbn = {1-59593-483-9}, + location = {Portland, Oregon, USA}, + pages = {1--1}, + numpages = {1}, + url = {http://doi.acm.org/10.1145/1159876.1159877}, + doi = {10.1145/1159876.1159877}, + acmid = {1159877}, + publisher = {ACM}, + address = {New York, NY, USA}, +} + +@inproceedings +{ + llvm-2004, + author = {Chris Lattner and Vikram Adve}, + title = {{LLVM: A Compilation Framework for Lifelong Program Analysis and Transformation}}, + booktitle = {CGO}, + address = {San Jose, CA, USA}, + month = {Mar}, + year = {2004}, + pages = {75--88}, +} + +@misc +{ + opt, + title = {{Modular LLVM Analyzer and Optimizer}}, + url = {http://llvm.org/docs/CommandGuide/opt.html} +} + +@misc +{ + llc, + title = {{LLVM Static Compiler}}, + url = {https://llvm.org/docs/CommandGuide/llc.html} +} + +@misc +{ + clang, + title = {{Clang: a C language family front end for LLVM}}, + url = {https://clang.llvm.org} +} + +@inproceedings +{ + ghc, + author = {{Hall, Cordelia V. and Hammond, Kevin and Partain, Will and Peyton Jones, Simon L. 
and Wadler, Philip}}, + title = {{The Glasgow Haskell Compiler: A Retrospective}}, + booktitle = {{Proceedings of the 1992 Glasgow Workshop on Functional Programming}}, + year = {1993}, + isbn = {3-540-19820-2}, + pages = {62--71}, + numpages = {10}, + url = {http://dl.acm.org/citation.cfm?id=647557.729914}, + acmid = {729914}, + publisher = {{Springer-Verlag}}, + address = {London, UK}, +} + +@article +{ + idris, + title={{Idris, a general-purpose dependently typed programming language: Design and implementation}}, + volume={23}, + DOI={10.1017/S095679681300018X}, + number={5}, + journal={{Journal of Functional Programming}}, + publisher={Cambridge University Press}, + author={{Brady, Edwin}}, + year={2013}, + pages={552--593} +} + +@phdthesis +{ + andersen-ptr, + title={{Program analysis and specialization for the C programming language}}, + author={{Andersen, Lars Ole}}, + year={1994}, + school={University of Copenhagen} +} + +@inproceedings +{ + steensgaard-ptr, + title={{Points-to analysis in almost linear time}}, + author={{Steensgaard, Bjarne}}, + booktitle={{Proceedings of the 23rd ACM SIGPLAN-SIGACT symposium on Principles of programming languages}}, + pages={32--41}, + year={1996}, + organization={ACM} +} + +@inproceedings +{ + shapiro-ptr, + title={{Fast and accurate flow-insensitive points-to analysis}}, + author={{Shapiro, Marc and Horwitz, Susan}}, + booktitle={{Proceedings of the 24th ACM SIGPLAN-SIGACT symposium on Principles of programming languages}}, + pages={1--14}, + year={1997}, + organization={ACM} +} + +@inproceedings +{ + andersen-opt, + title={{The ant and the grasshopper: fast and accurate pointer analysis for millions of lines of code}}, + author={{Hardekopf, Ben and Lin, Calvin}}, + booktitle={{ACM SIGPLAN Notices}}, + volume={42}, + number={6}, + pages={290--299}, + year={2007}, + organization={ACM} +} + +@article +{ + andersen-gpu, + title={{A GPU implementation of inclusion-based points-to analysis}}, + author={{Mendez-Lojo, Mario 
and Burtscher, Martin and Pingali, Keshav}}, + journal={{ACM SIGPLAN Notices}}, + volume={47}, + number={8}, + pages={107--116}, + year={2012}, + publisher={ACM} +} + + + +@article +{ + ajhc, + author = {{Okabe, Kiwamu and Muranushi, Takayuki}}, + title = {{Systems Demonstration: Writing NetBSD Sound Drivers in Haskell}}, + journal = {{SIGPLAN Not.}}, + issue_date = {December 2014}, + volume = {49}, + number = {12}, + month = sep, + year = {2014}, + issn = {0362-1340}, + pages = {77--78}, + numpages = {2}, + url = {http://doi.acm.org/10.1145/2775050.2633370}, + doi = {10.1145/2775050.2633370}, + acmid = {2633370}, + publisher = {ACM}, + address = {New York, NY, USA}, + keywords = {languages, performance}, +} + +@misc +{ + jhc, + title={{JHC}}, + author={{John Meacham}}, + url={http://repetae.net/computer/jhc/jhc.shtml} +} + +@misc +{ + lhc, + title={{LLVM Haskell Compiler}}, + author={{David Himmelstrup}}, + url={http://lhc-compiler.blogspot.com/} +} + +@article +{ + lambda-cube, + title={{Lambda calculi with types}}, + author={{Barendregt, Henk P}}, + year={1992}, + publisher={Oxford: Clarendon Press} +} + +@article +{ + pts-berardi, + title={{Towards a mathematical analysis of the Coquand-Huet calculus of constructions and the other systems in Barendregt’s cube}}, + author={{Berardi, Stefano}}, + journal={Technical report, Carnegie-Mellon University (USA) and Universit{\`a} di Torino (Italy)}, + year={1988} +} + +@article +{ + pts-terlouw, + title={{Een nadere bewijstheoretische analyse van GSTT’s}}, + author={{Terlouw, Jan}}, + journal={Manuscript (in Dutch)}, + year={1989} +} + +@book +{ + tdd-idris, + title={{Type-driven development with Idris}}, + author={{Brady, Edwin}}, + year={2017}, + publisher={Manning Publications Company} +} + +@inproceedings +{ + systemfc, + title={{System F with type equality coercions}}, + author={{Sulzmann, Martin and Chakravarty, Manuel MT and Jones, Simon Peyton and Donnelly, Kevin}}, + booktitle={{Proceedings of the 2007 ACM SIGPLAN 
international workshop on Types in languages design and implementation}}, + pages={53--66}, + year={2007}, + organization={ACM} +} + +@book +{ + impl-fun-lang, + author = {{Peyton Jones, Simon}}, + title = {{The Implementation of Functional Programming Languages}}, + year = {1987}, + month = {January}, + publisher = {{Prentice Hall}}, + url = {https://www.microsoft.com/en-us/research/publication/the-implementation-of-functional-programming-languages/}, + note = {{pages 185--219}} +} + +@phdthesis +{ + uhc-exceptional-grin, + title={{Exceptional GRIN}}, + author={{Douma, Christof}}, + year={2006}, + school={{Master’s thesis, Utrecht University, Institute of Information and Computing}} +} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/bibliography.tex b/papers/stcs-2019/article/tex/bibliography.tex new file mode 100644 index 00000000..a52c3458 --- /dev/null +++ b/papers/stcs-2019/article/tex/bibliography.tex @@ -0,0 +1,10 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + \makeatletter + \preto{\@verbatim}{\topsep=0pt \partopsep=0pt } + \makeatother + + \bibliographystyle{IEEEtranS} + \bibliography{bib_database} + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/conclusions.tex b/papers/stcs-2019/article/tex/conclusions.tex new file mode 100644 index 00000000..ebffb6c4 --- /dev/null +++ b/papers/stcs-2019/article/tex/conclusions.tex @@ -0,0 +1,12 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + In this paper we presented a modern look at GRIN, an optimizing functional language back end originally published by Urban Boquist. + + We gave an overview of the GRIN framework, and introduced the reader to the related research on compilers utilizing GRIN and whole program optimization. Then we gave an extension for the heap points-to analysis with more accurate basic value tracking. 
This allowed for defining a type inference algorithm for the GRIN intermediate representation, which then was used in the implementation of the LLVM back end. Following that, we detailed the dead data elimination pass and the required data-flow analyses, originally published by Remi Turk. We also presented an extension of the dummification transformation which is compatible with the typed representation of GRIN by extending the IR with the \pilcode{undefined} value. Furthermore, we gave an alternative method for transforming producer-consumer groups by using basic blocks. Our last contribution was the implementation of the Idris front end. + + We evaluated our implementation of GRIN using simple Idris programs taken from the book \textit{Type-driven development with Idris}~\cite{tdd-idris} by Edwin Brady. We measured the optimized GRIN programs, as well as the generated binaries. It is important to note, that the measurements presented in this paper can only be considered preliminary, given the compiler needs further work to be comparable to other systems. Nevertheless, these statistics are still relevant, since they provide valuable information about the effectiveness of the optimizer. The results demonstrate that the GRIN optimizer can significantly improve the performance of GRIN programs. Furthermore, they indicate that the GRIN optimizer performs optimizations orthogonal to the LLVM optimizations, which supports the motivation behind the framework. As for dead data elimination, we found that it can facilitate other transformations during the optimization pipeline, and that it can considerably reduce the size of the generated binaries. + + All things considered, the current implementation of GRIN brought adequate results. However, there are still many promising ideas left to research. 
+ +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/dce.tex b/papers/stcs-2019/article/tex/dce.tex new file mode 100644 index 00000000..eb8af228 --- /dev/null +++ b/papers/stcs-2019/article/tex/dce.tex @@ -0,0 +1,50 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + Dead code elimination is one of the most well-known compiler optimization techniques. The aim of dead code elimination is to remove certain parts of the program that neither affect its final result nor its side effects. This includes code that can never be executed, and also code which only consists of irrelevant operations on dead variables. Dead code elimination can reduce the size of the input program, as well as increase its execution speed. Furthermore, it can facilitate other optimizing transformations by restructuring the code. + + \subsection{Dead Code Elimination in GRIN} + + The original GRIN framework has three different types of dead code eliminating transformations. These are dead function elimination, dead variable elimination and dead function parameter elimination. In general, the effectiveness of most optimizations solely depends on the accuracy of the information it has about the program. The more precise information it has, the more aggressive it can be. Furthermore, running the same transformation but with additional information available, can often yield more efficient code. + + In the original framework, the dead code eliminating transformations were provided only a very rough approximation of the liveness of variables and function parameters. In fact, a variable was deemed dead only if it was never used in the program. As a consequence, the required analyses were really fast, but the transformations themselves were very limited. + + \subsection{Interprocedural Liveness Analysis} \label{sub-sec:lva} + + In order to improve the effectiveness of dead code elimination, we need more sophisticated data-flow analyses. 
Liveness analysis is a standard data-flow analysis that determines which variables are live in the program and which ones are not. It is important to note, that even if a variable is used in the program, it does not necessarily mean it is live. See Program~code~\ref{code:lva-example}. + + \begin{codeFloat}[h] + \begin{center} + \begin{minipage}{0.375\textwidth} + \begin{haskell} + main = + n <- pure 5 + y <- pure (CInt n) + pure 0 + \end{haskell} + \subcaption{Put into a data constructor} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.375\textwidth} + \begin{haskell} + main = + n <- pure 5 + foo n + foo x = pure 0 + \end{haskell} + \subcaption{Argument to a function call} + \end{minipage} + \end{center} + \caption{Examples demonstrating that a used variable can still be dead} + \label{code:lva-example} + \end{codeFloat} + + In the first example, we can see a program where the variable \pilcode{n} is used, it is put into a \pilcode{CInt} node, but despite this, it is obvious to see that \pilcode{n} is still dead. Moreover, the liveness analysis can determine this fact just by examining the function body locally. It does not need to analyze any function calls. However, in the second example, we can see a very similar situation, but here \pilcode{n} is an argument to a function call. To calculate the liveness of \pilcode{n}, the analysis either has to assume that the arguments of \pilcode{foo} are always live, or it has to analyze the body of the function. The former decision yields a faster, but less precise \emph{intraprocedural} analysis, the latter results in a bit more costly, but also more accurate \emph{interprocedural} analysis. + + By extending the analysis with interprocedural elements, we can obtain quite a good estimate of the live variables in the program, while minimizing the cost of the algorithm. Using the information gathered by the liveness analysis, the original optimizations can remove even more dead code segments. + + %TODO: example here? 
+ + + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/dde.tex b/papers/stcs-2019/article/tex/dde.tex new file mode 100644 index 00000000..ca059c4a --- /dev/null +++ b/papers/stcs-2019/article/tex/dde.tex @@ -0,0 +1,136 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + % TODO: reference Remi Turk & HRC + % TODO: example for length + + Conventional dead code eliminating optimizations usually only remove statements or expressions from programs; however, \emph{dead data elimination} can transform the underlying data structures themselves. Essentially, it can specialize a certain data structure for a given use-site by removing or transforming unnecessary parts of it. It is a powerful optimization technique that --- given the right circumstances --- can significantly decrease memory usage and reduce the number of executed heap operations. + + Within the framework of GRIN, it was Remi Turk who presented the initial version of dead data elimination in his master's thesis~\cite{remi-masters}. His original implementation used intraprocedural analyses and an untyped representation of GRIN. We extended the algorithm with interprocedural analyses, and improved the ``dummification'' process (see Sections~\ref{subsec:producers-and-consumers}~and~\ref{subsec:undefined}). In the following we present a high level overview of the original dead data elimination algorithm, as well as detail some of our modifications. + + \subsection{Dead Data Elimination in GRIN} + + In the context of GRIN, dead data elimination removes dead fields of data constructors (or nodes) for both definition- and use-sites. In the following, we will refer to definition-sites as \emph{producers} and to use-sites as \emph{consumers}. Producers and consumers are in a \emph{many-to-many} relationship with each other. A producer can define a variable used by many consumers, and a consumer can use a variable possibly defined by many producers. 
It only depends on the control flow of the program. Program~code~\ref{code:dde-simple} illustrates dead data elimination on a very simple example with a single producer and a single consumer. + + \begin{codeFloat}[h] + \begin{center} + \begin{minipage}{0.42\textwidth} + \begin{haskell} + main = + x <- pure (CPair 0 1) + y <- snd x + pure y + + snd p = + (CPair a b) <- pure p + pure b + \end{haskell} + \subcaption{Before the transformation} + \end{minipage} + $\xRightarrow{\text{\emph{a} is dead}}$ + \begin{minipage}{0.4\textwidth} + \begin{haskell} + main = + x <- pure (CPair' 1) + y <- snd x + pure y + + snd p = + (CPair' b) <- pure p + pure b + \end{haskell} + \subcaption{After the transformation} + \end{minipage} + \end{center} + \caption{A simple example for dead data elimination} + \label{code:dde-simple} + \end{codeFloat} + + As we can see, the first component of the pair is never used, so the optimization can safely eliminate the first field of the node. It is important to note, that the transformation has to remove the dead field for both the producer and the consumer. Furthermore, the name of the node also has to be changed to preserve type correctness, since the transformation is specific to each producer-consumer group. This means, the data constructor \pilcode{CPair} still exists, and it can be used by other parts of the program, but a new, specialized version is introduced for any optimizable producer-consumer group\footnote{Strictly speaking, a new version is only introduced for each different set of live fields used by producer-consumer groups}. + + Dead data elimination requires a considerable amount of data-flow analyses and possibly multiple transformation passes. First of all, it has to identify potentially removable dead fields of a node. This information can be acquired by running liveness analysis on the program (see Section~\ref{sub-sec:lva}). 
After that, it has to connect producers with consumers by running the \emph{created-by data-flow analysis}. Then it has to group producers together sharing at least one common consumer, and determine whether a given field for a given producer can be removed globally, or just dummified locally. Finally, it has to transform both the producers and the consumers. + + \subsection{Created-by Analysis} + + The created-by analysis, as its name suggests, is responsible for determining the set of producers a given variable was possibly created by. For our purposes, it is sufficient to track only node valued variables, since these are the only potential candidates for dead data elimination. Analysis~example~\ref{analysis:cby} demonstrates how the algorithm works on a simple program. + + \begin{analysisFloat}[h] + \begin{center} + \begin{minipage}{0.43\textwidth} + \begin{haskell} + null xs = + y <- case xs of + (CNil) -> + a <- pure (CTrue) + pure a + (CCons z zs) -> + b <- pure (CFalse) + pure b + pure y + \end{haskell} + \subcaption{Input program} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.44\textwidth} + \begin{tcolorbox}[tab2,tabularx={l|r}] + Var & Producers \\ + \hline\hline + \pilcode{xs} & $\set{CNil[\dots], CCons[\dots]}$\footnotemark[1] \\\hline + \pilcode{a} & $\set{CTrue[\pilcode{a}]}$ \\\hline + \pilcode{b} & $\set{CFalse[\pilcode{b}]}$ \\\hline + \pilcode{y} & $\set{CTrue[\pilcode{a}], CFalse[\pilcode{b}]}$ \\ + \end{tcolorbox} + \subcaption{Analysis result} + \end{minipage} + \end{center} + \caption{An example demonstrating the created-by analysis} + \label{analysis:cby} + \end{analysisFloat} + + The result of the analysis is a mapping from variable names to sets of producers grouped by their tags. For example, we could say that ``variable \pilcode{y} was created by the producer \pilcode{a} given it was constructed with the \pilcode{CTrue} tag''. 
Naturally, a variable can be constructed with many different tags, and each tag can have multiple producers. 
Also, it is important to note that some variables are their own producers. This is because producers are basically definition sites or bindings, identified by the name of the variable on their left-hand sides. However, not all bindings have variables on their left-hand side, and some values may not be bound to variables. Fortunately, this problem can be easily solved by a simple program transformation. + + \footnotetext[1]{\label{footnote:cby-example}For the sake of simplicity, we will assume that \pilcode{xs} was constructed with the \pilcode{CNil} and \pilcode{CCons} tags. Also, its producers are irrelevant in this example.} + + \subsection{Grouping Producers} + + On a higher abstraction level, the result of the created-by analysis can be interpreted as a bipartite graph between producers and consumers. One group of nodes represents the producers and the other one represents the consumers. A producer is connected to a consumer if and only if the value created by the producer can be consumed by the consumer. Furthermore, each connected component of the graph corresponds to one producer-consumer group. Each producer inside the group can only create values consumed by the consumers inside the same group, and a similar statement holds for the consumers as well. + + \subsection{Transforming Producers and Consumers} + \label{subsec:producers-and-consumers} + + As mentioned earlier, the transformation applied by dead data elimination can be specific for each producer-consumer group, and both the producers and the consumers have to be transformed. Also, the transformation cannot always simply remove the dead field of a producer. Take a look at Figure~\ref{fig:producers-and-consumers}.
+ + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.5} + \begin{tikzpicture}[ node distance = 1cm and 2cm, on grid ] + + \node [shape=circle,draw=black] (P1) {$P_1$}; + \node [shape=circle,draw=black] (P2) [right =of P1] {$P_2$}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node [shape=circle,draw=black] (C2) [below =of Middle] {$C_2$}; + \node [shape=circle,draw=black] (C1) [left =of C2] {$C_1$}; + \node [shape=circle,draw=black] (C3) [right =of C2] {$C_3$}; + + \path[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge [] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + + \end{tikzpicture} + \end{adjustbox} + \caption{Producer-consumer group} + \label{fig:producers-and-consumers} + \end{figure} + + As we can see, producers $P_1$ and $P_2$ share a common consumer $C_2$. Let's assume that the shared value is a \pilcode{CPair} node with two fields, and neither $C_1$, nor $C_2$ uses the first field of that node. This means that the first field of the \pilcode{CPair} node is locally dead for producer $P_1$. Also, suppose that $C_3$ \emph{does} use the first field of that node, meaning it is live for $P_2$, hence it cannot be removed. In this situation, if the transformation were to remove the locally dead field from $P_1$, then it would lead to a type mismatch at $C_2$, since $C_2$ would receive two \pilcode{CPair} nodes with different numbers of arguments, with possibly different types for their first fields. In order to resolve this issue, the transformation has to rename the tag at $P_1$ to \pilcode{CPair'}, and create new patterns for \pilcode{CPair'} at $C_1$ and $C_2$ by duplicating and renaming the existing ones for \pilcode{CPair}. This way, we can avoid potential memory operations at the cost of code duplication. + + In fact, even the code duplication can be circumvented by introducing the notion of \emph{basic blocks} to the intermediate representation.
This way, we still need to generate new alternatives (new patterns), but their right-hand sides will be simple jump instructions to the basic blocks of the original alternative's right-hand side. + + \subsection{The \pilcode{undefined} value} + \label{subsec:undefined} + + Another option would be to only \emph{dummify} the locally dead fields. In other words, instead of removing the field at the producer and restructuring the consumers, the transformation could simply introduce a dummy value for that field. The dummy value could be any placeholder with the same type as the locally dead field. For instance, it could be any literal of that type. A more sophisticated solution would be to introduce an undefined value. The \pilcode{undefined} value is a placeholder as well, but it carries much more information. By marking certain values undefined instead of just introducing placeholder literals, we can facilitate other optimizations down the pipeline. However, each \pilcode{undefined} value has to be explicitly type annotated for the heap points-to analysis to work correctly. Just like the other approach mentioned earlier, this alternative also solves the problem of code duplication at the cost of some modifications to the intermediate representation. + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/future-work.tex b/papers/stcs-2019/article/tex/future-work.tex new file mode 100644 index 00000000..9f31da5c --- /dev/null +++ b/papers/stcs-2019/article/tex/future-work.tex @@ -0,0 +1,10 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + Currently, the framework only supports the compilation of Idris, but we are working on supporting Haskell by integrating the Glasgow Haskell Compiler as a new front end. As of right now, the framework \emph{can} generate GRIN IR code from GHC's STG representation, but the generated programs still contain unimplemented primitive operations. 
The main challenge is to somehow handle these primitive operations. In fact, there is only a small set of primitive operations that cannot be trivially incorporated into the framework, but these might even require extending the GRIN IR with additional built-in instructions. + + Besides the addition of built-in instructions, the GRIN intermediate representation can be improved further by introducing the notion of function pointers and basic blocks. Firstly, the original specification of GRIN does not support modular compilation. However, extending the IR with function pointers can help to achieve incremental compilation. Each module could be compiled separately with indirect calls to other modules through function pointers, then by using different data-flow analyses and program transformations, all modules could be optimized together incrementally. In theory, if the entire program is available for analysis at compile time, incremental compilation could produce the same result as whole program compilation. Secondly, the original GRIN IR has a monadic structure which can make it difficult to analyze and transform the control flow of the program. Fortunately, replacing the monadic structure with basic blocks can resolve this issue. + + Whole program analysis is a powerful tool for optimizing compilers, but it can be quite demanding on execution time. This being said, there are certain techniques to speed up these analyses. The core of the GRIN optimizer is the heap points-to analysis, an Andersen-style inclusion based pointer analysis~\cite{andersen-ptr}. This type of data-flow analysis is very well researched, and there are several ways to improve the algorithm's performance. Firstly, cyclic references could be detected and eliminated between data-flow nodes at runtime. This optimization allows the algorithm to analyze millions of lines of code within seconds~\cite{andersen-opt}. 
Secondly, the algorithm itself could be parallelized for both CPU and GPU~\cite{andersen-gpu}, achieving considerable speedups. Furthermore, some alternative algorithms could also be considered. For example, Steensgaard's unification based algorithm~\cite{steensgaard-ptr} is a less precise analysis, but it runs in almost linear time. It could be used as a preliminary analysis for some simple transformations at the beginning of the pipeline. Finally, Shapiro's algorithm~\cite{shapiro-ptr} could act as a compromise between Steensgaard's and Andersen's algorithm. In a way, Shapiro's analysis lies somewhere between the other two analyses. It is slower than Steensgaard's, but also much more precise; and it is less precise than Andersen's, but also much faster. + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/grin.tex b/papers/stcs-2019/article/tex/grin.tex new file mode 100644 index 00000000..10dce469 --- /dev/null +++ b/papers/stcs-2019/article/tex/grin.tex @@ -0,0 +1,14 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + GRIN is short for \emph{Graph Reduction Intermediate Notation}. GRIN consists of an intermediate representation language (IR in the following) as well as the entire compiler back end framework built around it. GRIN tries to resolve the issues highlighted in Section~\ref{sec:intro} by using interprocedural whole program optimization. + + Interprocedural program analysis is a type of data-flow analysis that propagates information about certain program elements through function calls. Using interprocedural analyses instead of intraprocedural ones allows for optimizations across functions. This means the framework can handle the issue of large sets of small interconnecting functions presented by the composable programming style. + + Whole program analysis enables optimizations across modules. This type of data-flow analysis has all the available information about the program at once.
As a consequence, it is possible to analyze and optimize global functions. Furthermore, with the help of whole program analysis, laziness can be made explicit. In fact, the evaluation of suspended computations in GRIN is done by an ordinary function called \pilcode{eval}. This is a global function uniquely generated for each program, meaning it can be optimized just like any other function by using whole program analysis. + + Finally, since the analyses and optimizations are implemented on a general intermediate representation, all other languages can benefit from the features provided by the GRIN back end. The intermediate layer of GRIN between the front end language and the low level machine code serves the purpose of eliminating functional artifacts from programs. This is achieved by using optimizing program transformations specific to the GRIN IR and functional languages in general. The simplified programs can then be optimized further by using conventional techniques already available. For example, it is possible to compile GRIN to LLVM and take advantage of an entire compiler framework providing a huge array of very powerful tools and features. 
+ + % TODO: refer LLVM section + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/idris-compilation-pipeline.tex b/papers/stcs-2019/article/tex/idris-compilation-pipeline.tex new file mode 100644 index 00000000..6d13026a --- /dev/null +++ b/papers/stcs-2019/article/tex/idris-compilation-pipeline.tex @@ -0,0 +1,33 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + \newlength{\vDist} + \newlength{\hDist} + \begin{tikzpicture}[align=center, draw=black, scale=1] + + % add styles + \tikzset{styRectDef/.style = {rectangle, rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}} + \tikzcustom{} + + % set distances + \setlength{\vDist}{0.75cm} + \setlength{\hDist}{1.75cm} + + % nodes + \node [styRectGn] (grin-cg) {GRIN code gen.}; + \node [styRectBl, right=\hDist of grin-cg] (dde) {DDE}; + \node [styRectBl, above=\vDist of dde] (reg-opts-1) {Regular opts.}; + \node [styRectBl, below=\vDist of dde] (reg-opts-2) {Regular opts.}; + \node [styRectGn, right=\hDist of dde] (bin-gen) {Binary generation}; + + \node [styLabel] (iteratively1) at ([shift={(1.25,0.45)}]reg-opts-1) {iteratively}; + \node [styLabel] (iteratively2) at ([shift={(1.25,0.45)}]reg-opts-2) {iteratively}; + + % connections + \draw[styConn, bend left] (grin-cg) to (reg-opts-1); + \draw[styConn] (reg-opts-1) to (dde); + \draw[styConn] (dde) to (reg-opts-2); + \draw[styConn, bend right] (reg-opts-2) to (bin-gen); + \end{tikzpicture} + \caption{Idris compilation pipeline} + \label{fig:idris-compilation-pipeline} +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/idris-front-end.tex b/papers/stcs-2019/article/tex/idris-front-end.tex new file mode 100644 index 00000000..b60af3c8 --- /dev/null +++ b/papers/stcs-2019/article/tex/idris-front-end.tex @@ -0,0 +1,29 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + Currently, our compiler uses the Idris compiler as its front end. 
The infrastructure can be divided into three components: the front end, that is responsible for generating GRIN IR from the Idris bytecode; the optimizer, that applies GRIN-to-GRIN transformations to the GRIN program, possibly improving its performance; and the back end, that compiles the optimized GRIN code into an executable. + + \subsection{Front end} + + The front end uses the bytecode produced by the Idris compiler to generate the GRIN intermediate representation. The Idris bytecode is generated without any optimizations by the Idris compiler. The code generation from Idris to GRIN is really simple; the difficult part of refining the original program is handled by the optimizer. + + \subsection{Optimizer} + \label{subsec:optimizer} + + The optimization pipeline consists of three stages, as can be seen in Figure~\ref{fig:idris-compilation-pipeline}. In the first stage, the optimizer iteratively runs the so-called \textit{regular optimizations}. These are the program transformations described in Urban Boquist's PhD thesis~\cite{boquist-phd}. A given pipeline of these transformations is run until the code reaches a fixed-point, and cannot be optimized any further. This set of transformations is not formally proven to be confluent, so theoretically different pipelines can result in different fixed-points\footnote{Although, experiments suggest that these transformations \textit{are} confluent}. Furthermore, some of these transformations can work against each other, so a fixed-point may not always exist. In this case, the pipeline can be caught in a loop, where the program returns to the same state over and over again. Fortunately, these loops can be detected, and the transformation pipeline can be terminated. + + \begin{figure}[t] + \centering + \subfile{idris-compilation-pipeline} + \end{figure} + + Following that, in the second stage, the optimizer runs the \textit{dead data elimination pass}.
This pass can be quite demanding on both the memory usage and the execution time due to the several data-flow analyses the transformation requires. Also, it is a rather specific transformation, which means, running it multiple times might not improve the code any further. As a consequence, the dead data elimination pass is executed only a single time during the entire optimization process. Since the dead data elimination pass can enable other optimizations, the optimizer runs the regular optimizations a second time right after the DDE pass. + + \subsection{Back end} + \label{subsec:llvm-back-end} + + After the optimization process, the optimized GRIN code is passed onto the back end, which then generates an executable using the LLVM compiler framework. The input of the back end consists of the optimized GRIN code, the primitive operations of Idris and a minimal runtime (the latter two are both implemented in C). Currently, the runtime is only responsible for allocating heap memory for the program, and at this point it does not include a garbage collector. + + The first task of the back end is to compile the GRIN code into LLVM IR code which is then optimized further by the LLVM Modular Optimizer~\cite{opt}. Currently, the back end uses the default LLVM optimization pipeline. After that, the optimized LLVM code is compiled into an object file by the LLVM Static Compiler~\cite{llc}. Finally, Clang links together the object file with the C-implemented primitive operations and the runtime, and generates an executable binary. 
+ +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/introduction.tex b/papers/stcs-2019/article/tex/introduction.tex new file mode 100644 index 00000000..c7a1bb87 --- /dev/null +++ b/papers/stcs-2019/article/tex/introduction.tex @@ -0,0 +1,29 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + Over the last few years, the functional programming paradigm has become even more popular and prominent than it was before. More and more industrial applications emerge, the paradigm itself keeps evolving, existing functional languages are being refined day by day, and even completely new languages appear. Yet, it seems the corresponding compiler technology lags behind a bit. + + Functional languages come with a multitude of interesting features that allow us to write programs on higher abstraction levels. Some of these features include higher-order functions, laziness and sophisticated type systems based on SystemFC~\cite{systemfc}, some even supporting dependent types. Although these features make writing code more convenient, they also complicate the compilation process. + + Compiler front ends usually handle these problems very well, but the back ends often struggle to produce efficient low level code. The reason for this is that back ends have a hard time optimizing code containing \emph{functional artifacts}. These functional artifacts are the by-products of high-level language features mentioned earlier. For example, higher-order functions can introduce unknown function calls and laziness can result in implicit value evaluation which can prove to be very hard to optimize. As a consequence, compilers generally compromise on low level efficiency for high-level language features. + + Moreover, the paradigm itself also encourages a certain programming style which further complicates the situation. Functional code usually consists of many smaller functions, rather than fewer big ones.
This style of coding results in more composable programs, but also presents more difficulties for compilation, since optimizing only individual functions is no longer sufficient. + + In order to resolve these problems, we need a compiler back end that can optimize across functions as well as allow the optimization of laziness in some way. Also, it would be beneficial if the back end could theoretically handle any front end language. + + In this paper we present a modern look at the GRIN framework. We explain some of its core concepts, and also provide a number of improvements to it. The main contributions presented in the paper are the following. + + \hspace{0.5cm} + \begin{enumerate} + \item Extension of the heap points-to analysis with more accurate basic value tracking + + \item Specification of a type inference algorithm for GRIN using the extended heap points-to analysis + + \item Implementation of an LLVM back end for the GRIN framework + + \item Extension of the dead data elimination transformation with typed dummification and an overview of an alternative transformation for producer-consumer groups + + \item Implementation of an Idris front end for the GRIN framework + \end{enumerate} + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/llncs.cls b/papers/stcs-2019/article/tex/llncs.cls new file mode 100644 index 00000000..7a058208 --- /dev/null +++ b/papers/stcs-2019/article/tex/llncs.cls @@ -0,0 +1,1218 @@ +% LLNCS DOCUMENT CLASS -- version 2.20 (10-Mar-2018) +% Springer Verlag LaTeX2e support for Lecture Notes in Computer Science +% +%% +%% \CharacterTable +%% {Upper-case \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z +%% Lower-case \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z +%% Digits \0\1\2\3\4\5\6\7\8\9 +%% Exclamation \! Double quote \" Hash (number) \# +%% Dollar \$ Percent \% Ampersand \& +%% Acute accent \' Left paren \( Right paren \) +%% Asterisk \* Plus \+ Comma \, +%% Minus \- Point \. 
Solidus \/ +%% Colon \: Semicolon \; Less than \< +%% Equals \= Greater than \> Question mark \? +%% Commercial at \@ Left bracket \[ Backslash \\ +%% Right bracket \] Circumflex \^ Underscore \_ +%% Grave accent \` Left brace \{ Vertical bar \| +%% Right brace \} Tilde \~} +%% +\NeedsTeXFormat{LaTeX2e}[1995/12/01] +\ProvidesClass{llncs}[2018/03/10 v2.20 +^^J LaTeX document class for Lecture Notes in Computer Science] +% Options +\let\if@envcntreset\iffalse +\DeclareOption{envcountreset}{\let\if@envcntreset\iftrue} +\DeclareOption{citeauthoryear}{\let\citeauthoryear=Y} +\DeclareOption{oribibl}{\let\oribibl=Y} +\let\if@custvec\iftrue +\DeclareOption{orivec}{\let\if@custvec\iffalse} +\let\if@envcntsame\iffalse +\DeclareOption{envcountsame}{\let\if@envcntsame\iftrue} +\let\if@envcntsect\iffalse +\DeclareOption{envcountsect}{\let\if@envcntsect\iftrue} +\let\if@runhead\iffalse +\DeclareOption{runningheads}{\let\if@runhead\iftrue} + +\let\if@openright\iftrue +\let\if@openbib\iffalse +\DeclareOption{openbib}{\let\if@openbib\iftrue} + +% languages +\let\switcht@@therlang\relax +\def\ds@deutsch{\def\switcht@@therlang{\switcht@deutsch}} +\def\ds@francais{\def\switcht@@therlang{\switcht@francais}} + +\DeclareOption*{\PassOptionsToClass{\CurrentOption}{article}} + +\ProcessOptions + +\LoadClass[twoside]{article} +\RequirePackage{multicol} % needed for the list of participants, index +\RequirePackage{aliascnt} + +\setlength{\textwidth}{12.2cm} +\setlength{\textheight}{19.3cm} +\renewcommand\@pnumwidth{2em} +\renewcommand\@tocrmarg{3.5em} +% +\def\@dottedtocline#1#2#3#4#5{% + \ifnum #1>\c@tocdepth \else + \vskip \z@ \@plus.2\p@ + {\leftskip #2\relax \rightskip \@tocrmarg \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \parindent #2\relax\@afterindenttrue + \interlinepenalty\@M + \leavevmode + \@tempdima #3\relax + \advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip + {#4}\nobreak + \leaders\hbox{$\m@th + \mkern \@dotsep 
mu\hbox{.}\mkern \@dotsep + mu$}\hfill + \nobreak + \hb@xt@\@pnumwidth{\hfil\normalfont \normalcolor #5}% + \par}% + \fi} +% +\def\switcht@albion{% +\def\abstractname{Abstract.} +\def\ackname{Acknowledgement.} +\def\andname{and} +\def\lastandname{\unskip, and} +\def\appendixname{Appendix} +\def\chaptername{Chapter} +\def\claimname{Claim} +\def\conjecturename{Conjecture} +\def\contentsname{Table of Contents} +\def\corollaryname{Corollary} +\def\definitionname{Definition} +\def\examplename{Example} +\def\exercisename{Exercise} +\def\figurename{Fig.} +\def\keywordname{{\bf Keywords:}} +\def\indexname{Index} +\def\lemmaname{Lemma} +\def\contriblistname{List of Contributors} +\def\listfigurename{List of Figures} +\def\listtablename{List of Tables} +\def\mailname{{\it Correspondence to\/}:} +\def\noteaddname{Note added in proof} +\def\notename{Note} +\def\partname{Part} +\def\problemname{Problem} +\def\proofname{Proof} +\def\propertyname{Property} +\def\propositionname{Proposition} +\def\questionname{Question} +\def\remarkname{Remark} +\def\seename{see} +\def\solutionname{Solution} +\def\subclassname{{\it Subject Classifications\/}:} +\def\tablename{Table} +\def\theoremname{Theorem}} +\switcht@albion +% Names of theorem like environments are already defined +% but must be translated if another language is chosen +% +% French section +\def\switcht@francais{%\typeout{On parle francais.}% + \def\abstractname{R\'esum\'e.}% + \def\ackname{Remerciements.}% + \def\andname{et}% + \def\lastandname{ et}% + \def\appendixname{Appendice} + \def\chaptername{Chapitre}% + \def\claimname{Pr\'etention}% + \def\conjecturename{Hypoth\`ese}% + \def\contentsname{Table des mati\`eres}% + \def\corollaryname{Corollaire}% + \def\definitionname{D\'efinition}% + \def\examplename{Exemple}% + \def\exercisename{Exercice}% + \def\figurename{Fig.}% + \def\keywordname{{\bf Mots-cl\'e:}} + \def\indexname{Index} + \def\lemmaname{Lemme}% + \def\contriblistname{Liste des contributeurs} + 
\def\listfigurename{Liste des figures}% + \def\listtablename{Liste des tables}% + \def\mailname{{\it Correspondence to\/}:} + \def\noteaddname{Note ajout\'ee \`a l'\'epreuve}% + \def\notename{Remarque}% + \def\partname{Partie}% + \def\problemname{Probl\`eme}% + \def\proofname{Preuve}% + \def\propertyname{Caract\'eristique}% +%\def\propositionname{Proposition}% + \def\questionname{Question}% + \def\remarkname{Remarque}% + \def\seename{voir} + \def\solutionname{Solution}% + \def\subclassname{{\it Subject Classifications\/}:} + \def\tablename{Tableau}% + \def\theoremname{Th\'eor\`eme}% +} +% +% German section +\def\switcht@deutsch{%\typeout{Man spricht deutsch.}% + \def\abstractname{Zusammenfassung.}% + \def\ackname{Danksagung.}% + \def\andname{und}% + \def\lastandname{ und}% + \def\appendixname{Anhang}% + \def\chaptername{Kapitel}% + \def\claimname{Behauptung}% + \def\conjecturename{Hypothese}% + \def\contentsname{Inhaltsverzeichnis}% + \def\corollaryname{Korollar}% +%\def\definitionname{Definition}% + \def\examplename{Beispiel}% + \def\exercisename{\"Ubung}% + \def\figurename{Abb.}% + \def\keywordname{{\bf Schl\"usselw\"orter:}} + \def\indexname{Index} +%\def\lemmaname{Lemma}% + \def\contriblistname{Mitarbeiter} + \def\listfigurename{Abbildungsverzeichnis}% + \def\listtablename{Tabellenverzeichnis}% + \def\mailname{{\it Correspondence to\/}:} + \def\noteaddname{Nachtrag}% + \def\notename{Anmerkung}% + \def\partname{Teil}% +%\def\problemname{Problem}% + \def\proofname{Beweis}% + \def\propertyname{Eigenschaft}% +%\def\propositionname{Proposition}% + \def\questionname{Frage}% + \def\remarkname{Anmerkung}% + \def\seename{siehe} + \def\solutionname{L\"osung}% + \def\subclassname{{\it Subject Classifications\/}:} + \def\tablename{Tabelle}% +%\def\theoremname{Theorem}% +} + +% Ragged bottom for the actual page +\def\thisbottomragged{\def\@textbottom{\vskip\z@ plus.0001fil +\global\let\@textbottom\relax}} + +\renewcommand\small{% + \@setfontsize\small\@ixpt{11}% + 
\abovedisplayskip 8.5\p@ \@plus3\p@ \@minus4\p@ + \abovedisplayshortskip \z@ \@plus2\p@ + \belowdisplayshortskip 4\p@ \@plus2\p@ \@minus2\p@ + \def\@listi{\leftmargin\leftmargini + \parsep 0\p@ \@plus1\p@ \@minus\p@ + \topsep 8\p@ \@plus2\p@ \@minus4\p@ + \itemsep0\p@}% + \belowdisplayskip \abovedisplayskip +} + +\frenchspacing +\widowpenalty=10000 +\clubpenalty=10000 + +\setlength\oddsidemargin {63\p@} +\setlength\evensidemargin {63\p@} +\setlength\marginparwidth {90\p@} + +\setlength\headsep {16\p@} + +\setlength\footnotesep{7.7\p@} +\setlength\textfloatsep{8mm\@plus 2\p@ \@minus 4\p@} +\setlength\intextsep {8mm\@plus 2\p@ \@minus 2\p@} + +\setcounter{secnumdepth}{2} + +\newcounter {chapter} +\renewcommand\thechapter {\@arabic\c@chapter} + +\newif\if@mainmatter \@mainmattertrue +\newcommand\frontmatter{\cleardoublepage + \@mainmatterfalse\pagenumbering{Roman}} +\newcommand\mainmatter{\cleardoublepage + \@mainmattertrue\pagenumbering{arabic}} +\newcommand\backmatter{\if@openright\cleardoublepage\else\clearpage\fi + \@mainmatterfalse} + +\renewcommand\part{\cleardoublepage + \thispagestyle{empty}% + \if@twocolumn + \onecolumn + \@tempswatrue + \else + \@tempswafalse + \fi + \null\vfil + \secdef\@part\@spart} + +\def\@part[#1]#2{% + \ifnum \c@secnumdepth >-2\relax + \refstepcounter{part}% + \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% + \else + \addcontentsline{toc}{part}{#1}% + \fi + \markboth{}{}% + {\centering + \interlinepenalty \@M + \normalfont + \ifnum \c@secnumdepth >-2\relax + \huge\bfseries \partname~\thepart + \par + \vskip 20\p@ + \fi + \Huge \bfseries #2\par}% + \@endpart} +\def\@spart#1{% + {\centering + \interlinepenalty \@M + \normalfont + \Huge \bfseries #1\par}% + \@endpart} +\def\@endpart{\vfil\newpage + \if@twoside + \null + \thispagestyle{empty}% + \newpage + \fi + \if@tempswa + \twocolumn + \fi} + +\newcommand\chapter{\clearpage + \thispagestyle{empty}% + \global\@topnum\z@ + \@afterindentfalse + \secdef\@chapter\@schapter} 
+\def\@chapter[#1]#2{\ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \refstepcounter{chapter}% + \typeout{\@chapapp\space\thechapter.}% + \addcontentsline{toc}{chapter}% + {\protect\numberline{\thechapter}#1}% + \else + \addcontentsline{toc}{chapter}{#1}% + \fi + \else + \addcontentsline{toc}{chapter}{#1}% + \fi + \chaptermark{#1}% + \addtocontents{lof}{\protect\addvspace{10\p@}}% + \addtocontents{lot}{\protect\addvspace{10\p@}}% + \if@twocolumn + \@topnewpage[\@makechapterhead{#2}]% + \else + \@makechapterhead{#2}% + \@afterheading + \fi} +\def\@makechapterhead#1{% +% \vspace*{50\p@}% + {\centering + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \large\bfseries \@chapapp{} \thechapter + \par\nobreak + \vskip 20\p@ + \fi + \fi + \interlinepenalty\@M + \Large \bfseries #1\par\nobreak + \vskip 40\p@ + }} +\def\@schapter#1{\if@twocolumn + \@topnewpage[\@makeschapterhead{#1}]% + \else + \@makeschapterhead{#1}% + \@afterheading + \fi} +\def\@makeschapterhead#1{% +% \vspace*{50\p@}% + {\centering + \normalfont + \interlinepenalty\@M + \Large \bfseries #1\par\nobreak + \vskip 40\p@ + }} + +\renewcommand\section{\@startsection{section}{1}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {12\p@ \@plus 4\p@ \@minus 4\p@}% + {\normalfont\large\bfseries\boldmath + \rightskip=\z@ \@plus 8em\pretolerance=10000 }} +\renewcommand\subsection{\@startsection{subsection}{2}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {8\p@ \@plus 4\p@ \@minus 4\p@}% + {\normalfont\normalsize\bfseries\boldmath + \rightskip=\z@ \@plus 8em\pretolerance=10000 }} +\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}% + {-18\p@ \@plus -4\p@ \@minus -4\p@}% + {-0.5em \@plus -0.22em \@minus -0.1em}% + {\normalfont\normalsize\bfseries\boldmath}} +\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}% + {-12\p@ \@plus -4\p@ \@minus -4\p@}% + {-0.5em \@plus -0.22em \@minus -0.1em}% + {\normalfont\normalsize\itshape}} +\renewcommand\subparagraph[1]{\typeout{LLNCS warning: You should not use + 
\string\subparagraph\space with this class}\vskip0.5cm +You should not use \verb|\subparagraph| with this class.\vskip0.5cm} + +\DeclareMathSymbol{\Gamma}{\mathalpha}{letters}{"00} +\DeclareMathSymbol{\Delta}{\mathalpha}{letters}{"01} +\DeclareMathSymbol{\Theta}{\mathalpha}{letters}{"02} +\DeclareMathSymbol{\Lambda}{\mathalpha}{letters}{"03} +\DeclareMathSymbol{\Xi}{\mathalpha}{letters}{"04} +\DeclareMathSymbol{\Pi}{\mathalpha}{letters}{"05} +\DeclareMathSymbol{\Sigma}{\mathalpha}{letters}{"06} +\DeclareMathSymbol{\Upsilon}{\mathalpha}{letters}{"07} +\DeclareMathSymbol{\Phi}{\mathalpha}{letters}{"08} +\DeclareMathSymbol{\Psi}{\mathalpha}{letters}{"09} +\DeclareMathSymbol{\Omega}{\mathalpha}{letters}{"0A} + +\let\footnotesize\small + +\if@custvec +\def\vec#1{\mathchoice{\mbox{\boldmath$\displaystyle#1$}} +{\mbox{\boldmath$\textstyle#1$}} +{\mbox{\boldmath$\scriptstyle#1$}} +{\mbox{\boldmath$\scriptscriptstyle#1$}}} +\fi + +\def\squareforqed{\hbox{\rlap{$\sqcap$}$\sqcup$}} +\def\qed{\ifmmode\squareforqed\else{\unskip\nobreak\hfil +\penalty50\hskip1em\null\nobreak\hfil\squareforqed +\parfillskip=0pt\finalhyphendemerits=0\endgraf}\fi} + +\def\getsto{\mathrel{\mathchoice {\vcenter{\offinterlineskip +\halign{\hfil +$\displaystyle##$\hfil\cr\gets\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr\gets +\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr\gets +\cr\to\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +\gets\cr\to\cr}}}}} +\def\lid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil +$\displaystyle##$\hfil\cr<\cr\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr<\cr +\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr<\cr +\noalign{\vskip1pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +<\cr +\noalign{\vskip0.9pt}=\cr}}}}} 
+\def\gid{\mathrel{\mathchoice {\vcenter{\offinterlineskip\halign{\hfil +$\displaystyle##$\hfil\cr>\cr\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr>\cr +\noalign{\vskip1.2pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr>\cr +\noalign{\vskip1pt}=\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +>\cr +\noalign{\vskip0.9pt}=\cr}}}}} +\def\grole{\mathrel{\mathchoice {\vcenter{\offinterlineskip +\halign{\hfil +$\displaystyle##$\hfil\cr>\cr\noalign{\vskip-1pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\textstyle##$\hfil\cr +>\cr\noalign{\vskip-1pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptstyle##$\hfil\cr +>\cr\noalign{\vskip-0.8pt}<\cr}}} +{\vcenter{\offinterlineskip\halign{\hfil$\scriptscriptstyle##$\hfil\cr +>\cr\noalign{\vskip-0.3pt}<\cr}}}}} +\def\bbbr{{\rm I\!R}} %reelle Zahlen +\def\bbbm{{\rm I\!M}} +\def\bbbn{{\rm I\!N}} %natuerliche Zahlen +\def\bbbf{{\rm I\!F}} +\def\bbbh{{\rm I\!H}} +\def\bbbk{{\rm I\!K}} +\def\bbbp{{\rm I\!P}} +\def\bbbone{{\mathchoice {\rm 1\mskip-4mu l} {\rm 1\mskip-4mu l} +{\rm 1\mskip-4.5mu l} {\rm 1\mskip-5mu l}}} +\def\bbbc{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm C$}\hbox{\hbox +to0pt{\kern0.4\wd0\vrule height0.9\ht0\hss}\box0}}}} +\def\bbbq{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm +Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.8\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}} 
+{\setbox0=\hbox{$\scriptscriptstyle\rm Q$}\hbox{\raise +0.15\ht0\hbox to0pt{\kern0.4\wd0\vrule height0.7\ht0\hss}\box0}}}} +\def\bbbt{{\mathchoice {\setbox0=\hbox{$\displaystyle\rm +T$}\hbox{\hbox to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm T$}\hbox{\hbox +to0pt{\kern0.3\wd0\vrule height0.9\ht0\hss}\box0}}}} +\def\bbbs{{\mathchoice +{\setbox0=\hbox{$\displaystyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox +to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} +{\setbox0=\hbox{$\textstyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\hbox +to0pt{\kern0.55\wd0\vrule height0.5\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptstyle \rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.35\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox +to0pt{\kern0.5\wd0\vrule height0.45\ht0\hss}\box0}} +{\setbox0=\hbox{$\scriptscriptstyle\rm S$}\hbox{\raise0.5\ht0\hbox +to0pt{\kern0.4\wd0\vrule height0.45\ht0\hss}\raise0.05\ht0\hbox +to0pt{\kern0.55\wd0\vrule height0.45\ht0\hss}\box0}}}} +\def\bbbz{{\mathchoice {\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} +{\hbox{$\mathsf\textstyle Z\kern-0.4em Z$}} +{\hbox{$\mathsf\scriptstyle Z\kern-0.3em Z$}} +{\hbox{$\mathsf\scriptscriptstyle Z\kern-0.2em Z$}}}} + +\let\ts\, + +\setlength\leftmargini {17\p@} +\setlength\leftmargin {\leftmargini} +\setlength\leftmarginii {\leftmargini} +\setlength\leftmarginiii {\leftmargini} +\setlength\leftmarginiv {\leftmargini} +\setlength \labelsep {.5em} +\setlength \labelwidth{\leftmargini} +\addtolength\labelwidth{-\labelsep} + +\def\@listI{\leftmargin\leftmargini + \parsep 0\p@ \@plus1\p@ \@minus\p@ + \topsep 8\p@ \@plus2\p@ \@minus4\p@ + \itemsep0\p@} +\let\@listi\@listI +\@listi +\def\@listii 
{\leftmargin\leftmarginii + \labelwidth\leftmarginii + \advance\labelwidth-\labelsep + \topsep 0\p@ \@plus2\p@ \@minus\p@} +\def\@listiii{\leftmargin\leftmarginiii + \labelwidth\leftmarginiii + \advance\labelwidth-\labelsep + \topsep 0\p@ \@plus\p@\@minus\p@ + \parsep \z@ + \partopsep \p@ \@plus\z@ \@minus\p@} + +\renewcommand\labelitemi{\normalfont\bfseries --} +\renewcommand\labelitemii{$\m@th\bullet$} + +\setlength\arraycolsep{1.4\p@} +\setlength\tabcolsep{1.4\p@} + +\def\tableofcontents{\chapter*{\contentsname\@mkboth{{\contentsname}}% + {{\contentsname}}} + \def\authcount##1{\setcounter{auco}{##1}\setcounter{@auth}{1}} + \def\lastand{\ifnum\value{auco}=2\relax + \unskip{} \andname\ + \else + \unskip \lastandname\ + \fi}% + \def\and{\stepcounter{@auth}\relax + \ifnum\value{@auth}=\value{auco}% + \lastand + \else + \unskip, + \fi}% + \@starttoc{toc}\if@restonecol\twocolumn\fi} + +\def\l@part#1#2{\addpenalty{\@secpenalty}% + \addvspace{2em plus\p@}% % space above part line + \begingroup + \parindent \z@ + \rightskip \z@ plus 5em + \hrule\vskip5pt + \large % same size as for a contribution heading + \bfseries\boldmath % set line in boldface + \leavevmode % TeX command to enter horizontal mode. 
+ #1\par + \vskip5pt + \hrule + \vskip1pt + \nobreak % Never break after part entry + \endgroup} + +\def\@dotsep{2} + +\let\phantomsection=\relax + +\def\hyperhrefextend{\ifx\hyper@anchor\@undefined\else +{}\fi} + +\def\addnumcontentsmark#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{\protect\numberline + {\thechapter}#3}{\thepage}\hyperhrefextend}}% +\def\addcontentsmark#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{#3}{\thepage}\hyperhrefextend}}% +\def\addcontentsmarkwop#1#2#3{% +\addtocontents{#1}{\protect\contentsline{#2}{#3}{0}\hyperhrefextend}}% + +\def\@adcmk[#1]{\ifcase #1 \or +\def\@gtempa{\addnumcontentsmark}% + \or \def\@gtempa{\addcontentsmark}% + \or \def\@gtempa{\addcontentsmarkwop}% + \fi\@gtempa{toc}{chapter}% +} +\def\addtocmark{% +\phantomsection +\@ifnextchar[{\@adcmk}{\@adcmk[3]}% +} + +\def\l@chapter#1#2{\addpenalty{-\@highpenalty} + \vskip 1.0em plus 1pt \@tempdima 1.5em \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip + {\large\bfseries\boldmath#1}\ifx0#2\hfil\null + \else + \nobreak + \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern + \@dotsep mu$}\hfill + \nobreak\hbox to\@pnumwidth{\hss #2}% + \fi\par + \penalty\@highpenalty \endgroup} + +\def\l@title#1#2{\addpenalty{-\@highpenalty} + \addvspace{8pt plus 1pt} + \@tempdima \z@ + \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \parfillskip -\rightskip \pretolerance=10000 + \leavevmode \advance\leftskip\@tempdima \hskip -\leftskip + #1\nobreak + \leaders\hbox{$\m@th \mkern \@dotsep mu.\mkern + \@dotsep mu$}\hfill + \nobreak\hbox to\@pnumwidth{\hss #2}\par + \penalty\@highpenalty \endgroup} + +\def\l@author#1#2{\addpenalty{\@highpenalty} + \@tempdima=15\p@ %\z@ + \begingroup + \parindent \z@ \rightskip \@tocrmarg + \advance\rightskip by 0pt plus 2cm + \pretolerance=10000 + \leavevmode 
\advance\leftskip\@tempdima %\hskip -\leftskip + \textit{#1}\par + \penalty\@highpenalty \endgroup} + +\setcounter{tocdepth}{0} +\newdimen\tocchpnum +\newdimen\tocsecnum +\newdimen\tocsectotal +\newdimen\tocsubsecnum +\newdimen\tocsubsectotal +\newdimen\tocsubsubsecnum +\newdimen\tocsubsubsectotal +\newdimen\tocparanum +\newdimen\tocparatotal +\newdimen\tocsubparanum +\tocchpnum=\z@ % no chapter numbers +\tocsecnum=15\p@ % section 88. plus 2.222pt +\tocsubsecnum=23\p@ % subsection 88.8 plus 2.222pt +\tocsubsubsecnum=27\p@ % subsubsection 88.8.8 plus 1.444pt +\tocparanum=35\p@ % paragraph 88.8.8.8 plus 1.666pt +\tocsubparanum=43\p@ % subparagraph 88.8.8.8.8 plus 1.888pt +\def\calctocindent{% +\tocsectotal=\tocchpnum +\advance\tocsectotal by\tocsecnum +\tocsubsectotal=\tocsectotal +\advance\tocsubsectotal by\tocsubsecnum +\tocsubsubsectotal=\tocsubsectotal +\advance\tocsubsubsectotal by\tocsubsubsecnum +\tocparatotal=\tocsubsubsectotal +\advance\tocparatotal by\tocparanum} +\calctocindent + +\def\l@section{\@dottedtocline{1}{\tocchpnum}{\tocsecnum}} +\def\l@subsection{\@dottedtocline{2}{\tocsectotal}{\tocsubsecnum}} +\def\l@subsubsection{\@dottedtocline{3}{\tocsubsectotal}{\tocsubsubsecnum}} +\def\l@paragraph{\@dottedtocline{4}{\tocsubsubsectotal}{\tocparanum}} +\def\l@subparagraph{\@dottedtocline{5}{\tocparatotal}{\tocsubparanum}} + +\def\listoffigures{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \fi\section*{\listfigurename\@mkboth{{\listfigurename}}{{\listfigurename}}} + \@starttoc{lof}\if@restonecol\twocolumn\fi} +\def\l@figure{\@dottedtocline{1}{0em}{1.5em}} + +\def\listoftables{\@restonecolfalse\if@twocolumn\@restonecoltrue\onecolumn + \fi\section*{\listtablename\@mkboth{{\listtablename}}{{\listtablename}}} + \@starttoc{lot}\if@restonecol\twocolumn\fi} +\let\l@table\l@figure + +\renewcommand\listoffigures{% + \section*{\listfigurename + \@mkboth{\listfigurename}{\listfigurename}}% + \@starttoc{lof}% + } + +\renewcommand\listoftables{% + 
\section*{\listtablename + \@mkboth{\listtablename}{\listtablename}}% + \@starttoc{lot}% + } + +\ifx\oribibl\undefined +\ifx\citeauthoryear\undefined +\renewenvironment{thebibliography}[1] + {\section*{\refname} + \def\@biblabel##1{##1.} + \small + \list{\@biblabel{\@arabic\c@enumiv}}% + {\settowidth\labelwidth{\@biblabel{#1}}% + \leftmargin\labelwidth + \advance\leftmargin\labelsep + \if@openbib + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + \fi + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{\@arabic\c@enumiv}}% + \if@openbib + \renewcommand\newblock{\par}% + \else + \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% + \fi + \sloppy\clubpenalty4000\widowpenalty4000% + \sfcode`\.=\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} +\def\@lbibitem[#1]#2{\item[{[#1]}\hfill]\if@filesw + {\let\protect\noexpand\immediate + \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} +\newcount\@tempcntc +\def\@citex[#1]#2{\if@filesw\immediate\write\@auxout{\string\citation{#2}}\fi + \@tempcnta\z@\@tempcntb\m@ne\def\@citea{}\@cite{\@for\@citeb:=#2\do + {\@ifundefined + {b@\@citeb}{\@citeo\@tempcntb\m@ne\@citea\def\@citea{,}{\bfseries + ?}\@warning + {Citation `\@citeb' on page \thepage \space undefined}}% + {\setbox\z@\hbox{\global\@tempcntc0\csname b@\@citeb\endcsname\relax}% + \ifnum\@tempcntc=\z@ \@citeo\@tempcntb\m@ne + \@citea\def\@citea{,}\hbox{\csname b@\@citeb\endcsname}% + \else + \advance\@tempcntb\@ne + \ifnum\@tempcntb=\@tempcntc + \else\advance\@tempcntb\m@ne\@citeo + \@tempcnta\@tempcntc\@tempcntb\@tempcntc\fi\fi}}\@citeo}{#1}} +\def\@citeo{\ifnum\@tempcnta>\@tempcntb\else + \@citea\def\@citea{,\,\hskip\z@skip}% + \ifnum\@tempcnta=\@tempcntb\the\@tempcnta\else + {\advance\@tempcnta\@ne\ifnum\@tempcnta=\@tempcntb \else + \def\@citea{--}\fi + \advance\@tempcnta\m@ne\the\@tempcnta\@citea\the\@tempcntb}\fi\fi} +\else 
+\renewenvironment{thebibliography}[1] + {\section*{\refname} + \small + \list{}% + {\settowidth\labelwidth{}% + \leftmargin\parindent + \itemindent=-\parindent + \labelsep=\z@ + \if@openbib + \advance\leftmargin\bibindent + \itemindent -\bibindent + \listparindent \itemindent + \parsep \z@ + \fi + \usecounter{enumiv}% + \let\p@enumiv\@empty + \renewcommand\theenumiv{}}% + \if@openbib + \renewcommand\newblock{\par}% + \else + \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}% + \fi + \sloppy\clubpenalty4000\widowpenalty4000% + \sfcode`\.=\@m} + {\def\@noitemerr + {\@latex@warning{Empty `thebibliography' environment}}% + \endlist} + \def\@cite#1{#1}% + \def\@lbibitem[#1]#2{\item[]\if@filesw + {\def\protect##1{\string ##1\space}\immediate + \write\@auxout{\string\bibcite{#2}{#1}}}\fi\ignorespaces} + \fi +\else +\@cons\@openbib@code{\noexpand\small} +\fi + +\def\idxquad{\hskip 10\p@}% space that divides entry from number + +\def\@idxitem{\par\hangindent 10\p@} + +\def\subitem{\par\setbox0=\hbox{--\enspace}% second order + \noindent\hangindent\wd0\box0}% index entry + +\def\subsubitem{\par\setbox0=\hbox{--\,--\enspace}% third + \noindent\hangindent\wd0\box0}% order index entry + +\def\indexspace{\par \vskip 10\p@ plus5\p@ minus3\p@\relax} + +\renewenvironment{theindex} + {\@mkboth{\indexname}{\indexname}% + \thispagestyle{empty}\parindent\z@ + \parskip\z@ \@plus .3\p@\relax + \let\item\par + \def\,{\relax\ifmmode\mskip\thinmuskip + \else\hskip0.2em\ignorespaces\fi}% + \normalfont\small + \begin{multicols}{2}[\@makeschapterhead{\indexname}]% + } + {\end{multicols}} + +\renewcommand\footnoterule{% + \kern-3\p@ + \hrule\@width 2truecm + \kern2.6\p@} + \newdimen\fnindent + \fnindent1em +\long\def\@makefntext#1{% + \parindent \fnindent% + \leftskip \fnindent% + \noindent + \llap{\hb@xt@1em{\hss\@makefnmark\ }}\ignorespaces#1} + +\long\def\@makecaption#1#2{% + \small + \vskip\abovecaptionskip + \sbox\@tempboxa{{\bfseries #1.} #2}% + \ifdim \wd\@tempboxa >\hsize + 
{\bfseries #1.} #2\par + \else + \global \@minipagefalse + \hb@xt@\hsize{\hfil\box\@tempboxa\hfil}% + \fi + \vskip\belowcaptionskip} + +\def\fps@figure{htbp} +\def\fnum@figure{\figurename\thinspace\thefigure} +\def \@floatboxreset {% + \reset@font + \small + \@setnobreak + \@setminipage +} +\def\fps@table{htbp} +\def\fnum@table{\tablename~\thetable} +\renewenvironment{table} + {\setlength\abovecaptionskip{0\p@}% + \setlength\belowcaptionskip{10\p@}% + \@float{table}} + {\end@float} +\renewenvironment{table*} + {\setlength\abovecaptionskip{0\p@}% + \setlength\belowcaptionskip{10\p@}% + \@dblfloat{table}} + {\end@dblfloat} + +\long\def\@caption#1[#2]#3{\par\addcontentsline{\csname + ext@#1\endcsname}{#1}{\protect\numberline{\csname + the#1\endcsname}{\ignorespaces #2}}\begingroup + \@parboxrestore + \@makecaption{\csname fnum@#1\endcsname}{\ignorespaces #3}\par + \endgroup} + +% LaTeX does not provide a command to enter the authors institute +% addresses. The \institute command is defined here. 
+ +\newcounter{@inst} +\newcounter{@auth} +\newcounter{auco} +\newdimen\instindent +\newbox\authrun +\newtoks\authorrunning +\newtoks\tocauthor +\newbox\titrun +\newtoks\titlerunning +\newtoks\toctitle + +\def\clearheadinfo{\gdef\@author{No Author Given}% + \gdef\@title{No Title Given}% + \gdef\@subtitle{}% + \gdef\@institute{No Institute Given}% + \gdef\@thanks{}% + \global\titlerunning={}\global\authorrunning={}% + \global\toctitle={}\global\tocauthor={}} + +\def\institute#1{\gdef\@institute{#1}} + +\def\institutename{\par + \begingroup + \parskip=\z@ + \parindent=\z@ + \setcounter{@inst}{1}% + \def\and{\par\stepcounter{@inst}% + \noindent$^{\the@inst}$\enspace\ignorespaces}% + \setbox0=\vbox{\def\thanks##1{}\@institute}% + \ifnum\c@@inst=1\relax + \gdef\fnnstart{0}% + \else + \xdef\fnnstart{\c@@inst}% + \setcounter{@inst}{1}% + \noindent$^{\the@inst}$\enspace + \fi + \ignorespaces + \@institute\par + \endgroup} + +\def\@fnsymbol#1{\ensuremath{\ifcase#1\or\star\or{\star\star}\or + {\star\star\star}\or \dagger\or \ddagger\or + \mathchar "278\or \mathchar "27B\or \|\or **\or \dagger\dagger + \or \ddagger\ddagger \else\@ctrerr\fi}} + +\def\inst#1{\unskip$^{#1}$} +\def\orcidID#1{\unskip$^{[#1]}$} % added MR 2018-03-10 +\def\fnmsep{\unskip$^,$} +\def\email#1{{\tt#1}} + +\AtBeginDocument{\@ifundefined{url}{\def\url#1{#1}}{}% +\@ifpackageloaded{babel}{% +\@ifundefined{extrasenglish}{}{\addto\extrasenglish{\switcht@albion}}% +\@ifundefined{extrasfrenchb}{}{\addto\extrasfrenchb{\switcht@francais}}% +\@ifundefined{extrasgerman}{}{\addto\extrasgerman{\switcht@deutsch}}% +\@ifundefined{extrasngerman}{}{\addto\extrasngerman{\switcht@deutsch}}% +}{\switcht@@therlang}% +\providecommand{\keywords}[1]{\def\and{{\textperiodcentered} }% +\par\addvspace\baselineskip +\noindent\keywordname\enspace\ignorespaces#1}% +\@ifpackageloaded{hyperref}{% +\def\doi#1{\href{https://doi.org/#1}{https://doi.org/#1}}}{ +\def\doi#1{https://doi.org/#1}} +} +\def\homedir{\~{ }} + 
+\def\subtitle#1{\gdef\@subtitle{#1}} +\clearheadinfo +% +%%% to avoid hyperref warnings +\providecommand*{\toclevel@author}{999} +%%% to make title-entry parent of section-entries +\providecommand*{\toclevel@title}{0} +% +\renewcommand\maketitle{\newpage +\phantomsection + \refstepcounter{chapter}% + \stepcounter{section}% + \setcounter{section}{0}% + \setcounter{subsection}{0}% + \setcounter{figure}{0} + \setcounter{table}{0} + \setcounter{equation}{0} + \setcounter{footnote}{0}% + \begingroup + \parindent=\z@ + \renewcommand\thefootnote{\@fnsymbol\c@footnote}% + \if@twocolumn + \ifnum \col@number=\@ne + \@maketitle + \else + \twocolumn[\@maketitle]% + \fi + \else + \newpage + \global\@topnum\z@ % Prevents figures from going at top of page. + \@maketitle + \fi + \thispagestyle{empty}\@thanks +% + \def\\{\unskip\ \ignorespaces}\def\inst##1{\unskip{}}% + \def\thanks##1{\unskip{}}\def\fnmsep{\unskip}% + \instindent=\hsize + \advance\instindent by-\headlineindent + \if!\the\toctitle!\addcontentsline{toc}{title}{\@title}\else + \addcontentsline{toc}{title}{\the\toctitle}\fi + \if@runhead + \if!\the\titlerunning!\else + \edef\@title{\the\titlerunning}% + \fi + \global\setbox\titrun=\hbox{\small\rm\unboldmath\ignorespaces\@title}% + \ifdim\wd\titrun>\instindent + \typeout{Title too long for running head. 
Please supply}% + \typeout{a shorter form with \string\titlerunning\space prior to + \string\maketitle}% + \global\setbox\titrun=\hbox{\small\rm + Title Suppressed Due to Excessive Length}% + \fi + \xdef\@title{\copy\titrun}% + \fi +% + \if!\the\tocauthor!\relax + {\def\and{\noexpand\protect\noexpand\and}% + \def\inst##1{}% added MR 2017-09-20 to remove inst numbers from the TOC + \def\orcidID##1{}% added MR 2017-09-20 to remove ORCID ids from the TOC + \protected@xdef\toc@uthor{\@author}}% + \else + \def\\{\noexpand\protect\noexpand\newline}% + \protected@xdef\scratch{\the\tocauthor}% + \protected@xdef\toc@uthor{\scratch}% + \fi + \addtocontents{toc}{\noexpand\protect\noexpand\authcount{\the\c@auco}}% + \addcontentsline{toc}{author}{\toc@uthor}% + \if@runhead + \if!\the\authorrunning! + \value{@inst}=\value{@auth}% + \setcounter{@auth}{1}% + \else + \edef\@author{\the\authorrunning}% + \fi + \global\setbox\authrun=\hbox{\def\inst##1{}% added MR 2017-09-20 to remove inst numbers from the runninghead + \def\orcidID##1{}% added MR 2017-09-20 to remove ORCID ids from the runninghead + \small\unboldmath\@author\unskip}% + \ifdim\wd\authrun>\instindent + \typeout{Names of authors too long for running head. 
Please supply}% + \typeout{a shorter form with \string\authorrunning\space prior to + \string\maketitle}% + \global\setbox\authrun=\hbox{\small\rm + Authors Suppressed Due to Excessive Length}% + \fi + \xdef\@author{\copy\authrun}% + \markboth{\@author}{\@title}% + \fi + \endgroup + \setcounter{footnote}{\fnnstart}% + \clearheadinfo} +% +\def\@maketitle{\newpage + \markboth{}{}% + \def\lastand{\ifnum\value{@inst}=2\relax + \unskip{} \andname\ + \else + \unskip \lastandname\ + \fi}% + \def\and{\stepcounter{@auth}\relax + \ifnum\value{@auth}=\value{@inst}% + \lastand + \else + \unskip, + \fi}% + \begin{center}% + \let\newline\\ + {\Large \bfseries\boldmath + \pretolerance=10000 + \@title \par}\vskip .8cm +\if!\@subtitle!\else {\large \bfseries\boldmath + \vskip -.65cm + \pretolerance=10000 + \@subtitle \par}\vskip .8cm\fi + \setbox0=\vbox{\setcounter{@auth}{1}\def\and{\stepcounter{@auth}}% + \def\thanks##1{}\@author}% + \global\value{@inst}=\value{@auth}% + \global\value{auco}=\value{@auth}% + \setcounter{@auth}{1}% +{\lineskip .5em +\noindent\ignorespaces +\@author\vskip.35cm} + {\small\institutename} + \end{center}% + } + +% definition of the "\spnewtheorem" command. +% +% Usage: +% +% \spnewtheorem{env_nam}{caption}[within]{cap_font}{body_font} +% or \spnewtheorem{env_nam}[numbered_like]{caption}{cap_font}{body_font} +% or \spnewtheorem*{env_nam}{caption}{cap_font}{body_font} +% +% New is "cap_font" and "body_font". It stands for +% fontdefinition of the caption and the text itself. +% +% "\spnewtheorem*" gives a theorem without number. +% +% A defined spnewthoerem environment is used as described +% by Lamport. 
+% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\def\@thmcountersep{} +\def\@thmcounterend{.} + +\def\spnewtheorem{\@ifstar{\@sthm}{\@Sthm}} + +% definition of \spnewtheorem with number + +\def\@spnthm#1#2{% + \@ifnextchar[{\@spxnthm{#1}{#2}}{\@spynthm{#1}{#2}}} +\def\@Sthm#1{\@ifnextchar[{\@spothm{#1}}{\@spnthm{#1}}} + +\def\@spxnthm#1#2[#3]#4#5{\expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}\@addtoreset{#1}{#3}% + \expandafter\xdef\csname the#1\endcsname{\expandafter\noexpand + \csname the#3\endcsname \noexpand\@thmcountersep \@thmcounter{#1}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@spynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname + {\@definecounter{#1}% + \expandafter\xdef\csname the#1\endcsname{\@thmcounter{#1}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#3}{#4}}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@spothm#1[#2]#3#4#5{% + \@ifundefined{c@#2}{\@latexerr{No theorem environment `#2' defined}\@eha}% + {\expandafter\@ifdefinable\csname #1\endcsname + {\newaliascnt{#1}{#2}% + \expandafter\xdef\csname #1name\endcsname{#3}% + \global\@namedef{#1}{\@spthm{#1}{\csname #1name\endcsname}{#4}{#5}}% + \global\@namedef{end#1}{\@endtheorem}}}} + +\def\@spthm#1#2#3#4{\topsep 7\p@ \@plus2\p@ \@minus4\p@ +\refstepcounter{#1}% +\@ifnextchar[{\@spythm{#1}{#2}{#3}{#4}}{\@spxthm{#1}{#2}{#3}{#4}}} + +\def\@spxthm#1#2#3#4{\@spbegintheorem{#2}{\csname the#1\endcsname}{#3}{#4}% + \ignorespaces} + +\def\@spythm#1#2#3#4[#5]{\@spopargbegintheorem{#2}{\csname + the#1\endcsname}{#5}{#3}{#4}\ignorespaces} + +\def\@spbegintheorem#1#2#3#4{\trivlist + \item[\hskip\labelsep{#3#1\ #2\@thmcounterend}]#4} + +\def\@spopargbegintheorem#1#2#3#4#5{\trivlist + \item[\hskip\labelsep{#4#1\ #2}]{#4(#3)\@thmcounterend\ }#5} + +% definition of 
\spnewtheorem* without number + +\def\@sthm#1#2{\@Ynthm{#1}{#2}} + +\def\@Ynthm#1#2#3#4{\expandafter\@ifdefinable\csname #1\endcsname + {\global\@namedef{#1}{\@Thm{\csname #1name\endcsname}{#3}{#4}}% + \expandafter\xdef\csname #1name\endcsname{#2}% + \global\@namedef{end#1}{\@endtheorem}}} + +\def\@Thm#1#2#3{\topsep 7\p@ \@plus2\p@ \@minus4\p@ +\@ifnextchar[{\@Ythm{#1}{#2}{#3}}{\@Xthm{#1}{#2}{#3}}} + +\def\@Xthm#1#2#3{\@Begintheorem{#1}{#2}{#3}\ignorespaces} + +\def\@Ythm#1#2#3[#4]{\@Opargbegintheorem{#1} + {#4}{#2}{#3}\ignorespaces} + +\def\@Begintheorem#1#2#3{#3\trivlist + \item[\hskip\labelsep{#2#1\@thmcounterend}]} + +\def\@Opargbegintheorem#1#2#3#4{#4\trivlist + \item[\hskip\labelsep{#3#1}]{#3(#2)\@thmcounterend\ }} + +\if@envcntsect + \def\@thmcountersep{.} + \spnewtheorem{theorem}{Theorem}[section]{\bfseries}{\itshape} +\else + \spnewtheorem{theorem}{Theorem}{\bfseries}{\itshape} + \if@envcntreset + \@addtoreset{theorem}{section} + \else + \@addtoreset{theorem}{chapter} + \fi +\fi + +%definition of divers theorem environments +\spnewtheorem*{claim}{Claim}{\itshape}{\rmfamily} +\spnewtheorem*{proof}{Proof}{\itshape}{\rmfamily} +\if@envcntsame % alle Umgebungen wie Theorem. 
+ \def\spn@wtheorem#1#2#3#4{\@spothm{#1}[theorem]{#2}{#3}{#4}} +\else % alle Umgebungen mit eigenem Zaehler + \if@envcntsect % mit section numeriert + \def\spn@wtheorem#1#2#3#4{\@spxnthm{#1}{#2}[section]{#3}{#4}} + \else % nicht mit section numeriert + \if@envcntreset + \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} + \@addtoreset{#1}{section}} + \else + \def\spn@wtheorem#1#2#3#4{\@spynthm{#1}{#2}{#3}{#4} + \@addtoreset{#1}{chapter}}% + \fi + \fi +\fi +\spn@wtheorem{case}{Case}{\itshape}{\rmfamily} +\spn@wtheorem{conjecture}{Conjecture}{\itshape}{\rmfamily} +\spn@wtheorem{corollary}{Corollary}{\bfseries}{\itshape} +\spn@wtheorem{definition}{Definition}{\bfseries}{\itshape} +\spn@wtheorem{example}{Example}{\itshape}{\rmfamily} +\spn@wtheorem{exercise}{Exercise}{\itshape}{\rmfamily} +\spn@wtheorem{lemma}{Lemma}{\bfseries}{\itshape} +\spn@wtheorem{note}{Note}{\itshape}{\rmfamily} +\spn@wtheorem{problem}{Problem}{\itshape}{\rmfamily} +\spn@wtheorem{property}{Property}{\itshape}{\rmfamily} +\spn@wtheorem{proposition}{Proposition}{\bfseries}{\itshape} +\spn@wtheorem{question}{Question}{\itshape}{\rmfamily} +\spn@wtheorem{solution}{Solution}{\itshape}{\rmfamily} +\spn@wtheorem{remark}{Remark}{\itshape}{\rmfamily} + +\def\@takefromreset#1#2{% + \def\@tempa{#1}% + \let\@tempd\@elt + \def\@elt##1{% + \def\@tempb{##1}% + \ifx\@tempa\@tempb\else + \@addtoreset{##1}{#2}% + \fi}% + \expandafter\expandafter\let\expandafter\@tempc\csname cl@#2\endcsname + \expandafter\def\csname cl@#2\endcsname{}% + \@tempc + \let\@elt\@tempd} + +\def\theopargself{\def\@spopargbegintheorem##1##2##3##4##5{\trivlist + \item[\hskip\labelsep{##4##1\ ##2}]{##4##3\@thmcounterend\ }##5} + \def\@Opargbegintheorem##1##2##3##4{##4\trivlist + \item[\hskip\labelsep{##3##1}]{##3##2\@thmcounterend\ }} + } + +\renewenvironment{abstract}{% + \list{}{\advance\topsep by0.35cm\relax\small + \leftmargin=1cm + \labelwidth=\z@ + \listparindent=\z@ + \itemindent\listparindent + 
\rightmargin\leftmargin}\item[\hskip\labelsep + \bfseries\abstractname]} + {\endlist} + +\newdimen\headlineindent % dimension for space between +\headlineindent=1.166cm % number and text of headings. + +\def\ps@headings{\let\@mkboth\@gobbletwo + \let\@oddfoot\@empty\let\@evenfoot\@empty + \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% + \leftmark\hfil} + \def\@oddhead{\normalfont\small\hfil\rightmark\hspace{\headlineindent}% + \llap{\thepage}} + \def\chaptermark##1{}% + \def\sectionmark##1{}% + \def\subsectionmark##1{}} + +\def\ps@titlepage{\let\@mkboth\@gobbletwo + \let\@oddfoot\@empty\let\@evenfoot\@empty + \def\@evenhead{\normalfont\small\rlap{\thepage}\hspace{\headlineindent}% + \hfil} + \def\@oddhead{\normalfont\small\hfil\hspace{\headlineindent}% + \llap{\thepage}} + \def\chaptermark##1{}% + \def\sectionmark##1{}% + \def\subsectionmark##1{}} + +\if@runhead\ps@headings\else +\ps@empty\fi + +%\setlength\arraycolsep{1.4\p@} +%\setlength\tabcolsep{1.4\p@} + +\endinput +%end of file llncs.cls diff --git a/papers/stcs-2019/article/tex/llvm.tex b/papers/stcs-2019/article/tex/llvm.tex new file mode 100644 index 00000000..519dd222 --- /dev/null +++ b/papers/stcs-2019/article/tex/llvm.tex @@ -0,0 +1,60 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + %TODO: GRIN: functional domain - imperative domain, LLVM: architecture independent domain - architecture specific domain + + LLVM is a collection of compiler technologies consisting of an intermediate representation called the LLVM IR, a modularly built compiler framework and many other tools built on these technologies. This section discusses the benefits and challenges of compiling GRIN to LLVM. + + \subsection{Benefits and Challenges} + + The main advantage LLVM has over other CISC and RISC based languages lies in its modular design and library based structure. 
The compiler framework built around LLVM is entirely customizable and can generate highly optimized low level machine code for most architectures. Furthermore, it offers a vast range of tools and features out of the box, such as different debugging tools or compilation to WebAssembly. + + However, compiling unrefined functional code to LLVM does not yield the results one would expect. Since LLVM was mainly designed for imperative languages, functional programs may prove to be difficult to optimize. The reason for this is that functional artifacts or even just the general structuring of functional programs can render conventional optimization techniques useless. + + While LLVM acts as a transitional layer between architecture-independent and architecture-specific domains, GRIN serves the same purpose for the functional and imperative domains. Figure~\ref{fig:grin-back-end} illustrates this domain separation. The purpose of GRIN is to eliminate functional artifacts and restructure functional programs in a way so that they can be efficiently optimized by conventional techniques. 
+ + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 0.9cm and 1.5cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [draw=black] (haskell) {Haskell}; + \node [draw=black] (idris) [left =of haskell] {Idris}; + \node [draw=black] (agda) [right =of haskell] {Agda}; + \node [draw=black] (grin) [below =of haskell] {GRIN}; + \node [draw=black] (llvm) [below =of grin] {LLVM}; + + \path[-{Stealth[scale=1.5]}] + (idris) edge [] (grin) + (haskell) edge [] (grin) + (agda) edge [] (grin) + (grin) edge [] (llvm); + + + \end{tikzpicture} + \end{adjustbox} + \caption{Possible representations of different functional languages} + \label{fig:grin-back-end} + \end{figure} + + The main challenge of compiling GRIN to LLVM has to do with the discrepancy between the respective type systems of these languages: GRIN is untyped, while LLVM has static typing. In order to make compilation to LLVM possible\footnote{As a matter of fact, compiling untyped GRIN to LLVM \emph{is} possible, since only the registers are statically typed in LLVM, the memory is not. So in principle, if all variables were stored in memory, generating LLVM code from untyped GRIN would be feasible. However, this approach would prove to be very inefficient.}, we need a typed representation for GRIN as well. Fortunately, this problem can be circumvented by implementing a type inference algorithm for the language. To achieve this, we can extend an already existing component of the framework, the heap points-to data-flow analysis. + + \subsection{Heap points-to Analysis} + + Heap points-to analysis (hereinafter HPT), or pointer analysis, is a commonly used data-flow analysis in the context of imperative languages. The result of the analysis contains information about the possible variables or heap locations a given pointer can point to. 
In the context of GRIN, it is used to determine the type of data constructors (or nodes) a given variable could have been constructed with. The result is a mapping of variables and abstract heap locations to sets of data constructors. + + %TODO: example, reference + + The original version of the analysis presented in \cite{boquist-phd} and further detailed in \cite{boquist-grin} only supports node level granularity. This means that the types of literals are not differentiated; they are unified under a common ``basic value'' type. Therefore, the analysis cannot be used for type inference as it is. In order to facilitate type inference, HPT has to be extended so that it propagates type information about literals as well. This can be easily achieved by defining primitive types for the literal values. Using the result of the modified algorithm, we can generate LLVM IR code from GRIN. + + %TODO: reference UHC paper + + However, in some cases the monomorphic type inference algorithm presented above is not sufficient. For example, the Glasgow Haskell Compiler has polymorphic primitive operations. This means that despite GRIN being a monomorphic language, certain compiler front ends can introduce external polymorphic functions to GRIN programs. To resolve this problem, we have to further extend the heap points-to analysis. The algorithm now needs a table of external functions with their respective type information. These functions \emph{can} be polymorphic, hence they need special treatment during the analysis. When encountering external function applications, the algorithm has to determine the concrete type of the return value based on the possible types of the function arguments. Essentially, it has to fill all the type variables present in the type of the return value with concrete types. This can be achieved by unification. Fortunately, the unification algorithm can be expressed in terms of the same data-flow operations HPT already uses. 
+ + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/main.pdf b/papers/stcs-2019/article/tex/main.pdf new file mode 100644 index 00000000..8be06bbf Binary files /dev/null and b/papers/stcs-2019/article/tex/main.pdf differ diff --git a/papers/stcs-2019/article/tex/main.tex b/papers/stcs-2019/article/tex/main.tex new file mode 100644 index 00000000..8a83939c --- /dev/null +++ b/papers/stcs-2019/article/tex/main.tex @@ -0,0 +1,67 @@ +\documentclass[runningheads]{llncs} +%\documentclass{actacyb} +%\documentclass[10pt,a4paper,oneside]{article} + +\usepackage{style} +\usepackage{subfiles} + +\begin{document} + + \title{A modern look at GRIN,\\ an optimizing functional language back end\thanks{ The project has been supported by the European Union, co-financed by the European Social Fund (EFOP-3.6.3-VEKOP-16-2017-00002)}} + \titlerunning{A modern look at GRIN} + + \institute + { + Eötvös Loránd University, Budapest, Hungary\\ + \email{peter.d.podlovics@gmail.com}\\ + \and + \email{\{csaba.hruska, andor.penzes\}@gmail.com} + } + + \date{\today} + \author + { + Péter Dávid Podlovics\inst{1} \and + Csaba Hruska\inst{2} \and + Andor Pénzes\inst{2} + } + \authorrunning{P. Podlovics \and Cs. Hruska \and A. 
Pénzes} + + \maketitle + \subfile{abstract} + + \section{Introduction} \label{sec:intro} + \subfile{introduction} + + \section{Graph Reduction Intermediate Notation} + \subfile{grin} + + \section{Related Work} + \subfile{related-work} + + \section{Compiling to LLVM} + \subfile{llvm} + + \section{Dead Code Elimination} + \subfile{dce} + + \section{Dead Data Elimination} \label{sec:dde} + \subfile{dde} + + \section{Idris Front End} \label{sec:idris-front-end} + \subfile{idris-front-end} + + \section{Results} + \subfile{results} + + \section{Future Work} + \subfile{future-work} + + \section{Conclusions} + \subfile{conclusions} + + \newpage + \subfile{bibliography} + + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/related-work.tex b/papers/stcs-2019/article/tex/related-work.tex new file mode 100644 index 00000000..82195d64 --- /dev/null +++ b/papers/stcs-2019/article/tex/related-work.tex @@ -0,0 +1,74 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + This section will introduce the reader to the state-of-the-art concerning functional language compiler technologies and whole program optimization. It will compare these systems' main goals, advantages, drawbacks and the techniques they use. + + \subsection{The Glasgow Haskell Compiler} + + GHC~\cite{ghc} is the de facto Haskell compiler. It is an industrial strength compiler supporting Haskell2010 with a multitude of language extensions. It has full support for multi-threading, asynchronous exception handling, incremental compilation and software transactional memory. + + GHC is the most feature-rich stable Haskell compiler. However, its optimizer part is lacking in two respects. Firstly, neither of its intermediate representations (STG and Core) can express laziness explicitly, which means that the strictness analysis cannot be as optimal as it could be. 
Secondly, GHC only supports optimization on a per-module basis by default, and only optimizes across modules after inlining certain specific functions. This can drastically limit the information available for the optimization passes, hence decreasing their efficiency. The following sections will show alternative compilation techniques to resolve the issues presented above. + + \subsection{GRIN} + + Graph Reduction Intermediate Notation is an intermediate representation for lazy\footnote{Strict semantics can be expressed as well} functional languages. Due to its simplicity and high expressive power, it was utilized by several compiler back ends. + + \subsubsection{Boquist} + + The original GRIN framework was developed by U. Boquist, and first described in the article~\cite{boquist-grin}, then in his PhD thesis~\cite{boquist-phd}. This version of GRIN used the Chalmers Haskell-B Compiler~\cite{hbc} as its front end and RISC as its back end. The main focus of the entire framework is to produce highly efficient machine code from high-level lazy functional programs through a series of optimizing code transformations. At that time, Boquist's implementation of GRIN already compared favorably to the existing Glasgow Haskell Compiler of version 4.01. + + The language itself has very simple syntax and semantics, and is capable of explicitly expressing laziness. It only has very few built-in instructions (\pilcode{store}, \pilcode{fetch} and \pilcode{update}) which can be interpreted in two ways. Firstly, they can be seen as simple heap operations; secondly, they can represent graph reduction semantics~\cite{impl-fun-lang}. For example, we can imagine \pilcode{store} creating a new node, and \pilcode{update} reducing those nodes. + + GRIN also supports whole program optimization. Whole program optimization is a compiler optimization technique that uses information regarding the entire program instead of localizing the optimizations to functions or translation units. 
One of the most important whole program analyses used by the framework is the heap-points-to analysis, a variation of Andersen's pointer analysis~\cite{andersen-ptr}. + + \subsubsection{UHC} + + The Utrecht Haskell Compiler~\cite{uhc} is a completely standalone Haskell compiler with its own front end. The main idea behind UHC is to use attribute grammars to handle the ever-growing complexity of compiler construction in an easily manageable way. Mainly, the compiler is being used for education, since utilizing a custom system, the programming environment can be fine-tuned for the students, and the error messages can be made more understandable. + + UHC also uses GRIN as its IR for its back-end part, however the main focus has diverted from low level efficiency, and broadened to the spectrum of the entire compiler framework. It also extended the original IR with synchronous exception handling by introducing new syntactic constructs for \pilcode{try}/\pilcode{catch} blocks~\cite{uhc-exceptional-grin}. Also, UHC can generate code for many different targets including LLVM~\cite{llvm-2004}, .Net, JVM and JavaScript. + + \subsubsection{JHC} + + JHC~\cite{jhc} is another complete compiler framework for Haskell, developed by John Meacham. JHC's goal is to generate not only efficient, but also very compact code without the need of any runtime. The generated code only has to rely on certain system calls. JHC also has its own front end and back end just like UHC, but they serve different purposes. + + The front end of JHC uses a very elaborate type system called the pure type system~\cite{pts-berardi, pts-terlouw}. In theory, the pure type system can be seen as a generalization of the lambda cube~\cite{lambda-cube}, in practice it behaves similarly to the Glasgow Haskell Compiler's Core representation. For example, similar transformations can be implemented on them. + + For its intermediate representation, JHC uses an alternate version of GRIN. 
Meacham made several modifications to the original specification of GRIN. Some of the most relevant additions are mutable variables, memory regions (heap and stack) and throw-only IO exceptions. JHC's exceptions are rather simple compared to those of UHC, since they can only be thrown, but never caught. + + JHC generates completely portable ISO C from the intermediate GRIN code. + + \subsubsection{AJHC} + + Originally, AJHC~\cite{ajhc} was a fork of JHC, but later it was remerged with all of its functionalities. The main goal of AJHC was to utilize formal methods in systems programming. It was used to implement a NetBSD sound driver in high-level Haskell. + + \subsubsection{LHC} + + The LLVM Haskell Compiler~\cite{lhc} is a Haskell compiler made from reusable libraries using JHC-style GRIN as its intermediate representation. As its name suggests, it generates LLVM IR code from the intermediate GRIN. + + \subsection{Other Intermediate Representations} + + GRIN is not the only IR available for functional languages. In fact, it is not even the most advanced one. Other representations can either be structurally different or can have different expressive power. For example, GRIN and LLVM are both structurally and expressively different representations, because GRIN has monadic structure, while LLVM uses basic blocks, and while GRIN has sum types, LLVM has vector instructions. In general, different design choices can open up different optimization opportunities. + + \subsubsection{Intel Research Compiler} + + The Intel Labs Haskell Research Compiler~\cite{hrc} was a result of a long running research project of Intel focusing on functional language compilation. The project's main goal was to generate very efficient code for numerical computations utilizing whole program optimization. + + The compiler reused the front end part of GHC, and worked with the external Core representation provided by it.
Its optimizer part was written in MLton and was a general purpose compiler back end for strict functional languages. Differently from GRIN, it used basic blocks which can open up a whole spectrum of new optimization opportunities. Furthermore, instead of whole program defunctionalization (the generation of global \pilcode{eval}), their compiler used function pointers and data-flow analysis techniques to globally analyze the program. They also supported synchronous exceptions and multi-threading. + + One of their most relevant optimizations was the SIMD vectorization pass~\cite{hrc-simd}. Using this optimization, they could transform sequential programs into vectorized ones. In conjunction with their other optimizations, they achieved performance metrics comparable to native C~\cite{haskell-gap}. + + \subsubsection{MLton} + + MLton~\cite{mlton} is a widely used Standard ML compiler. It also uses whole program optimization, and focuses on efficiency. + + MLton has a wide array of distinct intermediate representations, each serving a different purpose. Each IR can express a certain aspect of the language more precisely than the others, allowing for more convenient implementation of the respective analyses and transformations. They use a technique similar to defunctionalization called 0CFA, a higher-order control flow analysis. This method serves a very similar purpose to defunctionalization, but instead of following function tags, it tracks function closures. Also, 0CFA can be generalized to k-CFA, where $k$ represents the number of different contexts the analysis distinguishes. The variant used by MLton distinguishes zero different contexts, meaning it is a \textit{context insensitive} analysis. The main advantage of this technique is that it can be applied to higher-order languages as well. + + Furthermore, MLton supports contification~\cite{contification}, a control flow based transformation, which turns function calls into continuations. 
This can expose a lot of additional control flow information, allowing for a broad range of optimizations such as tail recursive function call optimization. + + As for its back end, MLton has its own native code generator, but it can also generate LLVM IR code~\cite{mlton-llvm}. + + % \subsection{Compilers with LLVM Back Ends} + + % In the imperative setting, probably the most well-known compiler with an LLVM back end is Clang~\cite{clang}. Clang's main goal is to provide a production quality compiler with a reusable, library-like structure. However, certain functional language compilers also have LLVM back ends. The two most notable ones are the Glasgow Haskell Compiler~\cite{ghc} and MLton~\cite{mlton-llvm}. + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/results.pdf b/papers/stcs-2019/article/tex/results.pdf new file mode 100644 index 00000000..ebb159f8 Binary files /dev/null and b/papers/stcs-2019/article/tex/results.pdf differ diff --git a/papers/stcs-2019/article/tex/results.tex b/papers/stcs-2019/article/tex/results.tex new file mode 100644 index 00000000..b73b55eb --- /dev/null +++ b/papers/stcs-2019/article/tex/results.tex @@ -0,0 +1,234 @@ +\documentclass[main.tex]{subfiles} +\begin{document} + + In this section, we present the initial results of our implementation of the GRIN framework. The measurements presented here can only be considered preliminary, given the compiler needs further work to be comparable to systems like the Glasgow Haskell Compiler or the Idris compiler~\cite{idris}. Nevertheless, these statistics are still relevant, since they provide valuable information about the effectiveness of the optimizer. + + \subsection{Measured programs} + + %TODO: include code of Length + The measurements were taken using the Idris front end and LLVM back end of the compiler. 
Each test program --- besides ``Length" --- was adopted from the book \textit{Type-driven development with Idris}~\cite{tdd-idris} by Edwin Brady. These are small Idris programs demonstrating a certain aspect of the language. + + ``Length" is an Idris program, calculating the length of a list containing the natural numbers from 1 to 100. This example was mainly constructed to test how the dead data elimination pass can transform the inner structure of a list into a simple natural number (see Section~\ref{sec:dde}). + + \subsection{Measured metrics} + + Each test program went through the compilation pipeline described in Section~\ref{sec:idris-front-end}, and measurements were taken at certain points during the compilation. The programs were subject to three different types of measurements. + + \vspace{0.25cm} + \begin{itemize} + \item Static, compile time measurements of the GRIN code. + \item Dynamic, runtime measurements of the interpreted GRIN code. + \item Dynamic, runtime measurements of the executed binaries. + \end{itemize} + \vspace{0.25cm} + + The compile time measurements were taken during the GRIN optimization passes, after each transformation. The measured metrics were the number of \pilcode{store}s, \pilcode{fetch}es and function definitions. These measurements ought to illustrate how the GRIN code becomes more and more efficient during the optimization process. The corresponding diagrams for the static measurements are Diagrams~\ref{diagram:length-stats-ct}~to~\ref{diagram:reverse-stats-ct}. On the horizontal axis, we can see the indices of the transformations in the pipeline, and on the vertical axis, we can see the number of the corresponding syntax tree nodes. Reading these diagrams from left to right, we can observe the continuous evolution of the GRIN program throughout the optimization process. + + The runtime measurements of the interpreted GRIN programs were taken at three points during the compilation process.
First, right after the GRIN code is generated from the Idris byte code; second, after the regular optimization passes; and finally, at the end of the entire optimization pipeline. As can be seen in Figure~\ref{fig:idris-compilation-pipeline}, the regular optimizations are run a second time right after the dead data elimination pass. This is because the DDE pass can enable further optimizations. To clarify, the third runtime measurement of the interpreted GRIN program was taken after the second set of regular optimizations. The measured metrics were the number of executed function calls, case pattern matches, \pilcode{store}s and \pilcode{fetch}es. The goal of these measurements is to compare the GRIN programs at the beginning and at the end of the optimization pipeline, as well as to evaluate the efficiency of the dead data elimination pass. The corresponding diagrams for these measurements are Diagrams~\ref{diagram:length-stats-rt}~to~\ref{diagram:reverse-stats-rt}. + + The runtime measurements of the binaries were taken at the exact same points as the runtime measurements of the interpreted GRIN code. Their goal is similar as well; however, they ought to compare the generated binaries instead of the GRIN programs. The measured metrics were the size of the binary, the number of executed user-space instructions, stores and loads. The binaries were generated by the LLVM back end described in Section~\ref{subsec:llvm-back-end} with varying optimization levels for the LLVM Optimizer. The optimization levels are indicated in the corresponding tables: Tables~\ref{table:length-binary-results}~to~\ref{table:reverse-binary-results}. Where the optimization level is not specified, the default, \pilcode{O0} level was used. As for the LLVM Static Compiler and Clang, the most aggressive, \pilcode{O3} level was set for all the measurements.
+ + \subsection{Measurement setup} + + All the measurements were performed on a machine with \pilcode{Intel(R) Core(TM) i7-4710HQ CPU @ 2.50GHz} processor and \pilcode{Ubuntu 18.04 bionic} operating system with \pilcode{4.15.0-46-generic} kernel. The Idris compiler used by the front end is of version 1.3.1, and the LLVM used by the back end is of version 7. + + The actual commands for the binary generation are detailed in Program~code~\ref{code:binary-gen}. That script has two parameters: \pilcode{N} and \pilcode{llvm-in}. \pilcode{N} is the optimization level for the LLVM Optimizer, and \pilcode{llvm-in} is the LLVM program generated from the optimized GRIN code. + + \vspace{-0.5cm} + \begin{codeFloat}[h] + \begin{bash} + opt-7 -ON -o + llc-7 -O3 -relocation-model=pic -filetype=obj -o + clang-7 -O3 prim_ops.c runtime.c -s -o + \end{bash} + \caption{Commands for binary generation} + \label{code:binary-gen} + \end{codeFloat} + \vspace{-0.5cm} + + As for the runtime measurements of the binary, we used the \pilcode{perf} tool. The command used can be seen in Program~code~\ref{code:binary-measurements}. + + \vspace{-0.5cm} + \begin{codeFloat}[h] + \begin{bash} + perf stat -e cpu/mem-stores/u -e "r81d0:u" -e instructions:u + \end{bash} + \caption{Command for runtime measurements of the binary} + \label{code:binary-measurements} + \end{codeFloat} + \vspace{-0.5cm} + + + \subsection{Length} + + The first thing we can notice in the runtime statistics of the GRIN code is that the GRIN optimizer significantly reduced the number of heap operations, as well as the number of function calls and case pattern matches. Moreover, the DDE pass could further improve the program's performance by removing additional heap operations. + + The compile time statistics demonstrate an interesting phenomenon.
The number of \pilcode{store}s and function definitions keeps decreasing continuously, but at a certain point, the number of \pilcode{fetch}es suddenly increases by a relatively huge margin. This is due to the fact that the optimizer usually performs some preliminary transformations on the GRIN program \emph{before} inlining function definitions. This explains the sudden rise in the number of \pilcode{fetch}es during the early stages of the optimization process. Following that spike, the number of heap operations and function definitions gradually decreases until the program cannot be optimized any further. + + \begin{figure}[h] + \hspace{-0.5cm} + \renewcommand{\figurename}{Diagram} + \caption{Length - GRIN statistics} + \label{diagram:length-stats} + \addtocounter{figure}{-1} + \begin{minipage}{0.5\textwidth} + \subcaption{Runtime} + \label{diagram:length-stats-rt} + \includegraphics[scale=0.43]{length-runtime.png} + \end{minipage} + \begin{minipage}{0.5\textwidth} + \subcaption{Compile time} + \label{diagram:length-stats-ct} + \includegraphics[scale=0.43]{length-compile-time.png} + \end{minipage} + \end{figure} + + The runtime statistics for the executed binary are particularly interesting. First, observing the \pilcode{O0} statistics, we can see that the regular optimizations substantially reduced the number of executed instructions and memory operations, just as we saw with the interpreted GRIN code. However, on the one hand the DDE optimized binary did not perform any better than the regularly optimized one, but on the other hand its size decreased by more than 20\%.
+ + \begin{center} + \begin{minipage}{0.74\linewidth} + \captionof{table}{Length - CPU binary statistics} + \label{table:length-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 23928 & 769588 & 212567 & 233305 \\\hline + \pilcode{normal-O3} & 23928 & 550065 & 160252 & 170202 \\\hline + \pilcode{regular-opt} & 19832 & 257397 & 14848 & 45499 \\\hline + \pilcode{dde-O0} & 15736 & 256062 & 14243 & 45083 \\\hline + \pilcode{dde-O3} & 15736 & 284970 & 33929 & 54555 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + + Also, it is interesting to see that the aggressively optimized DDE binary performed much worse than the \pilcode{O0} version. This is because the default optimization pipeline of LLVM is designed for the C and C++ languages. As a consequence, in certain scenarios it may perform poorly for other languages. In the future, we plan to construct a better LLVM optimization pipeline for GRIN. + + \subsection{Exact length} + + For the GRIN statistics of ``Exact length", we can draw very similar conclusions as for ``Length". However, closely observing the statistics, we can see that the DDE pass completely eliminated \emph{all} heap operations from the program. In principle, this means that all the variables can be put into registers during the execution of the program. In practice, some variables will be spilled onto the stack, but the heap will never be used.
+ + \begin{figure}[h] + \hspace{-0.5cm} + \renewcommand{\figurename}{Diagram} + \caption{Exact length - GRIN statistics} + \label{diagram:exact-length-stats} + \addtocounter{figure}{-1} + \begin{minipage}{0.5\textwidth} + \subcaption{Runtime} + \label{diagram:exact-length-stats-rt} + \includegraphics[scale=0.43]{exact-length-runtime.png} + \end{minipage} + \begin{minipage}{0.5\textwidth} + \subcaption{Compile time} + \label{diagram:exact-length-stats-ct} + \includegraphics[scale=0.43]{exact-length-compile-time.png} + \end{minipage} + \end{figure} + + As for the binary statistics, we do not see any major improvements besides the significant reduction in the size of the binary. Although, it is worth pointing out, that the cost of memory operations can be considerably higher when accessing heap memory, and that the statistics presented here do not account for that. + + \begin{center} + \begin{minipage}{0.72\linewidth} + \captionof{table}{Exact length - CPU binary statistics} + \label{table:exact-length-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 18800 & 188469 & 14852 & 46566 \\\hline + \pilcode{normal-O3} & 14704 & 187380 & 14621 & 46233 \\\hline + \pilcode{regular-opt} & 10608 & 183560 & 13462 & 45214 \\\hline + \pilcode{dde-O0} & 10608 & 183413 & 13431 & 45189 \\\hline + \pilcode{dde-O3} & 10608 & 183322 & 13430 & 44226 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + + \newpage + \subsection{Type level functions} + + The GRIN statistics for this program may not be particularly interesting, but they demonstrate that the GRIN optimizations work for programs with many type level computations as well. 
+ + \begin{figure}[h] + \hspace{-0.5cm} + \renewcommand{\figurename}{Diagram} + \caption{Type level functions - GRIN statistics} + \label{diagram:tyfuns-stats} + \addtocounter{figure}{-1} + \begin{minipage}{0.5\textwidth} + \subcaption{Runtime} + \label{diagram:tyfuns-stats-rt} + \includegraphics[scale=0.43]{tyfuns-runtime.png} + \end{minipage} + \begin{minipage}{0.5\textwidth} + \subcaption{Compile time} + \label{diagram:tyfuns-stats-ct} + \includegraphics[scale=0.43]{tyfuns-compile-time.png} + \end{minipage} + \end{figure} + + The binary statistics look promising for ``Type level functions". Each measured performance metric is strictly decreasing, which suggests that even the default LLVM optimization pipeline can work for GRIN. + + \begin{center} + \begin{minipage}{0.72\linewidth} + \captionof{table}{Type level functions - CPU binary statistics} + \label{table:tyfuns-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 65128 & 383012 & 49191 & 86754 \\\hline + \pilcode{normal-O3} & 69224 & 377165 & 47556 & 84156 \\\hline + \pilcode{regular-opt} & 36456 & 312122 & 34340 & 71162 \\\hline + \pilcode{dde-O0} & 32360 & 312075 & 34331 & 70530 \\\hline + \pilcode{dde-O3} & 28264 & 309822 & 33943 & 70386 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + + \subsection{Reverse} + + Unlike the previous programs, ``Reverse" could not be optimized by the dead data elimination pass. The pass had no effect on it. Fortunately, the regular optimizations alone could considerably improve both the runtime and compile time metrics of the GRIN code. + + The binary statistics are rather promising. The binary size decreased by a substantial margin and the number of executed memory operations has also been reduced by quite a lot.
+ + \begin{figure}[h] + \hspace{-0.5cm} + \renewcommand{\figurename}{Diagram} + \caption{Reverse - GRIN statistics} + \label{diagram:reverse-stats} + \addtocounter{figure}{-1} + \begin{minipage}{0.5\textwidth} + \subcaption{Runtime} + \label{diagram:reverse-stats-rt} + \includegraphics[scale=0.43]{reverse-runtime.png} + \end{minipage} + \begin{minipage}{0.5\textwidth} + \subcaption{Compile time} + \label{diagram:reverse-stats-ct} + \includegraphics[scale=0.43]{reverse-compile-time.png} + \end{minipage} + \end{figure} + + \begin{center} + \begin{minipage}{0.76\linewidth} + \captionof{table}{Reverse - CPU binary statistics} + \label{table:reverse-binary-results} + \begin{tcolorbox}[tab2,tabularx={l||r|r|r|r}] + Stage & Size & Instructions & Stores & Loads \\ + \hline\hline + \pilcode{normal-O0} & 27112 & 240983 & 25018 & 58253 \\\hline + \pilcode{normal-O3} & 31208 & 236570 & 23808 & 56617 \\\hline + \pilcode{regular-opt-O0} & 14824 & 222085 & 19757 & 53125 \\\hline + \pilcode{regular-opt-O3} & 14824 & 220837 & 19599 & 52827 \\ + \end{tcolorbox} + \end{minipage} + \end{center} + + \subsection{General conclusions} + + In general, the measurements demonstrate that the GRIN optimizer can considerably improve the performance metrics of a given GRIN program. The regular optimizations themselves can usually produce highly efficient programs, however, in certain cases the dead data elimination pass can facilitate additional optimizations, and can further improve the performance. + + The results of the binary measurements indicate that the GRIN optimizer performs optimizations orthogonal to the LLVM optimizations. This supports the motivation behind the framework, which is to transform functional programs into a more manageable format for LLVM by eliminating the functional artifacts. This is backed up by the fact, that none of the fully optimized \pilcode{normal} programs could perform as well as the regularly or DDE optimized ones. 
Also, it is interesting to see, that there is not much difference between the \pilcode{O0} and \pilcode{O3} default LLVM optimization pipelines for GRIN. This motivates further research to find an optimal pipeline for GRIN. + + Finally, it is rather surprising to see, that the dead data elimination pass did not really impact the performance metrics of the executed binaries, but it significantly reduced their size. The former can be explained by the fact, that most of these programs are quite simple, and do not contain any compound data structures. Dead data elimination can shine when a data structure is used in a specific way, so that it can be locally restructured for each use site. However, when applying it to simple programs, we can obtain sub par results. + +\end{document} \ No newline at end of file diff --git a/papers/stcs-2019/article/tex/style.sty b/papers/stcs-2019/article/tex/style.sty new file mode 100644 index 00000000..af128b45 --- /dev/null +++ b/papers/stcs-2019/article/tex/style.sty @@ -0,0 +1,273 @@ +\ProvidesPackage{style} + +%\textwidth 15.0cm +%\textheight 22.0cm +%\oddsidemargin 0.4cm +%\evensidemargin 0.4cm +%\topmargin 0.0cm +%\frenchspacing +%\pagestyle{myheadings} +\setcounter{tocdepth}{1} + + +\usepackage[toc,page]{appendix} +\usepackage{hyperref} +\usepackage{float} +\usepackage{newfloat} +\usepackage{footnote} +\usepackage{subcaption} +\usepackage{cite} +\usepackage{url} +\usepackage{caption} +\usepackage{graphicx} +\graphicspath{ {../img/} } +\usepackage[bottom,perpage]{footmisc} +\usepackage{chngcntr} +\counterwithin{figure}{section} +\counterwithin{table}{section} + +\usepackage{enumitem} +\setlist{nosep} + +\usepackage[utf8]{inputenc} +\usepackage{etoolbox} +\usepackage{adjustbox} +\usepackage{latexsym,amssymb,amsmath,mathtools} + +\usepackage{algorithm} +\usepackage{algorithmicx} +\usepackage{algpseudocode} + +\floatstyle{plain} + +\DeclareFloatingEnvironment +[ name = {Program code} +, placement = htbp +, fileext = loc +, within = 
section +]{codeFloat} + +\DeclareFloatingEnvironment +[ name = {Table} +, placement = htbp +, fileext = lot +, within = section +]{tableFloat} + +\DeclareFloatingEnvironment +[ name = {Analysis example} +, placement = htbp +, fileext = loc +, within = section +]{analysisFloat} + +\DeclarePairedDelimiter\set\{\} + + +\hypersetup{% + colorlinks=true,% hyperlinks will be coloured + allcolors=blue,% hyperlink text will be blue +} + +\newcommand*\Let[2]{\State #1 $\gets$ #2} +\algrenewcommand\algorithmicrequire{\textbf{Precondition:}} +\algrenewcommand\algorithmicensure{\textbf{Postcondition:}} + +\newcommand*\patBind[3]{\State \textbf{#1} #2 $\rightarrow$ #3} +\algblockdefx[CaseBlock]{case}{endCase} % +[1]{\textbf{case} $#1$ \textbf{of}} % +{} +\algblockdefx[PatMatch]{patMatch}{endPatMatch} % +[3]{ \Call{#1}{\textbf{#2} #3} = } % +{} + +\usepackage{minted} + +\usepackage[table]{xcolor} +\usepackage{listings} +\usepackage{lstautogobble} +\definecolor{identifierColor}{rgb}{0.65,0.16,0.16} +\definecolor{keywordColor}{rgb}{0.65,0.20,0.90} +\lstnewenvironment{code} +{ \lstset + { language = Haskell + , basicstyle = \small\ttfamily + , breaklines = true + , backgroundcolor = \color{gray!15} + , frame = single + , autogobble = true + , xleftmargin = 0.1cm + , xrightmargin = 0.2cm + %, identifierstyle = \color{gray} + , keywordstyle = \color{violet} + , morekeywords = {} + , escapechar = \% + } +} +{} + +\PassOptionsToPackage{usenames,dvipsnames,svgnames}{xcolor} +\usepackage{tikz} +\usetikzlibrary{arrows,arrows.meta,shapes,positioning,automata,calc} + +\usepackage{pgfplots} +\usepackage{tcolorbox} +\usepackage{tabularx} +\usepackage{array} +\usepackage{zref-savepos} +\usepackage{diagbox} +\usepackage{colortbl} +\tcbuselibrary{skins} +\tcbuselibrary{minted} + +\newcolumntype{Y}{>{\raggedleft\arraybackslash}X} +\tcbset +{ tab2/.style = + { enhanced + , fonttitle=\bfseries + , fontupper=\normalsize\sffamily + , colback = gray!5!white + , colframe = gray!75!black + ,
colbacktitle=yellow!40!white + , coltitle=black,center title + } +, hbox +} + +\newtcblisting{haskell} +{ listing engine = minted +, minted style = colorful +, minted language = Haskell +, minted options = { fontsize = \small + , breaklines + , autogobble + , linenos + , numbersep = 3mm + , escapeinside = \%\% + } +, colback = gray!5!white +, colframe = gray!75!black +, listing only +, left = 5mm +, enhanced +, overlay = { \begin{tcbclipinterior} + \fill[gray!80!blue!20!white] (frame.south west) rectangle ([xshift=5mm]frame.north west); + \end{tcbclipinterior} + } +} +\newtcblisting{oneLineHaskell} +{ listing engine = minted +, minted style = colorful +, minted language = Haskell +, minted options = { fontsize = \normalsize + , breaklines + , autogobble + , numbersep = 3mm + , escapeinside = \%\% + } +, colback = gray!5!white +, colframe = gray!75!black +, listing only +, left = 2mm +, top = 1mm +, bottom = 1mm +, enhanced +} + +\newtcblisting{bash} +{ listing engine = minted +, minted style = colorful +, minted language = bash +, minted options = { fontsize = \small + , breaklines + , autogobble + , linenos + , numbersep = 3mm + , escapeinside = \%\% + } +, colback = gray!5!white +, colframe = gray!75!black +, listing only +, left = 5mm +, enhanced +, overlay = { \begin{tcbclipinterior} + \fill[gray!80!blue!20!white] (frame.south west) rectangle ([xshift=5mm]frame.north west); + \end{tcbclipinterior} + } +} + +\colorlet{lightgreen}{green!50!white} +\colorlet{lightblue}{blue!40!white} +\colorlet{lightyellow}{yellow!50!white} +\colorlet{lightred}{red!40!white} + +\newcommand*{\paper}{thesis} +\newcommand*{\ext}[1]{\texttt{#1}} +\newcommand*{\chk}[1]{\texttt{#1}} +\newcommand*{\lvar}[1]{\textit{#1}} +\newcommand*{\ilcode}[1]{\mintinline{Haskell}{#1}} % inline code +\newcommand*{\pilcode}[1]{\texttt{#1}} % plain inline code + +% NOTE: This command need styRectDef to be defined locally +\newcommand*{\tikzcustom}[0] +{ + % \tikzset{styRectDef/.style = {rectangle, 
rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}} + \tikzset{styRectGn/.style = {styRectDef, draw=green, fill=green!20}} + \tikzset{styRectBl/.style = {styRectDef, draw=cyan, fill=cyan!40}} + \tikzset{styRectGy/.style = {styRectDef, draw=gray, fill=gray!17}} + \tikzset{styConn/.style = {very thick, ->, -latex, shorten <=1mm, shorten >=1mm}} + + \tikzset{styAnnotDef/.style = {rectangle, rounded corners, draw=black, inner xsep=2mm, inner ysep=1mm}} + \tikzset{styLabel/.style = {styAnnotDef, draw=black, fill=gray!10}} + +} + +\newcounter{NoTableEntry} +\renewcommand*{\theNoTableEntry}{NTE-\the\value{NoTableEntry}} + +\newcommand*{\notableentry}{% + \multicolumn{1}{@{}c@{}|}{% + \stepcounter{NoTableEntry}% + \vadjust pre{\zsavepos{\theNoTableEntry t}}% top + \vadjust{\zsavepos{\theNoTableEntry b}}% bottom + \zsavepos{\theNoTableEntry l}% left + \hspace{0pt plus 1filll}% + \zsavepos{\theNoTableEntry r}% right + \tikz[overlay]{% + \draw[red] + let + \n{llx}={\zposx{\theNoTableEntry l}sp-\zposx{\theNoTableEntry r}sp}, + \n{urx}={0}, + \n{lly}={\zposy{\theNoTableEntry b}sp-\zposy{\theNoTableEntry r}sp}, + \n{ury}={\zposy{\theNoTableEntry t}sp-\zposy{\theNoTableEntry r}sp} + in + (\n{llx}, \n{lly}) -- (\n{urx}, \n{ury}) + (\n{llx}, \n{ury}) -- (\n{urx}, \n{lly}) + ; + }% + }% +} + +\makeatletter +\newcommand{\captionabove}[2][] +{ + \vskip-\abovecaptionskip + \vskip+\belowcaptionskip + \ifx\@nnil#1\@nnil + \caption{#2}% + \else + \caption[#1]{#2}% + \fi + \vskip+\abovecaptionskip + \vskip-\belowcaptionskip +} + +% automatic period at the end of footnote +\makeatletter% +\long\def\@makefntext#1{% + \parindent 1em\noindent \hb@xt@ 1.8em{\hss\@makefnmark}#1.} +\makeatother + +% this is needed for LNCS +\setlength\arraycolsep{6pt} +\setlength\tabcolsep{6pt} \ No newline at end of file diff --git a/papers/stcs-2019/presentation/img/background.jpg b/papers/stcs-2019/presentation/img/background.jpg new file mode 100644 index 00000000..f9e6237e Binary files /dev/null 
and b/papers/stcs-2019/presentation/img/background.jpg differ diff --git a/papers/stcs-2019/presentation/img/compact_blank_titlepage.jpg b/papers/stcs-2019/presentation/img/compact_blank_titlepage.jpg new file mode 100644 index 00000000..0a68d282 Binary files /dev/null and b/papers/stcs-2019/presentation/img/compact_blank_titlepage.jpg differ diff --git a/papers/stcs-2019/presentation/img/exact_length_ct.png b/papers/stcs-2019/presentation/img/exact_length_ct.png new file mode 100644 index 00000000..42a99523 Binary files /dev/null and b/papers/stcs-2019/presentation/img/exact_length_ct.png differ diff --git a/papers/stcs-2019/presentation/img/exact_length_rt.png b/papers/stcs-2019/presentation/img/exact_length_rt.png new file mode 100644 index 00000000..1e3f546b Binary files /dev/null and b/papers/stcs-2019/presentation/img/exact_length_rt.png differ diff --git a/papers/stcs-2019/presentation/img/hpt-boq.png b/papers/stcs-2019/presentation/img/hpt-boq.png new file mode 100644 index 00000000..ba502c46 Binary files /dev/null and b/papers/stcs-2019/presentation/img/hpt-boq.png differ diff --git a/papers/stcs-2019/presentation/img/length_ct.png b/papers/stcs-2019/presentation/img/length_ct.png new file mode 100644 index 00000000..704b791c Binary files /dev/null and b/papers/stcs-2019/presentation/img/length_ct.png differ diff --git a/papers/stcs-2019/presentation/img/length_rt.png b/papers/stcs-2019/presentation/img/length_rt.png new file mode 100644 index 00000000..e2da8c4a Binary files /dev/null and b/papers/stcs-2019/presentation/img/length_rt.png differ diff --git a/papers/stcs-2019/presentation/img/reverse_ct.png b/papers/stcs-2019/presentation/img/reverse_ct.png new file mode 100644 index 00000000..b993a553 Binary files /dev/null and b/papers/stcs-2019/presentation/img/reverse_ct.png differ diff --git a/papers/stcs-2019/presentation/img/reverse_rt.png b/papers/stcs-2019/presentation/img/reverse_rt.png new file mode 100644 index 00000000..8a6e4a47 Binary files 
/dev/null and b/papers/stcs-2019/presentation/img/reverse_rt.png differ diff --git a/papers/stcs-2019/presentation/img/section_head.jpg b/papers/stcs-2019/presentation/img/section_head.jpg new file mode 100644 index 00000000..db546997 Binary files /dev/null and b/papers/stcs-2019/presentation/img/section_head.jpg differ diff --git a/papers/stcs-2019/presentation/img/title.jpg b/papers/stcs-2019/presentation/img/title.jpg new file mode 100644 index 00000000..1cb14754 Binary files /dev/null and b/papers/stcs-2019/presentation/img/title.jpg differ diff --git a/papers/stcs-2019/presentation/img/tyfuns_ct.png b/papers/stcs-2019/presentation/img/tyfuns_ct.png new file mode 100644 index 00000000..bf4c9ba0 Binary files /dev/null and b/papers/stcs-2019/presentation/img/tyfuns_ct.png differ diff --git a/papers/stcs-2019/presentation/img/tyfuns_rt.png b/papers/stcs-2019/presentation/img/tyfuns_rt.png new file mode 100644 index 00000000..cc9a3e61 Binary files /dev/null and b/papers/stcs-2019/presentation/img/tyfuns_rt.png differ diff --git a/papers/stcs-2019/presentation/prez.notes b/papers/stcs-2019/presentation/prez.notes new file mode 100644 index 00000000..be06c3ce --- /dev/null +++ b/papers/stcs-2019/presentation/prez.notes @@ -0,0 +1,83 @@ +Intro to GRIN: + - the problem (small functions, laziness) + - GRIN is the solution (interprocedural, whole, IR) + + - store, fetch, update + - eval + - (other restrictions ...) + + - 5-6 (*) + + - simple code generation + - many small transformations + - HPT is the core + + - 6-7 + +Extensions: + - LLVM (sum simple?) 
+ - new HPT: + - original: for node info + - then: node info with simple type info (for LLVM) + - finally: type inference (polymorph primops) + + - 8-9 -1 (less HPT) + + - Dead Data Elimination + +Dead Data Elimination: + - first by Remi Turk for Agda + - motivational example: (*) + - length (List -> Nat) + - other applications: + - Map -> Set + - type class dictionaries + - Vector type index (EUTypes) + + - 11-12 + + - what else do we need? + - CBy (extended HPT, example) (*) + - LVA (standard) + - producer grouping (graph example) (*) + - transformations + + - 14-15 + +Results: + - only interpreted results + - pipeline setup + - diagrams + + - 18 - 19 + +Conclusions: + - the optimizer works well: + - #stores, #fetches, #funcalls and #cases significantly reduced + - code structure closer to imperative style + + - DDE is: + - a bit costly + - kinda specific, but can work wonders + - can trigger other transformations + + - 20 + + +Q&A: + - Why do we need Lambda? (closure conversion + lambda lifting) + cc: find free variables + ll: convert lambdas to top level functions + + - eval inlining ~?~ STG + eval/apply -> GRIN with indirect calls (funptr) + static analysis + - analysis cost: compiled abstract interpretation + + +no stores & no fetches --> in ideal case, everything can be put into registers (in worst onto the stack) + + +Notes: + - upto example (with head?) 
+ - LLVM code example + - opt list + - max 25 slides \ No newline at end of file diff --git a/papers/stcs-2019/presentation/template.tex b/papers/stcs-2019/presentation/template.tex new file mode 100644 index 00000000..44f7e350 --- /dev/null +++ b/papers/stcs-2019/presentation/template.tex @@ -0,0 +1,59 @@ +\documentclass[bigger]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} + +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\title{\bf Sample title} +\author{Anonymous} +\institute{Eötvös Loránd University (ELTE), \\ Budapest, Hungary} +\date{2018.} + +\begin{document} + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\frame{\vspace{15mm}\titlepage} +} + +\begin{frame}{Introduction} +\begin{itemize} +\item 1 +\item 2 +\item 3 +\end{itemize} +\end{frame} + +% this slide need not be used in the presentation, but must be +% present when you archive your talk + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\begin{frame}{} + +\bigskip\bigskip\bigskip + +{\bf\Huge\color{white} THANK YOU} + +\bigskip + +{\bf\Huge\color{white} FOR YOUR} + +\bigskip + +{\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +\end{document} diff --git a/papers/stcs-2019/presentation/tex/main.pdf b/papers/stcs-2019/presentation/tex/main.pdf new file mode 100644 index 00000000..f4635943 Binary files /dev/null and b/papers/stcs-2019/presentation/tex/main.pdf differ diff --git a/papers/stcs-2019/presentation/tex/main.tex b/papers/stcs-2019/presentation/tex/main.tex new file mode 100644 index 00000000..e219b394 --- /dev/null +++ b/papers/stcs-2019/presentation/tex/main.tex @@ -0,0 +1,1078 @@ + + +\documentclass[bigger]{beamer} + +\usepackage{style} + +
+\title[GRIN] %optional +{A modern look at GRIN} + +\subtitle{an optimizing functional language back end} + +\author[P. Podlovics, Cs. Hruska, A. Pénzes ] % (optional, for multiple authors) +{Péter Podlovics, Csaba Hruska, Andor Pénzes} + +\institute[ELTE] % (optional) +{ + Eötvös Loránd University (ELTE), \\ Budapest, Hungary +} + +\date{STCS-2019} % (optional) + + + +\begin{document} + +{ + \usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% + \frame{\vspace{15mm}\titlepage} +} + +\begin{frame} + \frametitle{Overview} + \tableofcontents +\end{frame} + + +\section{Introduction} + +\begin{frame}[fragile] + \frametitle{Why functional?} + + \begin{vfitemize} + \item Declarativeness + \begin{itemize} + \item[pro:] can program on a higher abstraction level + \end{itemize} + \item Composability\\ + \begin{itemize} + \item[pro:] can easily piece together smaller programs + \item[con:] results in a lot of function calls + \end{itemize} + \item Functions are first class citizens + \begin{itemize} + \item[pro:] higher order functions + \item[con:] unknown function calls + \end{itemize} + \end{vfitemize} + +\end{frame} + + +\begin{frame} +\frametitle{Graph Reduction Intermediate Notation} + +\begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.5cm and 1.5cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [draw=black] (haskell) {Haskell}; + \node [draw=black] (idris) [left =of haskell] {Idris}; + \node [draw=black] (agda) [right =of haskell] {Agda}; + \node [draw=black] (grin) [below =of haskell] {GRIN}; + \node [draw=black] (llvm) [below =of grin] {LLVM}; + + \path[-{Stealth[scale=1.5]}] + (idris) edge [] (grin) + (haskell) edge [] (grin) + (agda) edge [] (grin) + (grin) edge [] (llvm); + + + \end{tikzpicture} + \end{adjustbox} + \label{grin-backend} +\end{figure} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Front end code} 
+ +\begin{minipage}{0.35\textwidth} + + \begin{haskellcode} + main = sum (upto 0 10) + + upto n m + | n > m = [] + | otherwise = n : upto (n+1) m + + sum [] = 0 + sum (x:xs) = x + sum xs + \end{haskellcode} +\end{minipage} +\hfill +\pause +\begin{minipage}{0.4\textwidth} + \vspace{2cm} + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.3cm and 1cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [shape=ellipse,draw=black] (main) {main}; + \node [shape=ellipse,draw=black] (eval) [below =of main] {eval}; + \node [shape=ellipse,draw=black] (sum) [below left =of eval] {sum}; + \node [shape=ellipse,draw=black] (upto) [below right =of eval] {upto}; + + \path[-{Stealth[scale=1.5]}] + (main) edge [] (eval) + (eval) edge [bend left] (sum) + (eval) edge [bend right] (upto) + (sum) edge [bend left] (eval) + (upto) edge [bend right] (eval); + + + \end{tikzpicture} + \end{adjustbox} + \label{control-flow-lazy} + \end{figure} +\end{minipage} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{GRIN code} + +\begin{minipage}{0.4\textwidth} + + \begin{haskellcode} + grinMain = + t1 <- store (CInt 1) + t2 <- store (CInt 10) + t3 <- store (Fupto t1 t2) + t4 <- store (Fsum t3) + (CInt r) <- eval t4 + _prim_int_print r + \end{haskellcode} +\end{minipage} +\hfill +\begin{minipage}{0.48\textwidth} + \vspace{1cm} + \begin{haskellcode} + eval p = + v <- fetch p + case v of + (CInt n) -> pure v + (CNil) -> pure v + (CCons y ys) -> pure v + (Fupto a b) -> + zs <- upto a b + update p zs + pure zs + (Fsum c) -> + s <- sum c + update p s + pure s + \end{haskellcode} +\end{minipage} + + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Transformation machinery} + + \begin{vfitemize} + + \item Inline calls to \mintinline{haskell}{eval} + \item Run dataflow analyses: + \begin{itemize} + \item Heap points-to analysis + \item Sharing analysis + \end{itemize} + \item 
Run transformations until we reach a fixed-point: + \begin{itemize} + \item Sparse Case Optimization + \item Common Subexpression Elimination + \item Generalized Unboxing + \item etc \dots + \end{itemize} + + \end{vfitemize} + + +\end{frame} + + +\section{Extensions} + +\begin{frame}[fragile] +\frametitle{Extending Heap points-to} + + \vspace{1cm} + \begin{minipage}{\textwidth} + \begin{figure} + \includegraphics[scale=0.3]{hpt-boq.png} + \end{figure} + \end{minipage} + \vfill + \pause + \begin{minipage}{\textwidth} + \begin{figure} + $BAS \in \{ \text{Int64}, \text{Float}, \text{Bool}, \text{String}, \text{Char} \}$ + \end{figure} + \end{minipage} + \vfill + \pause + \begin{center} + \begin{minipage}{0.8\textwidth} + % real type would be: a -> State# s -> (# State# s, MutVar# s a #) + \begin{haskellcode} + indexArray# :: Array# a -> Int# -> (# a #) + newMutVar# :: a -> s -> (# s, MutVar# s a #) + \end{haskellcode} + \end{minipage} + \end{center} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{LLVM back end} + + \hspace{-4cm} + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{haskell} + grinMain = + t1 <- store (CInt 1) + t2 <- store (CInt 10) + t3 <- store (Fupto t1 t2) + t4 <- store (Fsum t3) + (CInt r') <- eval t4 + _prim_int_print r' + + upto m n = + (CInt m') <- eval m + (CInt n') <- eval n + b' <- _prim_int_gt m' n' + case b' of + #True -> pure (CNil) + + sum l = ... + + eval p = ... 
+ \end{minted} + \end{minipage} + \hspace{1.8cm} + \pause + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{haskell} + grinMain = + n1 <- sum 0 1 10 + _prim_int_print n1 + + sum s lo hi = + b <- _prim_int_gt lo hi + if b then + pure s + else + lo' <- _prim_int_add lo 1 + s' <- _prim_int_add s lo + sum s' lo' hi + + \end{minted} + \end{minipage} + \hspace{0.5cm} + \pause + \begin{minipage}[t]{0.30\textwidth} + \begin{minted}[fontsize=\scriptsize]{asm} + grinMain: + # BB#0: + movabsq $55, %rdi + jmp _prim_int_print + \end{minted} + \end{minipage} + +\end{frame} +%$ + +\section{Dead Data Elimination} + +\begin{frame}[fragile] +\frametitle{Dead data elimination} + +\begin{center} + \begin{minipage}{0.30\textwidth} + \begin{haskellcode} + length : List a -> Nat + length Nil = Z + length (Cons x xs) + = S (length xs) + \end{haskellcode} + \end{minipage} + \hspace{1cm} + $\xRightarrow{\text{DDE}}$ + \hfill + \begin{minipage}{0.5\textwidth} + \begin{haskellcode} + length p = + xs <- fetch p + case xs of + (Cons ys) -> + l1 <- length ys + l2 <- _prim_int_add l1 1 + pure l2 + (Nil) -> + pure 0 + \end{haskellcode} + \end{minipage} +\end{center} + + +\end{frame} + +\begin{frame} +\frametitle{Applications} + + \begin{vfitemize} + \item Map $\rightarrow$ Set + \item Type class dictionaries + \item Type erasure for dependently typed languages + \end{vfitemize} + +\end{frame} + +\begin{frame} +\frametitle{What do we need?} + + \begin{vfitemize} + \item Producers \& consumers + \item Detect dead fields + \item Connect consumers to producer + \item Remove or transform dead fields + \end{vfitemize} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Created-by} + +\begin{center} + \begin{minipage}{0.4\textwidth} + \begin{haskellcode} + grinMain = + a0 <- pure 5 + n0 <- pure (CNil) + p0 <- store n0 + n1 <- pure (CCons a0 p0) + r <- case n1 of + (CNil) -> + pure (CNil) + (CCons x xs) -> + xs' <- fetch xs + pure xs' + pure r + \end{haskellcode} + 
\end{minipage} + \hfill + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + Producers + a0 -> {} + n0 -> {CNil{n0}} + n1 -> {CCons{n1}} + p0 -> {} + r -> {CNil{n0}} + x -> {} + xs -> {} + xs' -> {CNil{n0}} + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Producers and consumers} + +\begin{figure}[h] +\centering +\begin{adjustbox}{scale = 1.3} + \begin{tikzpicture}[ node distance = 1cm and 2cm, on grid ] + + \node [shape=circle,draw=black] (P1) {$P_1$}; + \node [shape=circle,draw=black] (P2) [right =of P1] {$P_2$}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node [shape=circle,draw=black] (C2) [below =of Middle] {$C_2$}; + \node [shape=circle,draw=black] (C1) [left =of C2] {$C_1$}; + \node [shape=circle,draw=black] (C3) [right =of C2] {$C_3$}; + + \path[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge [] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + + \end{tikzpicture} +\end{adjustbox} +\label{fig:producers-and-consumers} +\end{figure} + +\end{frame} + + + +\section{Results} + +\begin{frame}[fragile] +\frametitle{Setup} + + \vspace{1.5cm} + \begin{vfitemize} + \item Small Idris code snippets from: \\ + \textit{Type-driven Development with Idris} by Edwin Brady + \item Only interpreted code + \item Compile- \& runtime measurements + \item Pipeline setup: + \end{vfitemize} + + \begin{figure} + \begin{adjustbox}{scale = 1} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + %\hspace{-1cm} + \begin{tikzpicture}[ node distance = 1.5cm and 3cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [draw=black] (cg) {Code gen.}; + \node [draw=black] (ro1) [right =of cg] {Regular Opts.}; + \node [draw=black] (dde) [right =2.5cm of ro1] {DDE}; + \node [draw=black] (ro2) [right =2.5cm of dde] {Regular Opts.}; + + \path[-{Stealth[scale=1.5]}] + (cg) edge [] (ro1) + (ro1) edge [loop] (ro1) + (ro1) edge [] (dde) + (dde) edge [] (ro2) + (ro2) edge [loop] (ro2); + + + \end{tikzpicture} + 
\end{adjustbox} + \label{fig:-measurement-pipeline} + \end{figure} + +\end{frame} + + + +\begin{frame}[fragile] +\frametitle{Length} + % real example + + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{length_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{length_ct.png}} + \end{minipage} + \end{figure} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Exact length} + % no stores & no fetches! (Maybe transformed) + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{exact_length_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{exact_length_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Reverse} + % interesting example, but no DDE + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{reverse_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{reverse_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + +\begin{frame}[fragile] +\frametitle{Type level functions} + % caveat + \begin{figure} + \hspace{-1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{tyfuns_rt.png}} + \end{minipage} + \hspace{1cm} + \begin{minipage}{0.45\textwidth} + \resizebox{\width}{5.5cm}{\includegraphics[scale=0.40]{tyfuns_ct.png}} + \end{minipage} + \end{figure} +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Conclusions} + \begin{vfitemize} + \item The optimizer works well: + \begin{itemize} + \item the number of stores, fetches, function calls and pattern matches significantly decreased + \item the structure of the code resembles 
that of an imperative language + \end{itemize} + \item Dead Data Elimination: + \begin{itemize} + \item is a bit costly + \item is a specific optimization + \item can completely transform data structures + \item can trigger further transformations + \end{itemize} + \end{vfitemize} +\end{frame} + + +{ + \usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% + \begin{frame}{} + + \bigskip\bigskip\bigskip + + {\bf\Huge\color{white} THANK YOU} + + \bigskip + + {\bf\Huge\color{white} FOR YOUR} + + \bigskip + + {\bf\Huge\color{white} ATTENTION!} + +\end{frame} +} + +% Q&A + +\begin{frame}[fragile] +\frametitle{Sparse case optimization} + +\begin{center} + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + v <- eval l + case v of + CNil -> + CCons x xs -> + \end{haskellcode} + \end{minipage} + $\xRightarrow{v \in \{ \text{CCons}\}}$ + \hfill + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + v <- eval l + case v of + CCons x xs -> + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Compiled data flow analysis} + +\begin{vfitemize} + \item Analyzing the syntax tree has an interpretation overhead + \item We can work around this by "compiling" our analysis into an executable program + \item The compiled abstract program is independent of the AST + \item It can be executed in a different context (ie.: by another program or on GPU) + \item After run (iteratively), it produces the result of the given analysis +\end{vfitemize} +\end{frame} + + + + + + + + + + + + + + + + + + + + + + +\begin{frame} + +\end{frame} + +\begin{frame}[fragile] + \frametitle{A small functional program} + + \begin{haskellcode} + main = sum (upto 0 10) + + upto from to + | from > to = [] + | otherwise = from : upto (from+1) to + + sum [] = 0 + sum (x:xs) = x + sum xs + \end{haskellcode} + +\end{frame} + + + +\begin{frame}[fragile] + \frametitle{Strict control flow} + + \begin{minipage}{0.35\textwidth} + \vspace{-2cm} + 
\begin{haskellcode} + main = sum (upto 0 10) + + upto m n + | m > n = [] + | otherwise = m : upto (m+1) n + + sum [] = 0 + sum (x:xs) = x + sum xs + \end{haskellcode} + \end{minipage} + \hfill + \begin{minipage}{0.6\textwidth} + \vspace{3cm} + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1cm and 1cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [shape=ellipse,draw=black] (main) {main}; + \node [shape=ellipse,draw=black] (sum) [below left =of main] {sum}; + \node [shape=ellipse,draw=black] (upto) [below right =of main] {upto}; + + \path[-{Stealth[scale=1.5]}] + (main) edge [] (sum) + (main) edge [] (upto) + (sum) edge [loop left] (sum) + (upto) edge [loop right] (upto); + + + \end{tikzpicture} + \end{adjustbox} + \label{control-flow-strict} + \end{figure} + \end{minipage} + +\end{frame} + + + + + + + + +\begin{frame}[fragile] +\frametitle{Optimized lazy control flow} + +\begin{minipage}{0.35\textwidth} + \vspace{-2cm} + \begin{haskellcode} + main = sum (upto 0 10) + + upto m n + | m > n = [] + | otherwise = m : upto (m+1) n + + sum [] = 0 + sum (x:xs) = x + sum xs + \end{haskellcode} +\end{minipage} +\hfill +\begin{minipage}{0.3\textwidth} + \vspace{1 cm} + \begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.4} + \tikzset{every loop/.style={-{Stealth[scale=1.5]}}} + + \begin{tikzpicture}[ node distance = 1.3cm and 1cm + , on grid + , loop/.append style={-triangle 60} + ] + + \node [shape=ellipse,draw=black] (main) {main}; + \node [shape=ellipse,draw=black] (sum) [below =of main] {sum}; + \node [shape=ellipse,draw=black,dashed] (upto) [below =of sum] {upto}; + + \path[-{Stealth[scale=1.5]}] + (main) edge [] (sum) + (sum) edge [loop right] (sum) + (sum) edge [] (upto); + + + \end{tikzpicture} + \end{adjustbox} + \label{control-flow-lazy-opt} + \end{figure} +\end{minipage} + +\end{frame} + + +\begin{frame} + \frametitle{Goals} + + 
\vspace{-2cm} + \begin{vfitemize} + \item We need to handle laziness + \item We need to optimize across functions + \item Accomplish both of these for all functional languages + \end{vfitemize} + +\end{frame} + + + + + + +\begin{frame}[fragile] + \frametitle{Properties} + + \begin{vfitemize} + \item Designed for the computer + \item Simple syntax, and semantics + \item Untyped, but we use a typed version (for LLVM) + \item First order language + \item Monadic structure + \item Single Static Assignment property + \item Explicit laziness + \item Global \mintinline{haskell}{eval} (generated) + \item No unknown function calls + \end{vfitemize} +\end{frame} + + + +\begin{frame} + \frametitle{Semantics} + + \begin{vfitemize} + \item C, F, P nodes + \item Only basic values and pointers can be in nodes + \item Functions cannot return pointers + \begin{itemize} + \item[-] More register usage is exposed + \item[-] The caller can decide whether the return value should be put onto the heap + \end{itemize} + \item \mintinline{haskell}{store}, + \mintinline{haskell}{fetch}, + \mintinline{haskell}{update} + \item Control flow can only diverge and merge at case expressions + \end{vfitemize} +\end{frame} + +\begin{frame}[fragile] + \frametitle{Laziness in GRIN} + + \begin{haskellcode} + upto m n = + (CInt m') <- eval m + (CInt n') <- eval n + b' <- _prim_int_gt m' n' + if b' then + pure (CNil) + else + m1' <- _prim_int_add m' 1 + m1 <- store (CInt m1') + p <- store (Fupto m1 n) + pure (CCons m p) + \end{haskellcode} + +\end{frame} + + + + + + + + + + +\begin{frame}[fragile] +\frametitle{Dead data elimination} + + \begin{center} + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + + n <- pure (CPair a b) + (CPair x y) <- pure n + + \end{haskellcode} + \end{minipage} + \hfill + $\xRightarrow{\text{x is dead}}$ + \hfill + \begin{minipage}{0.35\textwidth} + \begin{haskellcode} + + n <- pure (CPair b) + (CPair y) <- pure n + + \end{haskellcode} + \end{minipage} + \end{center} +
+\end{frame} + + + +\begin{frame} + \frametitle{Analysis types} + + \begin{vfitemize} + \item Whole program analysis\\ + \begin{itemize} + \item[] The entire program is subject to the analysis + \end{itemize} + + \item Interprocedural program analysis + \begin{itemize} + \item[] The analysis is performed across functions + \end{itemize} + + \item Context insensitive program analysis + \begin{itemize} + \item Information is not propagated back to the call site + \end{itemize} + \end{vfitemize} +\end{frame} + + + +\begin{frame}[fragile] + \frametitle{Heap-points-to} + + \begin{center} + \begin{minipage}{0.35\textwidth} + \begin{haskellcode} + grinMain = + a0 <- pure 5 + n0 <- pure (CNil) + p0 <- store n0 + n1 <- pure (CCons a0 p0) + r <- case n1 of + (CNil) -> + pure (CNil) + (CCons x xs) -> + xs' <- fetch xs + pure xs' + pure r + \end{haskellcode} + \end{minipage} + \hfill + \begin{minipage}{0.48\textwidth} + \begin{haskellcode} + Heap + 0 -> {CNil[]} + Env + a0 -> {T_Int64} + n0 -> {CNil[]} + n1 -> {CCons[{T_Int64},{0}]} + p0 -> {0} + r -> {CNil[]} + x -> {T_Int64} + xs -> {0} + xs' -> {CNil[]} + Function + grinMain :: {CNil[]} + + \end{haskellcode} + \end{minipage} + \end{center} + +\end{frame} + +\begin{frame}[fragile] +\frametitle{Created-by} + +\begin{center} + \begin{minipage}{0.4\textwidth} + \begin{haskellcode} + grinMain = + a0 <- pure 5 + n0 <- pure (CNil) + p0 <- store n0 + n1 <- pure (CCons a0 p0) + r <- case n1 of + (CNil) -> + pure (CNil) + (CCons x xs) -> + xs' <- fetch xs + pure xs' + pure r + \end{haskellcode} + \end{minipage} + \hfill + \begin{minipage}{0.40\textwidth} + \begin{haskellcode} + Producers + a0 -> {} + n0 -> {CNil{n0}} + n1 -> {CCons{n1}} + p0 -> {} + r -> {CNil{n0}} + x -> {} + xs -> {} + xs' -> {CNil{n0}} + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + + +\begin{frame} +\frametitle{Producers and consumers} + +\begin{figure}[h] + \centering + \begin{adjustbox}{scale = 1.3} + \begin{tikzpicture}[ node distance = 1cm 
and 2cm, on grid ] + + \node [shape=circle,draw=black] (P1) {$P_1$}; + \node [shape=circle,draw=black] (P2) [right =of P1] {$P_2$}; + \coordinate (Middle) at ($(P1)!0.5!(P2)$); + \node [shape=circle,draw=black] (C2) [below =of Middle] {$C_2$}; + \node [shape=circle,draw=black] (C1) [left =of C2] {$C_1$}; + \node [shape=circle,draw=black] (C3) [right =of C2] {$C_3$}; + + \path[-{Stealth[scale=1.5]}] (P1) edge [] (C1) + (P1) edge [] (C2) + (P2) edge [] (C2) + (P2) edge [] (C3); + + + \end{tikzpicture} + \end{adjustbox} + \label{fig:producers-and-consumers} +\end{figure} + +\end{frame} + + +\begin{frame}[fragile] +\frametitle{Liveness} + +\begin{center} + \begin{minipage}{0.35\textwidth} + \begin{haskellcode} + grinMain = + a0 <- pure 5 + n0 <- pure (CNil) + p0 <- store n0 + n1 <- pure (CCons a0 p0) + r <- case n1 of + (CNil) -> + pure (CNil) + (CCons x xs) -> + xs' <- fetch xs + pure xs' + pure r + \end{haskellcode} + \end{minipage} + \hfill + \begin{minipage}{0.45\textwidth} + \begin{haskellcode} + Heap + 0 -> {CNil[]} + Env + a0 -> DEAD + n0 -> {CNil[]} + n1 -> {CCons[DEAD,LIVE]} + p0 -> LIVE + r -> {CNil[]} + x -> DEAD + xs -> LIVE + xs' -> {CNil[]} + Function + grinMain :: {CNil[]} + \end{haskellcode} + \end{minipage} +\end{center} + +\end{frame} + +\begin{frame} +\frametitle{Results} +\begin{vfitemize} + \item eval inlining impact on code size + \item dead code elimination impact on code size + \item dead code elimination impact on performance + \item comparing intra- and interprocedural dead code elimination + \item how costly are they? + \item how do the resulting codes differ? + \item how should the transformations be ordered to minimize compilation time, and maximize performance? + \item how costly are the analyses? + \item how does the GRIN optimized code compare to GHC's?
+\end{vfitemize} +\end{frame} + +\begin{frame} + \frametitle{Summary} + \begin{vfitemize} + \item Compiling functional programs has its own challenges + \item We can make it easier by introducing a new IR + \item We can perform elaborate dataflow analyses on the IR, then ... + \item By transforming the code to a more manageable format, we can utilize the already existing infrastructure of LLVM + \end{vfitemize} +\end{frame} + +\end{document} + diff --git a/papers/stcs-2019/presentation/tex/style.sty b/papers/stcs-2019/presentation/tex/style.sty new file mode 100644 index 00000000..4583d036 --- /dev/null +++ b/papers/stcs-2019/presentation/tex/style.sty @@ -0,0 +1,244 @@ +\ProvidesPackage{style} + +\frenchspacing +\setcounter{tocdepth}{1} + +\setbeamertemplate{footline}[miniframes theme no subsection] +\setbeamertemplate{itemize items}[ball] + +\usepackage[toc,page]{appendix} +\usepackage{hyperref} +\usepackage{float} +\usepackage{subcaption} +\usepackage{cite} +\usepackage{url} +\usepackage{caption} +\usepackage{graphicx} +\graphicspath{ {../img/} } + +%\usepackage{enumitem} +%\setlist{nosep} + +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{etoolbox} +\usepackage{adjustbox} +\usepackage{latexsym,amssymb,amsmath,mathtools} + +\usepackage{algorithm} +\usepackage{algorithmicx} +\usepackage{algpseudocode} + + +\hypersetup{% + colorlinks=true,% hyperlinks will be coloured + allcolors=blue,% hyperlink text will be blue + linkcolor= +} + +\usepackage{minted} +\usepackage{xcolor} +\usepackage{listings} +\usepackage{lstautogobble} +\definecolor{identifierColor}{rgb}{0.65,0.16,0.16} +\definecolor{keywordColor}{rgb}{0.65,0.20,0.90} +\lstnewenvironment{code} +{ \lstset + { language = Haskell + , basicstyle = \small\ttfamily + , breaklines = true + , backgroundcolor = \color{gray!15} + , frame = single + , autogobble = true + , xleftmargin = 0.1cm + , xrightmargin = 0.2cm + %, identifierstyle = \color{gray} + , keywordstyle = \color{violet} + ,
morekeywords = {} + , escapechar = \% + } +} +{} + +\PassOptionsToPackage{usenames,dvipsnames,svgnames}{xcolor} +\usepackage{tikz} +\usetikzlibrary{arrows,arrows.meta,shapes,positioning,automata,calc} + +\usepackage{pgfplots} +\usepackage{tcolorbox} +\usepackage{tabularx} +\usepackage{array} +\usepackage{zref-savepos} +\usepackage{diagbox} +\usepackage{colortbl} +\tcbuselibrary{skins} +\tcbuselibrary{minted} + +\newcolumntype{Y}{>{\raggedleft\arraybackslash}X} +\tcbset +{ tab2/.style = + { enhanced + , fonttitle=\bfseries + , fontupper=\normalsize\sffamily + , colback = gray!5!white + , colframe = gray!75!black + , colbacktitle=yellow!40!white + , coltitle=black,center title + } + , hbox +} + +\newminted{haskell}{autogobble} + +\newtcblisting{haskell} +{ listing engine = minted + , minted style = colorful + , minted language = Haskell + , minted options = { fontsize = \small + , breaklines + , autogobble + , linenos + , numbersep = 3mm + , escapeinside = \%\% + } + , colback = gray!5!white + , colframe = gray!75!black + , listing only + , left = 5mm + , enhanced + , overlay = { \begin{tcbclipinterior} + \fill[gray!80!blue!20!white] (frame.south west) rectangle ([xshift=5mm]frame.north west); + \end{tcbclipinterior} + } +} + + +\newtcblisting{oneLineHaskell} +{ listing engine = minted + , minted style = colorful + , minted language = Haskell + , minted options = { fontsize = \normalsize + , breaklines + , autogobble + , numbersep = 3mm + , escapeinside = \%\% + } + , colback = gray!5!white + , colframe = gray!75!black + , listing only + , left = 2mm + , top = 1mm + , bottom = 1mm + , enhanced +} + +\colorlet{lightgreen}{green!50!white} +\colorlet{lightblue}{blue!40!white} +\colorlet{lightyellow}{yellow!50!white} +\colorlet{lightred}{red!40!white} + +\newcommand*{\paper}{thesis} +\newcommand*{\ext}[1]{\texttt{#1}} +\newcommand*{\chk}[1]{\texttt{#1}} +\newcommand*{\lvar}[1]{\textit{#1}} +\newcommand*{\ilcode}[1]{\mintinline{Haskell}{#1}} % inline code 
+\newcommand*{\pilcode}[1]{\texttt{#1}} % plain inline code + +% NOTE: This command needs styRectDef to be defined locally +\newcommand*{\tikzcustom}[0] +{ + % \tikzset{styRectDef/.style = {rectangle, rounded corners, draw=black, inner xsep=6mm, inner ysep=3mm}} + \tikzset{styRectGn/.style = {styRectDef, draw=green, fill=green!20}} + \tikzset{styRectBl/.style = {styRectDef, draw=cyan, fill=cyan!40}} + \tikzset{styRectGy/.style = {styRectDef, draw=gray, fill=gray!17}} + \tikzset{styConn/.style = {very thick, ->, -latex, shorten <=1mm, shorten >=1mm}} + + \tikzset{styAnnotDef/.style = {rectangle, rounded corners, draw=black, inner xsep=2mm, inner ysep=1mm}} + \tikzset{styLabel/.style = {styAnnotDef, draw=black, fill=gray!10}} + +} + +\newcounter{NoTableEntry} +\renewcommand*{\theNoTableEntry}{NTE-\the\value{NoTableEntry}} + +\newcommand*{\notableentry}{% + \multicolumn{1}{@{}c@{}|}{% + \stepcounter{NoTableEntry}% + \vadjust pre{\zsavepos{\theNoTableEntry t}}% top + \vadjust{\zsavepos{\theNoTableEntry b}}% bottom + \zsavepos{\theNoTableEntry l}% left + \hspace{0pt plus 1filll}% + \zsavepos{\theNoTableEntry r}% right + \tikz[overlay]{% + \draw[red] + let + \n{llx}={\zposx{\theNoTableEntry l}sp-\zposx{\theNoTableEntry r}sp}, + \n{urx}={0}, + \n{lly}={\zposy{\theNoTableEntry b}sp-\zposy{\theNoTableEntry r}sp}, + \n{ury}={\zposy{\theNoTableEntry t}sp-\zposy{\theNoTableEntry r}sp} + in + (\n{llx}, \n{lly}) -- (\n{urx}, \n{ury}) + (\n{llx}, \n{ury}) -- (\n{urx}, \n{lly}) + ; + }% + }% +} + +\makeatletter +\newcommand{\captionabove}[2][] +{ + \vskip-\abovecaptionskip + \vskip+\belowcaptionskip + \ifx\@nnil#1\@nnil + \caption{#2}% + \else + \caption[#1]{#2}% + \fi + \vskip+\abovecaptionskip + \vskip-\belowcaptionskip +} + +% automatic period at the end of footnote +\makeatletter% +\long\def\@makefntext#1{% + \parindent 1em\noindent \hb@xt@ 1.8em{\hss\@makefnmark}#1.} +\makeatother + +\newenvironment{vfitemize} +{ \begin{itemize} % + \let\olditem\item % +
+ \renewcommand\item{\vfill\olditem} +} % +{\end{itemize}} + + + + +% EFOP template stuff +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\AtBeginSection[] +{ + { + \usebackgroundtemplate% + {% + \includegraphics[width=\paperwidth,height=\paperheight]{section_head.jpg}% + } + + \begin{frame} + \centering + \color{white}\Huge\insertsectionhead + + \end{frame} + } + +} \ No newline at end of file diff --git a/papers/stcs-2019/presentation/tex/template.tex b/papers/stcs-2019/presentation/tex/template.tex new file mode 100644 index 00000000..44f7e350 --- /dev/null +++ b/papers/stcs-2019/presentation/tex/template.tex @@ -0,0 +1,59 @@ +\documentclass[bigger]{beamer} +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage{graphicx} + +\usebackgroundtemplate% +{% + \includegraphics[width=\paperwidth,height=\paperheight]{background.jpg}% +} + +\setbeamercolor{title}{fg=white} +\setbeamercolor{author}{fg=white} +\setbeamercolor{institute}{fg=white} +\setbeamercolor{date}{fg=white} +\setbeamercolor{frametitle}{fg=white} + +\title{\bf Sample title} +\author{Anonymous} +\institute{Eötvös Loránd University (ELTE), \\ Budapest, Hungary} +\date{2018.} + +\begin{document} + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\frame{\vspace{15mm}\titlepage} +} + +\begin{frame}{Introduction} +\begin{itemize} +\item 1 +\item 2 +\item 3 +\end{itemize} +\end{frame} + +% this slide need not be used in the presentation, but must be +% present when you archive your talk + +{ +\usebackgroundtemplate{\includegraphics[width=\paperwidth]{title.jpg}}% +\begin{frame}{} + +\bigskip\bigskip\bigskip + +{\bf\Huge\color{white} THANK YOU} + +\bigskip + +{\bf\Huge\color{white} FOR YOUR} + +\bigskip + +{\bf\Huge\color{white} 
ATTENTION!} + +\end{frame} +} + +\end{document}