diff --git a/flake.nix b/flake.nix index 3299196..53c17bc 100644 --- a/flake.nix +++ b/flake.nix @@ -52,10 +52,10 @@ enable = true; package = treefmtEval.config.build.wrapper; }; - typst-fmt = { + typstyle = { enable = true; - name = "Typst Formatter"; - entry = "${pkgs.typstfmt}/bin/typstfmt"; + name = "typstyle"; + entry = "${pkgs.typstyle}/bin/typstyle --check"; files = "\\.typ$"; language = "rust"; }; diff --git a/slides/00-tools.typ b/slides/00-tools.typ index 95ce234..f30f356 100644 --- a/slides/00-tools.typ +++ b/slides/00-tools.typ @@ -4,9 +4,7 @@ #new-section-slide("Tools") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Turing paper - #cite(, form: "prose") - Stan paper @@ -20,11 +18,13 @@ #focus-slide(background: julia-purple)[ #quote( - block: true, attribution: [Vita Sackville-West], + block: true, + attribution: [Vita Sackville-West], )[A man and his tools make a man and his trade] #quote( - block: true, attribution: [Winston Churchill], + block: true, + attribution: [Winston Churchill], )[We shape our tools and then the tools shape us] ] @@ -32,9 +32,7 @@ #align(center)[#image("images/memes/standards.png")] ] -#slide( - title: "Tools", -)[ +#slide(title: "Tools")[ - #link("https://mc-stan.org")[Stan] (BSD-3 License) - #link("https://turinglang.org")[Turing] (MIT License) @@ -46,15 +44,9 @@ - #link("https://www.mrc-bsu.cam.ac.uk/software/bugs/")[BUGS] (GPL License) ] -#slide( - title: [Stan #footnote[#cite(, form: "prose")]], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #text( - size: 16pt, - )[ +#slide(title: [Stan #footnote[#cite(, form: "prose")]])[ + #side-by-side(columns: (4fr, 1fr))[ + #text(size: 16pt)[ - High-performance platform for statistical modeling and statistical computation - Financial support from #link("https://numfocus.org/")[NUMFocus]: @@ -75,36 +67,30 @@ ] #slide(title: [Stan Code Example])[ -#fit-to-height(1fr)[```cpp - data { - int N; - vector[N] x; - vector[N] y; - } - parameters { - real alpha; - real beta; - real sigma; - } - model { - alpha ~ normal(0, 20); - beta ~ normal(0, 2); - sigma ~ cauchy(0, 2.5); - y ~ normal(alpha + beta * x, sigma); - } - ``` -] + #fit-to-height(1fr)[```cpp + data { + int N; + vector[N] x; + vector[N] y; + } + parameters { + real alpha; + real beta; + real sigma; + } + model { + alpha ~ normal(0, 20); + beta ~ normal(0, 2); + sigma ~ cauchy(0, 2.5); + y ~ normal(alpha + beta * x, sigma); + } + ``` + ] ] -#slide( - title: [Turing #footnote[#cite(, form: "prose")]], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #text( - size: 18pt, - )[ +#slide(title: [Turing #footnote[#cite(, form: "prose")]])[ + #side-by-side(columns: (4fr, 1fr))[ + #text(size: 18pt)[ - Ecosystem of Julia packages for Bayesian Inference using probabilistic programming @@ -127,12 +113,8 @@ ] ] -#slide( - title: [Turing Ecosystem], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Turing Ecosystem])[ + #text(size: 16pt)[ We have several Julia packages under Turing's GitHub organization #link("https://github.com/TuringLang")[TuringLang], but I will focus on 6 of those: @@ -170,9 +152,9 @@ ] @storopoli2021bayesianjulia ] Code Example])[ -#v(1em) + #v(1em) -```julia + ```julia @model function linreg(x, y) α ~ Normal(0, 20) β ~ Normal(0, 2) @@ -183,12 +165,8 @@ ``` ] -#slide( - title: [PyMC #footnote[#cite(form: "prose", )]], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [PyMC #footnote[#cite(form: "prose", )]])[ + #side-by-side(columns: (4fr, 1fr))[ - Python package 
for Bayesian statistics with a Markov Chain Monte Carlo sampler @@ -208,7 +186,7 @@ ] #slide(title: [PyMC Code Example])[ -```python + ```python with pm.Model() as model: alpha = pm.Normal("Intercept", mu=0, sigma=20) beta = pm.Normal("beta", mu=0, sigma=2) @@ -230,9 +208,7 @@ ] ] -#slide( - title: [Why Turing], -)[ +#slide(title: [Why Turing])[ - *Julia* all the way down... - Can *interface/compose* _any_ Julia package - Decoupling of *modeling DSL, inference algorithms and data* @@ -246,9 +222,7 @@ - Very easy to do *distributed model inference and prediction*. ] -#slide( - title: [Why _Not_ Turing], -)[ +#slide(title: [Why _Not_ Turing])[ - *Not as fast*, but pretty close behind, as Stan. - *Not enough learning materials*, example models, tutorials. Also documentation @@ -271,9 +245,7 @@ - *More tutorials, example models, and learning materials available*. ] -#slide( - title: [Why _Not_ Stan], -)[ +#slide(title: [Why _Not_ Stan])[ - If you want to try *something new*, you'll have to do in *C++*. - Constrained *only to HMC-NUTS* as MCMC algorithm. diff --git a/slides/01-bayesian_statistics.typ b/slides/01-bayesian_statistics.typ index d27d1bd..a8e0947 100644 --- a/slides/01-bayesian_statistics.typ +++ b/slides/01-bayesian_statistics.typ @@ -5,12 +5,8 @@ #new-section-slide("Bayesian Statistics") -#slide( - title: "Recommended References", -)[ - #text( - size: 18pt, - )[ +#slide(title: "Recommended References")[ + #text(size: 18pt)[ - #cite(, form: "prose") - Chapter 1: Probability and inference @@ -38,7 +34,8 @@ #focus-slide(background: julia-purple)[ #quote( - block: true, attribution: [Denis Lindley], + block: true, + attribution: [Denis Lindley], )[Inside every nonBayesian there is a Bayesian struggling to get out] ] @@ -47,9 +44,7 @@ #align(center)[#image("images/memes/frequentists-vs-bayesians.png")] ] -#slide( - title: [What is Bayesian Statistics?], -)[ +#slide(title: [What is Bayesian Statistics?])[ #v(2em) Bayesian statistics is a *data analysis approach based on Bayes' theorem* where @@ -67,12 +62,8 @@ The posterior can also be used to make predictions about future events. ] -#slide( - title: [What changes from Frequentist Statistics?], -)[ - #text( - size: 16pt, - )[ +#slide(title: [What changes from Frequentist Statistics?])[ + #text(size: 16pt)[ - *Flexibility* - probabilistic building blocks to construct a model #footnote[like LEGO]: - Probabilistic conjectures about parameters: - Prior @@ -90,9 +81,7 @@ ] ] -#slide( - title: [A little bit more formal], -)[ +#slide(title: [A little bit more formal])[ - Bayesian Statistics uses probabilistic statements: - one or more parameters $θ$ @@ -108,20 +97,14 @@ - We also, implicitly, condition on the observed data from any covariate $x$ ] -#slide( - title: [Definition of Bayesian Statistics], -)[ +#slide(title: [Definition of Bayesian Statistics])[ #v(4em) The use of Bayes theorem as the procedure to *estimate parameters of interest $θ$ or unobserved data $tilde(y)$*. @gelman2013bayesian ] -#slide( - title: [PROBABILITY DOES NOT EXIST! #footnote[#cite(, form: "prose")]], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [PROBABILITY DOES NOT EXIST! #footnote[#cite(, form: "prose")]])[ + #side-by-side(columns: (4fr, 1fr))[ #v(3em) - Yes, probability does not exist ... @@ -137,12 +120,8 @@ ] ] -#slide( - title: [PROBABILITY DOES NOT EXIST! #footnote[#cite(, form: "prose")]], -)[ - #side-by-side( - columns: (3fr, 2fr), - )[ +#slide(title: [PROBABILITY DOES NOT EXIST! 
#footnote[#cite(, form: "prose")]])[ + #side-by-side(columns: (3fr, 2fr))[ - Consider flipping a biased coin - The trials are considered independent and, as a result, have an important property: *the order does not matter* @@ -152,42 +131,72 @@ - We say that the probability is *invariant under permutations* ][ - #align( - center, - )[ + #align(center)[ #canvas( - length: 0.65cm, { + length: 0.65cm, + { import draw: * - set-style(mark: (fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 1) + set-style( + mark: (fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 1, + ) circle((0, 0)) - arc((1, 0), start: 0deg, delta: 180deg, mode: "PIE", fill: julia-green) - arc((-1, 0), start: 180deg, delta: 180deg, mode: "PIE", fill: julia-red) + arc( + (1, 0), + start: 0deg, + delta: 180deg, + mode: "PIE", + fill: julia-green, + ) + arc( + (-1, 0), + start: 180deg, + delta: 180deg, + mode: "PIE", + fill: julia-red, + ) content((0, 0.5), [#align(center)[#text(fill: white, size: 14pt)[H]]]) - content((0, -0.5), [#align(center)[#text(fill: white, size: 14pt)[T]]]) + content( + (0, -0.5), + [#align(center)[#text(fill: white, size: 14pt)[T]]], + ) line((0, -1), (-3.5, -5)) content((-2.5, -3))[#text(size: 14pt)[$0.5$]] line((0, -1), (3.5, -5)) content((2.5, -3))[#text(size: 14pt)[$0.5$]] circle((-3.5, -6), fill: julia-green) - content((-3.5, -6), [#align(center)[#text(fill: white, size: 14pt)[H]]]) + content( + (-3.5, -6), + [#align(center)[#text(fill: white, size: 14pt)[H]]], + ) line((-3.5, -7), (-5, -11)) content((-5, -9))[#text(size: 14pt)[$0.5$]] line((-3.5, -7), (-2, -11)) content((-2, -9))[#text(size: 14pt)[$0.5$]] circle((3.5, -6), fill: julia-red) - content((3.5, -6), [#align(center)[#text(fill: white, size: 14pt)[T]]]) + content( + (3.5, -6), + [#align(center)[#text(fill: white, size: 14pt)[T]]], + ) line((3.5, -7), (2, -11)) content((2, -9))[#text(size: 14pt)[$0.5$]] line((3.5, -7), (5, -11)) content((5, -9))[#text(size: 14pt)[$0.5$]] circle((-5, -12), fill: julia-green) - content((-5, -12), [#align(center)[#text(fill: white, size: 14pt)[H]]]) + content( + (-5, -12), + [#align(center)[#text(fill: white, size: 14pt)[H]]], + ) circle((-2, -12), fill: julia-red) - content((-2, -12), [#align(center)[#text(fill: white, size: 14pt)[T]]]) + content( + (-2, -12), + [#align(center)[#text(fill: white, size: 14pt)[T]]], + ) circle((2, -12), fill: julia-green) content((2, -12), [#align(center)[#text(fill: white, size: 14pt)[H]]]) @@ -216,9 +225,7 @@ - $P("me being elected president") = 10^(-10)$ (highly unlikely) ] -#slide( - title: [What is Probability?], -)[ +#slide(title: [What is Probability?])[ We define $A$ is an event and $P(A)$ the probability of event $A$. 
$P(A)$ has to be between $0$ and $1$, where higher values defines higher @@ -231,12 +238,8 @@ $ ] -#slide( - title: [Probability Axioms #footnote[#cite(, form: "prose")]], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [Probability Axioms #footnote[#cite(, form: "prose")]])[ + #side-by-side(columns: (4fr, 1fr))[ - *Non-negativity*: For every $A$: $P(A) >= 0$ @@ -278,17 +281,18 @@ - Neptune: ♆ ] -#slide( - title: [Discrete Sample Space], -)[ +#slide(title: [Discrete Sample Space])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #canvas( - length: 0.5cm, { + length: 0.5cm, + { import draw: * - set-style(mark: (fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 1) + set-style( + mark: (fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 1, + ) content((0, 0))[#text(size: 18pt, fill: white)[placeholder]] content((0, -2))[#text(size: 18pt)[ @@ -310,13 +314,16 @@ ) ] ][ - #align( - center, - )[ + #align(center)[ #canvas( - length: 0.5cm, { + length: 0.5cm, + { import draw: * - set-style(mark: (fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 1) + set-style( + mark: (fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 1, + ) content((0, 0))[#text(size: 18pt)[$θ ∈ E_1$]] circle((-7, -2)) @@ -413,44 +420,45 @@ ] ] -#slide( - title: [Continuous Sample Space], -)[ +#slide(title: [Continuous Sample Space])[ #side-by-side[ #align(center)[ - #canvas(length: 0.5cm, { - import draw: * - set-style(mark: (fill: black, size: 0.3), stroke: (thickness: 2pt)) - - content((0, 0))[#text(size: 18pt, fill: white)[placeholder]] - content((0, -2))[#text(size: 18pt)[ - The distance is less than five centimeters - ]] - content((0, -6))[#text(size: 18pt)[ - The distance is between three and seven centimeters - ]] - content((0, -10))[#text(size: 18pt)[ - The distance is less than five centimeters \ - _and_ between three and seven centimeters - ]] - content((0, -14))[#text(size: 18pt)[ - The distance is less than five centimeters \ - _or_ between three and seven centimeters - ]] - content((0, -18))[#text(size: 18pt)[ - The distance is _not_ less than five centimeters - ]] - }) + #canvas( + length: 0.5cm, + { + import draw: * + set-style(mark: (fill: black, size: 0.3), stroke: (thickness: 2pt)) + + content((0, 0))[#text(size: 18pt, fill: white)[placeholder]] + content((0, -2))[#text(size: 18pt)[ + The distance is less than five centimeters + ]] + content((0, -6))[#text(size: 18pt)[ + The distance is between three and seven centimeters + ]] + content((0, -10))[#text(size: 18pt)[ + The distance is less than five centimeters \ + _and_ between three and seven centimeters + ]] + content((0, -14))[#text(size: 18pt)[ + The distance is less than five centimeters \ + _or_ between three and seven centimeters + ]] + content((0, -18))[#text(size: 18pt)[ + The distance is _not_ less than five centimeters + ]] + }, + ) ] ][ - #align( - center, - )[ + #align(center)[ #canvas( - length: 0.5cm, { + length: 0.5cm, + { import draw: * set-style( - mark: (start: "|", end: "|", fill: black, size: 1), stroke: (thickness: 4pt), + mark: (start: "|", end: "|", fill: black, size: 1), + stroke: (thickness: 4pt), ) content((0, 0))[#text(size: 18pt)[$θ ∈ E_1$]] @@ -478,12 +486,8 @@ ] ] -#slide( - title: [Discrete versus Continuous Parameters], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Discrete versus Continuous Parameters])[ + #text(size: 18pt)[ Everything that has been exposed here was under the assumption that the parameters are discrete. 
@@ -505,9 +509,7 @@ ] ] -#slide( - title: [Conditional Probability], -)[ +#slide(title: [Conditional Probability])[ Probability of an event occurring in case another has occurred or not. The notation we use is $P(A | B)$, that read as "the probability of observing $A$ given @@ -521,9 +523,7 @@ assuming that $P(B) > 0$}. ] -#slide( - title: [Example of Conditional Probability -- Poker Texas Hold'em], -)[ +#slide(title: [Example of Conditional Probability -- Poker Texas Hold'em])[ - *Sample Space*: $52$ cards in a deck, $13$ types of cards and $4$ types of suits. @@ -538,18 +538,14 @@ Ace $(4 / 51 ≈ 0.078)$ ] -#slide( - title: [Caution! Not always $P(A | B) = P(B | A)$], -)[ +#slide(title: [Caution! Not always $P(A | B) = P(B | A)$])[ In the previous example we have the symmetry $P(A | K) = P(K | A)$, *but not always this is true* #footnote[ More specific, if the basal rates $P(A)$ and $P(B)$ aren't equal, the symmetry is broken $P(A | B) ≠ P(B | A)$ ] - #text( - size: 14pt, - )[ + #text(size: 14pt)[ The Pope is catholic: - $P("pope")$: Probability of some random person being the Pope, something really @@ -581,12 +577,8 @@ $ ] -#slide( - title: [Example of Joint Probability -- Revisiting Poker Texas Hold'em], -)[ - #text( - size: 17pt, - )[ +#slide(title: [Example of Joint Probability -- Revisiting Poker Texas Hold'em])[ + #text(size: 17pt)[ - *Sample Space*: $52$ cards in a deck, $13$ types of cards and $4$ types of suits. - $P(A)$: Probability of being dealt an Ace $(4 / 52 = 1 / 13)$ @@ -597,25 +589,25 @@ Ace $(4 / 51 ≈ 0.078)$ - $P(A, K)$: Probability of being dealt an Ace _and_ being dealt a King $ - P(A, K) &= P(K, A) \ + P(A, K) &= P(K, A) \ P(A) dot P(K | A) &= P(K) dot P(A | K) \ 1 / 13 dot 4 / 51 &= 1 / 13 dot 4 / 51 \ - &≈ 0.006 + &≈ 0.006 $ ] ] -#slide( - title: [Visualization of Joint Probability versus Conditional Probability], -)[ +#slide(title: [Visualization of Joint Probability versus Conditional Probability])[ #figure( - image("images/probability/joint_vs_conditional_probability.svg", height: 80%), caption: [$P(X,Y)$ versus $P(X | Y=-0.75)$], + image( + "images/probability/joint_vs_conditional_probability.svg", + height: 80%, + ), + caption: [$P(X,Y)$ versus $P(X | Y=-0.75)$], ) ] -#slide( - title: [Product Rule of Probability #footnote[also called the Product Rule of Probability.]], -)[ +#slide(title: [Product Rule of Probability #footnote[also called the Product Rule of Probability.]])[ #v(2em) We can decompose a joint probability $P(A,B)$ into the product of two @@ -624,20 +616,14 @@ #v(2em) $ - P(A,B) &= P(B,A) \ + P(A,B) &= P(B,A) \ P(A) dot P(B | A) &= P(B) dot P(A | B) $ ] -#slide( - title: [Who was Thomas Bayes?], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #text( - size: 17pt, - )[ +#slide(title: [Who was Thomas Bayes?])[ + #side-by-side(columns: (4fr, 1fr))[ + #text(size: 17pt)[ - *Thomas Bayes* (1701 - 1761) was a statistician, philosopher and Presbyterian minister who is known for formulating a specific case of the theorem that bears his name: Bayes' theorem. 
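Before moving on to Bayes' theorem itself, the product rule and the poker probabilities from the slides above can be checked numerically. A minimal sketch in plain Julia (no packages assumed); the card counts are the ones quoted in the slides:

```julia
# Texas Hold'em sanity check for the conditional, joint, and product-rule slides.
P_A = 4 / 52          # P(A): probability the first card dealt is an Ace
P_K = 4 / 52          # P(K): probability the first card dealt is a King
P_K_given_A = 4 / 51  # P(K | A): a King given an Ace is already gone
P_A_given_K = 4 / 51  # P(A | K): an Ace given a King is already gone

# Product rule: P(A, K) = P(A) * P(K | A) = P(K) * P(A | K) = P(K, A)
P_AK = P_A * P_K_given_A
P_KA = P_K * P_A_given_K

println(P_AK ≈ P_KA)              # true: the joint probability is symmetric
println(round(P_AK; digits = 3))  # ≈ 0.006, as stated in the slide
```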
@@ -671,7 +657,7 @@ #text(size: 18pt)[ $ - P(A,B) &= P(B,A) \ + P(A,B) &= P(B,A) \ P(A) dot P(B | A) &= P(B) dot P(A | B) $ @@ -680,19 +666,15 @@ $ (P(A) dot P(B | A)) / P(B) &= (P(B) dot P(A | B)) / P(B) \ (P(A) dot P(B | A)) / P(B) &= P(A | B) \ - P(A | B) &= (P(A) dot P(B | A)) / P(B) + P(A | B) &= (P(A) dot P(B | A)) / P(B) $ ] ] -#slide( - title: [A Probability Textbook Classic #footnote[Adapted from: #link( +#slide(title: [A Probability Textbook Classic #footnote[Adapted from: #link( "https://www.yudkowsky.net/rational/bayes", - )[Yudkowski - _An Intuitive Explanation of Bayes' Theorem_]]], -)[ - #text( - size: 15pt, - )[ + )[Yudkowski - _An Intuitive Explanation of Bayes' Theorem_]]])[ + #text(size: 15pt)[ How accurate is a *breast cancer* test? - 1% of women have *breast cancer* (Prevalence) @@ -704,20 +686,22 @@ $ P(C | +) &= (P(+ | C) dot P(C)) / P(+) \ - P(C | +) &= (P(+ | C) dot P(C)) / (P(+ | C) dot P(C) + P(+ | not C) dot P(not C)) \ + P(C | +) &= (P(+ | C) dot P(C)) / (P(+ | C) dot P(C) + P( + + | not C + ) dot P(not C)) \ P(C | +) &= (0.8 dot 0.01) / (0.8 dot 0.01 + 0.096 dot 0.99) \ P(C | +) &≈ 0.0776 $ ] ] -#slide( - title: [Why Bayes' Theorem is Important?], -)[ +#slide(title: [Why Bayes' Theorem is Important?])[ We can invert the conditional probability: $ - P("hypothesis" | "data") = (P("data" | "hypothesis") dot P("hypothesis")) / P("data") + P("hypothesis" | "data") = (P("data" | "hypothesis") dot P( + "hypothesis" + )) / P("data") $ #v(2em) @@ -727,9 +711,7 @@ #align(center)[#text(size: 24pt, fill: julia-red)[NO!]] ] -#slide( - title: [What are $p$-values?], -)[ +#slide(title: [What are $p$-values?])[ #v(2em) $p$-value is the probability of obtaining results at least as extreme as the @@ -744,12 +726,8 @@ #align(center)[#image("images/memes/pvalue.jpg", height: 80%)] ] -#slide( - title: [What $p$-value is *not*!], -)[ - #text( - size: 18pt, - )[ +#slide(title: [What $p$-value is *not*!])[ + #text(size: 18pt)[ - *$p$-value is not the probability of the null hypothesis* - #text(fill: julia-red)[No!] - Infamous confusion between $P(D | H_0)$ and $P(H_0 | D)$. @@ -768,9 +746,7 @@ ] ] -#slide( - title: [The relationship between $p$-value and $H_0$], -)[ +#slide(title: [The relationship between $p$-value and $H_0$])[ To find out about any $p$-value, *find out what $H_0$ is behind it*. It's definition will never change, since it is always $P(D | H_0)$: @@ -783,15 +759,12 @@ - *Shapiro-Wilk*: $P(D | "population is distributed as a Normal distribution")$ ] -#slide( - title: [What are Confidence Intervals?], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [What are Confidence Intervals?])[ + #side-by-side(columns: (4fr, 1fr))[ #v(2em) #quote( - block: true, attribution: [ + block: true, + attribution: [ #cite(, form: "prose") the "father" of confidence intervals ], )[ @@ -804,12 +777,8 @@ ] ] -#slide( - title: [What are Confidence Intervals?], -)[ - #text( - size: 18pt, - )[ +#slide(title: [What are Confidence Intervals?])[ + #text(size: 18pt)[ Say you performed a statistical analysis to compare the efficacy of a public policy between two groups and you obtain a difference between the mean of these groups. You can express this difference as a confidence interval. Often we @@ -824,25 +793,29 @@ The interval that we got in this particular instance is irrelevant and might as well be thrown away. 
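This long-run claim can be made concrete with a short simulation: rebuild the interval many times from fresh samples and count how often it covers the true mean. A minimal sketch, assuming Distributions.jl plus the Random and Statistics standard libraries; the true parameters, sample size, and number of replications are illustrative choices, not anything prescribed by the slides:

```julia
# Coverage reading of a 95% confidence interval: about 95% of the intervals
# built over repeated sampling contain the true mean -- a statement about the
# procedure, never about the single interval you happened to compute.
using Distributions, Random, Statistics

Random.seed!(123)
μ_true, σ_true = 2.0, 1.5   # illustrative "population" parameters
n, reps = 30, 10_000        # sample size and number of repeated experiments

covered = count(1:reps) do _
    y = rand(Normal(μ_true, σ_true), n)
    half = 1.96 * std(y) / sqrt(n)            # normal-approximation half-width
    mean(y) - half <= μ_true <= mean(y) + half
end

println(covered / reps)   # ≈ 0.95
```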
- #text( - fill: julia-red, - )[ + #text(fill: julia-red)[ Doesn't say anything about you *target population*, but about you *sample* in an insane process of *infinite sampling* ... ] ] ] -#slide( - title: [Confidence Intervals versus Posterior Intervals], -)[ - #align( - center, - )[ +#slide(title: [Confidence Intervals versus Posterior Intervals])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $θ$, y-label: none, x-tick-step: 5, y-tick-step: 0.25, x-min: -0.1, x-max: 4, y-min: -0.01, y-max: 1.5, { + size: (16, 9), + x-label: $θ$, + y-label: none, + x-tick-step: 5, + y-tick-step: 0.25, + x-min: -0.1, + x-max: 4, + y-min: -0.01, + y-max: 1.5, + { plot.add(domain: (0.02, 4), samples: 200, x => lognormal(x, 0, 2)) plot.add( domain: (0.25950495026507125, 3.8534910373715427), samples: 200, style: plot.palette.blue, hypograph: true, @@ -869,21 +842,18 @@ #align(center)[#image("images/memes/assumptions-vs-reality.jpg")] ] -#slide( - title: [But why I never see stats without $p$-values?], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #text( - size: 17pt, - )[ +#slide(title: [But why I never see stats without $p$-values?])[ + #side-by-side(columns: (4fr, 1fr))[ + #text(size: 17pt)[ We cannot understand $p$-values if we do no not comprehend its origins and historical trajectory. The first mention of $p$-values was made by the statistician Ronald Fischer in 1925: - #quote(block: true, attribution: [ - #cite(, form: "prose") - ])[ + #quote( + block: true, + attribution: [ + #cite(, form: "prose") + ], + )[ $p$-value is a measure of evidence against the null hypothesis ] - To quantify the strength of the evidence against the null hypothesis, Fisher @@ -897,9 +867,7 @@ ] ] -#slide( - title: [$p = 0.06$], -)[ +#slide(title: [$p = 0.06$])[ - Since $p$-value is a probability, it is also a continuous measure. @@ -913,21 +881,18 @@ much as the $.05$" @rosnow1989statistical. ] -#slide( - title: [ - But why I never heard about Bayesian statistics? #footnote[ +#slide(title: [ + But why I never heard about Bayesian statistics? #footnote[ _inverse probability_ was how Bayes' theorem was called in the beginning of the 20th century. ] - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +])[ + #side-by-side(columns: (4fr, 1fr))[ #v(3em) #quote( - block: true, attribution: [ + block: true, + attribution: [ #cite(, form: "prose") ], )[ @@ -940,17 +905,13 @@ ] ] -#slide( - title: [ - Inside every nonBayesian, there is a Bayesian struggling to get out - #footnote[ - quote from Dennis Lindley. - ] - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [ + Inside every nonBayesian, there is a Bayesian struggling to get out + #footnote[ + quote from Dennis Lindley. 
+ ] +])[ + #side-by-side(columns: (4fr, 1fr))[ #v(2em) - In his final year of life, Fisher published a paper @@ -966,19 +927,14 @@ ] ] -#slide( - title: [Bayes' Theorem as an Inference Engine], -)[ - #text( - size: 13pt, - )[ +#slide(title: [Bayes' Theorem as an Inference Engine])[ + #text(size: 13pt)[ Now that you know what is probability and Bayes' theorem, I will propose the following: $ underbrace(P(θ | y), "Posterior") = - (overbrace(P(y | θ), "Likelihood") dot overbrace(P(θ), "Prior")) / - underbrace(P(y), "Normalizing Constant") + (overbrace(P(y | θ), "Likelihood") dot overbrace(P(θ), "Prior")) / underbrace(P(y), "Normalizing Constant") $ - $θ$ -- parameter(s) of interest @@ -1000,9 +956,7 @@ ] ] -#slide( - title: [Bayes' Theorem as an Inference Engine], -)[ +#slide(title: [Bayes' Theorem as an Inference Engine])[ Bayesian statistics allows us to *quantify directly the uncertainty* related to the value of one or more parameters of our model given the observed data. @@ -1019,19 +973,28 @@ ] // typstfmt::off -#slide( -title: [Bayesian vs Frequentist Stats], -)[ +#slide(title: [Bayesian vs Frequentist Stats])[ #table( columns: (auto, auto, auto), inset: 10pt, align: (left + horizon, center + horizon, center + horizon), - [], [#text(fill: julia-blue)[Bayesian Statistics]], [#text(fill: julia-red)[Frequentist Statistics]], - [*Data*], [Fixed -- Non-random], [Uncertain -- Random], - [*Parameters*], [Uncertain -- Random], [Fixed -- Non-random], - [*Inference*], [Uncertainty regarding the parameter value], [Uncertainty regarding the sampling process from an infinite population], - [*Probability*], [Subjective], [Objective (but with several model assumptions)], - [*Uncertainty*], [Posterior Interval -- $P(θ | y)$], [Confidence Interval -- $P(y | θ)$], + [], + [#text(fill: julia-blue)[Bayesian Statistics]], + [#text(fill: julia-red)[Frequentist Statistics]], + + [*Data*], [Fixed -- Non-random], [Uncertain -- Random], + [*Parameters*], [Uncertain -- Random], [Fixed -- Non-random], + [*Inference*], + [Uncertainty regarding the parameter value], + [Uncertainty regarding the sampling process from an infinite population], + + [*Probability*], + [Subjective], + [Objective (but with several model assumptions)], + + [*Uncertainty*], + [Posterior Interval -- $P(θ | y)$], + [Confidence Interval -- $P(y | θ)$], ) ] // typstfmt::on @@ -1053,31 +1016,27 @@ title: [Bayesian vs Frequentist Stats], ] #slide(title: [The beginning of the end of Frequentist Statistics])[ - #text( - size: 18pt - )[ - - Know that you are in a very special moment in history of great changes in statistics - - - I believe that frequentist statistics, specially the way we qualify evidence and hypotheses with - $p$-values will transform in a "significant" #footnote[pun intended ...] way. - - - 8 years ago, the _American Statistical Association_ (ASA) published a declaration about - $p$-values @Wasserstein2016. - It states exactly what we exposed here: - The main concepts of the null hypothesis significant testing and, - in particular $p$-values, cannot provide what researchers demand of them. - Despite what says several textbooks, learning materials and published content, - $p$-values below $0.05$ doesn't "prove" anything. - Not, on the other way around, $p$-values higher than $0.05$ refute anything. - - - ASA statement has more than 4.700 citations with relevant impact. 
+ #text(size: 18pt)[ + - Know that you are in a very special moment in history of great changes in statistics + + - I believe that frequentist statistics, specially the way we qualify evidence and hypotheses with + $p$-values will transform in a "significant" #footnote[pun intended ...] way. + + - 8 years ago, the _American Statistical Association_ (ASA) published a declaration about + $p$-values @Wasserstein2016. + It states exactly what we exposed here: + The main concepts of the null hypothesis significant testing and, + in particular $p$-values, cannot provide what researchers demand of them. + Despite what says several textbooks, learning materials and published content, + $p$-values below $0.05$ doesn't "prove" anything. + Not, on the other way around, $p$-values higher than $0.05$ refute anything. + + - ASA statement has more than 4.700 citations with relevant impact. ] ] #slide(title: [The beginning of the end of Frequentist Statistics])[ - #text( - size: 17pt - )[ + #text(size: 17pt)[ - An international symposium was promoted in 2017 which originated an open-access special edition of _The American Statistician_ dedicated to practical ways to abandon $p < 0.05$ @wassersteinMovingWorld052019. diff --git a/slides/02-statistical_distributions.typ b/slides/02-statistical_distributions.typ index be39ee5..ccde8d8 100644 --- a/slides/02-statistical_distributions.typ +++ b/slides/02-statistical_distributions.typ @@ -22,9 +22,7 @@ #align(center)[#image("images/memes/statistical_distributions.jpg")] ] -#slide( - title: [Probability Distributions], -)[ +#slide(title: [Probability Distributions])[ Bayesian statistics uses probability distributions as the inference engine of the parameter and uncertainty estimates. @@ -39,9 +37,7 @@ relationships. ] -#slide( - title: [Probability Distribution Function], -)[ +#slide(title: [Probability Distribution Function])[ A probability distribution function is a mathematical function that outputs the probabilities for different results of an experiment. It is a mathematical description of a random phenomena in terms of its sample space and the event @@ -53,9 +49,7 @@ variables, we define as "density". ] -#slide( - title: [Mathematical Notation], -)[ +#slide(title: [Mathematical Notation])[ We use the notation $ X tilde "Dist"(θ_1, θ_2, dots) $ @@ -71,18 +65,24 @@ that allow to control certain distribution aspects for a specific goal. 
] -#slide( - title: [Probability Distribution Function], -)[ - #align( - center, - )[ +#slide(title: [Probability Distribution Function])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $X$, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, y-max: 0.45, { + size: (16, 9), + x-label: $X$, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + y-max: 0.45, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => gaussian(x, 0, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => gaussian(x, 0, 1), ) }, ) @@ -91,9 +91,7 @@ ] ] -#slide( - title: [Cumulative Distribution Function], -)[ +#slide(title: [Cumulative Distribution Function])[ The cumulative distribution function (CDF) of a random variable $X$ evaluated at $x$ is the probability that $X$ will take values less or qual than $x$: @@ -101,18 +99,25 @@ $ "CDF" = P(X <= x) $ ] -#slide( - title: [Cumulative Distribution Function], -)[ - #align( - center, - )[ +#slide(title: [Cumulative Distribution Function])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $X$, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.25, y-min: -0.01, y-max: 1.01, { + size: (16, 9), + x-label: $X$, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.25, + y-min: -0.01, + y-max: 1.01, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => normcdf(x, 0, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => normcdf(x, 0, 1), ) }, ) @@ -121,9 +126,7 @@ ] ] -#slide( - title: [Discrete Distributions], -)[ +#slide(title: [Discrete Distributions])[ Discrete probability distributions are distributions which the results are a discrete number: $-N, dots, -2, 1, 0, 1, 2, dots, N$ and $N ∈ ZZ$. @@ -139,9 +142,7 @@ $ "PMF"(x) = P(X = x) $ ] -#slide( - title: [Discrete Uniform], -)[ +#slide(title: [Discrete Uniform])[ The discrete uniform is a symmetric probability distribution in which a finite number of values are equally likely of being observable. Each one of the $n$ values have probability $1 / n$. @@ -159,37 +160,47 @@ Example: dice. ] -#slide( - title: [Discrete Uniform], -)[ +#slide(title: [Discrete Uniform])[ #v(4em) - $ "Uniform"(a, b) = f(x, a, b) = 1 / (b - a + 1) "for" a <= x <= b "and" x ∈ {a, a + 1, dots ,b - 1, b} $ + $ + "Uniform"(a, b) = f(x, a, b) = 1 / (b - a + 1) "for" a <= x <= b "and" x ∈ { + a, a + 1, dots ,b - 1, b + } + $ ] -#slide( - title: [Discrete Uniform], -)[ - #align( - center, - )[ +#slide(title: [Discrete Uniform])[ + #align(center)[ #figure( { let data = for i in range(1, 7) { ((discreteuniform(1, 6), i),) } - let x_axis = axis(min: 0, max: 7, step: 1, location: "bottom", title: none) - let y_axis = axis(min: 0, max: 0.21, step: 0.1, location: "left", title: $"PMF"$) + let x_axis = axis( + min: 0, + max: 7, + step: 1, + location: "bottom", + title: none, + ) + let y_axis = axis( + min: 0, + max: 0.21, + step: 0.1, + location: "left", + title: $"PMF"$, + ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$a = 1, b = 6$], numbering: none, + }, + caption: [$a = 1, b = 6$], + numbering: none, ) ] ] -#slide( - title: [Bernoulli], -)[ +#slide(title: [Bernoulli])[ Bernoulli distribution describes a binary event of the success of an experiment. 
We represent $0$ as failure and $1$ as success, hence the result of a Bernoulli distribution is a binary variable $Y ∈ {0, 1}$. @@ -213,29 +224,34 @@ $ "Bernoulli"(p) = f(x, p)=p^(x)(1 - p)^(1 - x) "for" x ∈ {0, 1} $ ] -#slide( - title: [Bernoulli], -)[ - #align( - center, - )[ +#slide(title: [Bernoulli])[ + #align(center)[ #figure( { let data = ((0.666, "0"), (0.333, "1")) - let x_axis = axis(values: ("", "0", "1"), location: "bottom", title: none) + let x_axis = axis( + values: ("", "0", "1"), + location: "bottom", + title: none, + ) let y_axis = axis( - min: 0, max: 0.7, step: 0.2, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.7, + step: 0.2, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$p = 1 / 3$], numbering: none, + }, + caption: [$p = 1 / 3$], + numbering: none, ) ] ] -#slide( - title: [Binomial], -)[ +#slide(title: [Binomial])[ The binomial distribution describes an event in which the number of successes in a sequence $n$ independent experiments, each one making a yes--no question with probability of success $p$. Notice that Bernoulli distribution is a special case @@ -254,21 +270,19 @@ Example: number of heads in five coin throws. ] -#slide( - title: [Binomial], -)[ +#slide(title: [Binomial])[ #v(4em) - $ "Binomial"(n,p) = f(x, n, p) = binom(n, x)p^(x)(1-p)^(n-x) "for" x ∈ {0, 1, dots, n} $ + $ + "Binomial"(n,p) = f(x, n, p) = binom(n, x)p^(x)(1-p)^(n-x) "for" x ∈ { + 0, 1, dots, n + } + $ ] -#slide( - title: [Binomial], -)[ +#slide(title: [Binomial])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { let data = for i in range(0, 11) { @@ -279,17 +293,22 @@ x_ticks.insert(0, "") let x_axis = axis(values: x_ticks, location: "bottom", title: none) let y_axis = axis( - min: 0, max: 0.5, step: 0.1, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.5, + step: 0.1, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$n = 10, p = 1 / 5$], numbering: none, + }, + caption: [$n = 10, p = 1 / 5$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { let data = for i in range(0, 11) { @@ -300,19 +319,24 @@ x_ticks.insert(0, "") let x_axis = axis(values: x_ticks, location: "bottom", title: none) let y_axis = axis( - min: 0, max: 0.5, step: 0.1, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.5, + step: 0.1, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$n = 10, p = 1 / 2$], numbering: none, + }, + caption: [$n = 10, p = 1 / 2$], + numbering: none, ) ] ] ] -#slide( - title: [Poisson], -)[ +#slide(title: [Poisson])[ Poisson distribution describes the probability of a certain number of events occurring in a fixed time interval if these events occur with a constant mean rate which is known and independent since the time of last occurrence. Poisson @@ -336,13 +360,9 @@ $ "Poisson"(λ) = f(x, λ) = (λ^x e^(-λ)) / (x!) 
"for" λ > 0 $ ] -#slide( - title: [Poisson], -)[ +#slide(title: [Poisson])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { let data = for i in range(0, 9) { @@ -353,17 +373,22 @@ x_ticks.insert(0, "") let x_axis = axis(values: x_ticks, location: "bottom", title: none) let y_axis = axis( - min: 0, max: 0.5, step: 0.1, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.5, + step: 0.1, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$λ = 2$], numbering: none, + }, + caption: [$λ = 2$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { let data = for i in range(0, 9) { @@ -374,27 +399,30 @@ x_ticks.insert(0, "") let x_axis = axis(values: x_ticks, location: "bottom", title: none) let y_axis = axis( - min: 0, max: 0.5, step: 0.1, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.5, + step: 0.1, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$λ = 3$], numbering: none, + }, + caption: [$λ = 3$], + numbering: none, ) ] ] ] -#slide( - title: [ - Negative Binomial #footnote[ +#slide(title: [ + Negative Binomial #footnote[ any phenomena that can be modeles as a Poisson distribution can be modeled also as negative binomial distribution @gelman2013bayesian, @gelman2020regression. ] - ], -)[ - #text( - size: 16pt, - )[ +])[ + #text(size: 16pt)[ The binomial distribution describes an event in which the number of successes in a sequence $n$ independent experiments, each one making a yes--no question with probability of success $p$ @@ -415,24 +443,20 @@ ] ] -#slide( - title: [Negative Binomial], -)[ +#slide(title: [Negative Binomial])[ #v(4em) $ - "Negative Binomial"(k, p) &= f(x, k, p) &= binom(x + k - 1, k - 1)p^(x)(1-p)^(k) \ + "Negative Binomial"(k, p) &= f(x, k, p) &= binom(x + k - 1, k - 1)p^(x)( + 1-p + )^(k) \ \ - & &"for" x ∈ {0, 1, dots, n} + & &"for" x ∈ {0, 1, dots, n} $ ] -#slide( - title: [Negative Binomial], -)[ +#slide(title: [Negative Binomial])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { let data = for i in range(0, 9) { @@ -443,17 +467,22 @@ x_ticks.insert(0, "") let x_axis = axis(values: x_ticks, location: "bottom", title: none) let y_axis = axis( - min: 0, max: 0.5, step: 0.1, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.5, + step: 0.1, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$k = 1, p = 1 / 2$], numbering: none, + }, + caption: [$k = 1, p = 1 / 2$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { let data = for i in range(0, 9) { @@ -464,22 +493,25 @@ x_ticks.insert(0, "") let x_axis = axis(values: x_ticks, location: "bottom", title: none) let y_axis = axis( - min: 0, max: 0.5, step: 0.1, value_formatter: "{:.1}", location: "left", title: $"PMF"$, + min: 0, + max: 0.5, + step: 0.1, + value_formatter: "{:.1}", + location: "left", + title: $"PMF"$, ) let pl = pplot(data: data, axes: ((x_axis, y_axis))) bar_chart(pl, (350pt, 275pt), bar_width: 70%, caption: none) - }, caption: [$k = 5, p = 1 / 2$], numbering: none, + }, + caption: [$k = 5, p = 
1 / 2$], + numbering: none, ) ] ] ] -#slide( - title: [Continuous Distributions], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Continuous Distributions])[ + #text(size: 16pt)[ Continuous probability distributions are distributions which the results are values in a continuous real number line: $(-oo, +oo) ∈ RR$. @@ -500,9 +532,7 @@ ] ] -#slide( - title: [Continuous Uniform], -)[ +#slide(title: [Continuous Uniform])[ The continuous uniform distribution is a symmetric probability distribution in which an infinite number of value intervals are equally likely of being observable. Each one of the infinite $n$ intervals have probability $1 / n$. @@ -515,41 +545,48 @@ - $b$ -- upper bound ] -#slide( - title: [Continuous Uniform], -)[ +#slide(title: [Continuous Uniform])[ #v(4em) $ "Uniform"(a,b) = f(x, a, b) = 1 / (b-a) "for" a <= x <= b "and" x ∈ [a, b] $ ] -#slide( - title: [Continuous Uniform], -)[ - #align( - center, - )[ +#slide(title: [Continuous Uniform])[ + #align(center)[ #figure( { canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 6, y-max: 0.4, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 6, + y-max: 0.4, + y-min: 0, + { plot.add( - domain: (0, 6), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => continuousuniform(0, 6), + domain: (0, 6), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => continuousuniform(0, 6), ) }, ) }, ) - }, caption: [$a = 0, b = 6$], numbering: none, + }, + caption: [$a = 0, b = 6$], + numbering: none, ) ] ] -#slide( - title: [Normal], -)[ +#slide(title: [Normal])[ This distribution is generally used in social and natural sciences to represent continuous variables in which its underlying distribution are unknown. @@ -568,9 +605,7 @@ similar to normal distributions. ] -#slide( - title: [Normal], -)[ +#slide(title: [Normal])[ The normal distribution has two parameters and its notation is $"Normal"(μ, σ)$ or $"N"(μ, σ)$: @@ -583,13 +618,11 @@ Example: height, weight etc. ] -#slide( - title: [Normal #footnote[ +#slide(title: [Normal #footnote[ see how the normal distribution was derived from the binomial distribution in the backup slides. 
] - ], -)[ +])[ #v(4em) $ @@ -598,55 +631,77 @@ $ ] -#slide( - title: [Normal], -)[ +#slide(title: [Normal])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.65, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.65, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => gaussian(x, 0, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => gaussian(x, 0, 1), ) }, ) }, ) - }, caption: [$μ = 0, σ = 1$], numbering: none, + }, + caption: [$μ = 0, σ = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.65, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.65, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => gaussian(x, 1, 2 / 3), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => gaussian(x, 1, 2 / 3), ) }, ) }, ) - }, caption: [$μ = 1, σ = 2 / 3$], numbering: none, + }, + caption: [$μ = 1, σ = 2 / 3$], + numbering: none, ) ] ] ] -#slide( - title: [Log-Normal], -)[ +#slide(title: [Log-Normal])[ The log-normal distribution is a continuous probability distribution of a random variable which its natural logarithm is distributed as a normal distribution. Thus, if the natural logarithm a random variable $X$, $ln(X)$, is distributed as @@ -662,9 +717,7 @@ of many independent positive random variables. 
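This relationship between the normal and the log-normal is easy to verify by simulation. A minimal sketch, assuming Distributions.jl together with the Random and Statistics standard libraries (the sample size is an arbitrary illustrative choice):

```julia
# ln(X) ~ Normal(μ, σ)  ⟺  X ~ LogNormal(μ, σ): check it by exponentiating normal draws.
using Distributions, Random, Statistics

Random.seed!(1)
μ, σ = 0.0, 1.0
x = exp.(rand(Normal(μ, σ), 100_000))   # exponentiated normal draws

d = LogNormal(μ, σ)
println((mean(x), mean(d)))       # both ≈ exp(μ + σ^2 / 2) ≈ 1.65
println((median(x), median(d)))   # both ≈ exp(μ) = 1.0
```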
] -#slide( - title: [Log-Normal], -)[ +#slide(title: [Log-Normal])[ The log-normal distribution has two parameters and its notation is $"Log-Normal"(μ, σ^2)$: @@ -674,63 +727,87 @@ - $σ$ -- square root of the variance of the distribution's natural logarithm ] -#slide( - title: [Log-Normal], -)[ +#slide(title: [Log-Normal])[ #v(4em) - $ "Log-Normal"(μ,σ) = f(x, μ, σ) = 1 / (x σ sqrt(2π))e^((-ln(x) - μ)^2 / (2 σ^2)) "for" σ > 0 $ + $ + "Log-Normal"(μ,σ) = f(x, μ, σ) = 1 / (x σ sqrt(2π))e^(( + -ln(x) - μ + )^2 / (2 σ^2)) "for" σ > 0 + $ ] -#slide( - title: [Log-Normal], -)[ +#slide(title: [Log-Normal])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 5, y-max: 0.7, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 5, + y-max: 0.7, + y-min: 0, + { plot.add( - domain: (0.001, 5), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => lognormal(x, 0, 1), + domain: (0.001, 5), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => lognormal(x, 0, 1), ) }, ) }, ) - }, caption: [$μ = 0, σ = 1$], numbering: none, + }, + caption: [$μ = 0, σ = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 5, y-max: 0.7, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 5, + y-max: 0.7, + y-min: 0, + { plot.add( - domain: (0.001, 5), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => lognormal(x, 1, 2 / 3), + domain: (0.001, 5), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => lognormal(x, 1, 2 / 3), ) }, ) }, ) - }, caption: [$μ = 1, σ = 2 / 3$], numbering: none, + }, + caption: [$μ = 1, σ = 2 / 3$], + numbering: none, ) ] ] ] -#slide( - title: [Exponential], -)[ +#slide(title: [Exponential])[ The exponential distribution is the probability distribution of the time between events that occurs in a continuous manner, are independent, and have constant mean rate of occurrence. 
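The "constant mean rate" property is equivalent to memorylessness: having already waited $s$ units of time tells you nothing about the remaining wait. A minimal sketch, assuming Distributions.jl; note that this package parameterizes `Exponential` by the scale $θ = 1 / λ$, which is a library convention rather than anything implied by the slides:

```julia
# Memorylessness of the exponential: P(X > s + t | X > s) = P(X > t).
using Distributions

λ = 2.0                  # rate, as in the slides' parameterization
d = Exponential(1 / λ)   # Distributions.jl takes the scale θ = 1/λ

s, t = 0.7, 1.3
lhs = ccdf(d, s + t) / ccdf(d, s)   # P(X > s + t) / P(X > s)
rhs = ccdf(d, t)                    # P(X > t)

println(lhs ≈ rhs)   # true: the remaining wait "forgets" the time already waited
```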
@@ -754,55 +831,77 @@ $ "Exponential"(λ) = f(x, λ) = λ e^(-λ x) "for" λ > 0 $ ] -#slide( - title: [Exponential], -)[ +#slide(title: [Exponential])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.25, x-min: 0, x-max: 5, y-max: 0.95, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.25, + x-min: 0, + x-max: 5, + y-max: 0.95, + y-min: 0, + { plot.add( - domain: (0.001, 5), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => exponential(x, 1), + domain: (0.001, 5), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => exponential(x, 1), ) }, ) }, ) - }, caption: [$λ = 1$], numbering: none, + }, + caption: [$λ = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.25, x-min: 0, x-max: 5, y-max: 0.95, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.25, + x-min: 0, + x-max: 5, + y-max: 0.95, + y-min: 0, + { plot.add( - domain: (0.001, 5), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => exponential(x, 1 / 2), + domain: (0.001, 5), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => exponential(x, 1 / 2), ) }, ) }, ) - }, caption: [$λ = 1 / 2$], numbering: none, + }, + caption: [$λ = 1 / 2$], + numbering: none, ) ] ] ] -#slide( - title: [Gamma], -)[ +#slide(title: [Gamma])[ The gamma distribution is a long-tailed distribution with support only for positive real numbers. @@ -819,66 +918,88 @@ Example: Any waiting time can be modelled with a gamma distribution. 
] -#slide( - title: [Gamma], -)[ +#slide(title: [Gamma])[ #v(4em) - $ "Gamma"(α, θ) = f(x, α, θ) = (x^(α-1) e^(-x / θ)) / (Γ(α) θ^α) "for" x, α, θ > 0 $ + $ + "Gamma"(α, θ) = f(x, α, θ) = (x^(α-1) e^(-x / θ)) / (Γ( + α + ) θ^α) "for" x, α, θ > 0 + $ ] -#slide( - title: [Gamma], -)[ +#slide(title: [Gamma])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.2, x-min: 0, x-max: 6, y-max: 0.95, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.2, + x-min: 0, + x-max: 6, + y-max: 0.95, + y-min: 0, + { plot.add( - domain: (0.001, 6), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => gammadist(x, 1, 1), + domain: (0.001, 6), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => gammadist(x, 1, 1), ) }, ) }, ) - }, caption: [$α = 1, θ = 1$], numbering: none, + }, + caption: [$α = 1, θ = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.2, x-min: 0, x-max: 6, y-max: 0.95, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.2, + x-min: 0, + x-max: 6, + y-max: 0.95, + y-min: 0, + { plot.add( - domain: (0.001, 6), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => gammadist(x, 2, 1 / 2), + domain: (0.001, 6), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => gammadist(x, 2, 1 / 2), ) }, ) }, ) - }, caption: [$α = 2, θ = 1 / 2$], numbering: none, + }, + caption: [$α = 2, θ = 1 / 2$], + numbering: none, ) ] ] ] -#slide( - title: [Student's $t$], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Student's $t$])[ + #text(size: 18pt)[ Student's $t$ distribution arises by estimating the mean of a normally-distributed population in situations where the sample size is small and the standard deviation is known #footnote[this is where the ubiquitous Student's $t$ test.]. @@ -899,9 +1020,7 @@ ] ] -#slide( - title: [Student's $t$], -)[ +#slide(title: [Student's $t$])[ Student's $t$ distribution has one parameter and its notation is $"Student"(ν)$: @@ -912,63 +1031,87 @@ Example: a dataset full of outliers. 
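The "dataset full of outliers" point becomes concrete when you compare log-densities far from the center. A minimal sketch, assuming Distributions.jl, where `TDist(ν)` is that package's standard Student's $t$ with $ν$ degrees of freedom:

```julia
# Student's t penalizes an outlier far less harshly than the normal does.
using Distributions

outlier = 10.0
println(logpdf(Normal(0, 1), outlier))   # ≈ -50.9: the normal treats it as near-impossible
println(logpdf(TDist(3), outlier))       # ≈ -8.1: the t's fat tails keep it plausible
```

In a likelihood, a single such point therefore drags normal-based estimates much more than $t$-based ones, which is why Student's $t$ shows up in robust models.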
] -#slide( - title: [Student's $t$], -)[ +#slide(title: [Student's $t$])[ #v(4em) - $ "Student"(ν) = f(x, ν) = (Γ ((ν + 1) / 2) ) / (sqrt(ν π) Γ (ν / 2 )) (1+ x^2 / ν)^(-(ν+1) / 2) "for" ν >= 1 $ + $ + "Student"(ν) = f(x, ν) = (Γ ((ν + 1) / 2) ) / (sqrt(ν π) Γ (ν / 2)) ( + 1+ x^2 / ν + )^(-(ν+1) / 2) "for" ν >= 1 + $ ] -#slide( - title: [Student's $t$], -)[ +#slide(title: [Student's $t$])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.45, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.45, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => student(x, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => student(x, 1), ) }, ) }, ) - }, caption: [$ν = 1$], numbering: none, + }, + caption: [$ν = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.45, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.45, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => student(x, 3), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => student(x, 3), ) }, ) }, ) - }, caption: [$ν = 3$], numbering: none, + }, + caption: [$ν = 3$], + numbering: none, ) ] ] ] -#slide( - title: [Cauchy], -)[ +#slide(title: [Cauchy])[ The Cauchy distribution is bell-shaped distribution and a special case for Student's $t$ with $ν = 1$. 
@@ -989,58 +1132,80 @@ #slide(title: [Cauchy])[ #v(4em) - $ "Cauchy"(μ, σ) = 1 / (π σ (1 + ((x - μ) / σ )^2 )) "for" σ >= 0 $ + $ "Cauchy"(μ, σ) = 1 / (π σ (1 + ((x - μ) / σ)^2)) "for" σ >= 0 $ ] -#slide( - title: [Cauchy], -)[ +#slide(title: [Cauchy])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.65, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.65, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => cauchy(x, 0, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => cauchy(x, 0, 1), ) }, ) }, ) - }, caption: [$μ = 0, σ = 1$], numbering: none, + }, + caption: [$μ = 0, σ = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.65, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.65, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => cauchy(x, 0, 1 / 2), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => cauchy(x, 0, 1 / 2), ) }, ) }, ) - }, caption: [$μ = 0, σ = 1 / 2$], numbering: none, + }, + caption: [$μ = 0, σ = 1 / 2$], + numbering: none, ) ] ] ] -#slide( - title: [Beta], -)[ +#slide(title: [Beta])[ The beta distribution is a natural choice to model anything that is restricted to values between $0$ e $1$. Hence, it is a good candidate to model probabilities and proportions. 
@@ -1061,55 +1226,81 @@ in a total of 8 attempts -- $"Beta"(3, 5)$ ] -#slide( - title: [Beta], -)[ +#slide(title: [Beta])[ #v(4em) - $ "Beta"(α, β) = f(x, α, β) (x^(α - 1)(1 - x)^(β - 1)) / ((Γ(α)Γ(β)) / (Γ(α +β ))) "for" α, β > 0 "and" x ∈ [0, 1] $ + $ + "Beta"(α, β) = f(x, α, β) (x^(α - 1)(1 - x)^(β - 1)) / ((Γ(α)Γ(β)) / (Γ( + α +β + ))) "for" α, β > 0 "and" x ∈ [0, 1] + $ ] -#slide( - title: [Beta], -)[ +#slide(title: [Beta])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 1, y-max: 0.3, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 1, + y-max: 0.3, + y-min: 0, + { plot.add( - domain: (0.0001, 0.9999), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => beta(x, 1, 1), + domain: (0.0001, 0.9999), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => beta(x, 1, 1), ) }, ) }, ) - }, caption: [$α = 1, β = 1$], numbering: none, + }, + caption: [$α = 1, β = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 1, y-max: 0.3, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 1, + y-max: 0.3, + y-min: 0, + { plot.add( - domain: (0.0001, 0.9999), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => beta(x, 3, 2), + domain: (0.0001, 0.9999), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => beta(x, 3, 2), ) }, ) }, ) - }, caption: [$α = 3, β = 2$], numbering: none, + }, + caption: [$α = 3, β = 2$], + numbering: none, ) ] ] diff --git a/slides/03-priors.typ b/slides/03-priors.typ index db50373..ffa64b6 100644 --- a/slides/03-priors.typ +++ b/slides/03-priors.typ @@ -4,9 +4,7 @@ #new-section-slide("Priors") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose"): - Chapter 2: Single-parameter models @@ -26,17 +24,15 @@ #align(center)[#image("images/memes/priors.jpg")] ] -#slide( - title: "Prior Probability", -)[ +#slide(title: "Prior Probability")[ Bayesian statistics is characterized by the use of prior information as the prior probability $P(θ)$, often just prior: - $ underbrace(P(θ | y), "Posterior") = (overbrace(P(y | θ), "Likelihood") dot overbrace(P(θ), "Prior")) / underbrace(P(y), "Normalizing Constant") $ + $ + underbrace(P(θ | y), "Posterior") = (overbrace(P(y | θ), "Likelihood") dot overbrace(P(θ), "Prior")) / underbrace(P(y), "Normalizing Constant") + $ ] -#slide( - title: "The Subjectivity of the Prior", -)[ +#slide(title: "The Subjectivity of the Prior")[ - Many critics to Bayesian statistics are due the subjectivity in eliciting priors probability on certain hypothesis or model parameter's values. - Subjectivity is something unwanted in the ideal picture of the scientist and the @@ -51,9 +47,7 @@ @vandeschootBayesianStatisticsModelling2021. ] -#slide( - title: "How to Incorporate Subjectivity", -)[ +#slide(title: "How to Incorporate Subjectivity")[ - Bayesian statistics *embraces* subjectivity while frequentist statistics *bans* it. 
@@ -68,9 +62,7 @@ *declared and formalized* ] -#slide( - title: "Types of Priors", -)[ +#slide(title: "Types of Priors")[ In general, we can have 3 types of priors in a Bayesian approach @gelman2013bayesian @mcelreath2020statistical @vandeschootBayesianStatisticsModelling2021: @@ -82,9 +74,7 @@ - *informative*: introduction of medium to high domain knowledge. ] -#slide( - title: "Uniform Prior (Flat)", -)[ +#slide(title: "Uniform Prior (Flat)")[ Starts from the premise that "everything is possible". There is no limits in the degree of beliefs that the distribution of certain values must be or any sort of restrictions. @@ -100,9 +90,7 @@ - *model error or residuals*: ${σ ∈ RR^+ : 0 < θ < oo}$ ] -#slide( - title: "Weakly Uninformative Prior", -)[ +#slide(title: "Weakly Uninformative Prior")[ Here we start to have "educated" guess about our parameter values. Hence, we don't start from the premise that "anything is possible". @@ -122,12 +110,8 @@ - $θ tilde "Student"(ν=3, 0, 1)$ (Aki Vehtari's preferred choice #footnote()) ] -#slide( - title: "An Example of a Robust Prior", -)[ - #text( - size: 18pt, - )[ +#slide(title: "An Example of a Robust Prior")[ + #text(size: 18pt)[ A nice example comes from a Ben Goodrich's lecture #footnote[ https://youtu.be/p6cyRBWahRA, in case you want to see the full video, the section about priors related to the argument begins at minute 40 @@ -147,9 +131,7 @@ ] ] -#slide( - title: "Informative Prior", -)[ +#slide(title: "Informative Prior")[ In some contexts, it is interesting to use an informative prior. Good candidates are when data is scarce or expensive and prior knowledge about the phenomena is available. diff --git a/slides/04-bayesian_workflow.typ b/slides/04-bayesian_workflow.typ index fdace38..891493b 100644 --- a/slides/04-bayesian_workflow.typ +++ b/slides/04-bayesian_workflow.typ @@ -5,9 +5,7 @@ #new-section-slide("Bayesian Workflow") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 6: Model checking - #cite(, form: "prose") - Chapter 4: Geocentric Models @@ -23,15 +21,12 @@ #align(center)[#image("images/memes/workflow.jpg")] ] -#slide( - title: "All Models Are Wrong", -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #align( - center + horizon, - )[#quote(block: true, attribution: [George Box @boxScienceStatistics1976])[ +#slide(title: "All Models Are Wrong")[ + #side-by-side(columns: (4fr, 1fr))[ + #align(center + horizon)[#quote( + block: true, + attribution: [George Box @boxScienceStatistics1976], + )[ All models are wrong but some are useful ]] ][ @@ -39,39 +34,33 @@ ] ] -#slide( - title: [Bayesian Workflow #footnote[ +#slide(title: [Bayesian Workflow #footnote[ based on #cite(, form: "prose") - ]], -)[ + ]])[ #v(2em) - #align( - center, - )[#cetz.canvas( - { - import cetz.draw: * - set-style( - mark: (start: ">", end: ">", fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 3, - ) - circle((0, 0)) - content((0, 0), [#align(center)[Prior \ Elicitation]]) - line((3, 0), (7, 0)) - content((5, 2), [#align(center)[Prior \ Predictive \ Check]]) - circle((10, 0)) - content((10, 0), [#align(center)[Model \ Specification]]) - line((13, 0), (17, 0)) - content((15, 2), [#align(center)[Posterior \ Predictive \ Check]]) - circle((20, 0)) - content((20, 0), [#align(center)[Posterior \ Inference]]) - }, - )] + #align(center)[#cetz.canvas({ + import cetz.draw: * + set-style( + mark: (start: ">", end: ">", fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 3, + ) + 
circle((0, 0)) + content((0, 0), [#align(center)[Prior \ Elicitation]]) + line((3, 0), (7, 0)) + content((5, 2), [#align(center)[Prior \ Predictive \ Check]]) + circle((10, 0)) + content((10, 0), [#align(center)[Model \ Specification]]) + line((13, 0), (17, 0)) + content((15, 2), [#align(center)[Posterior \ Predictive \ Check]]) + circle((20, 0)) + content((20, 0), [#align(center)[Posterior \ Inference]]) + })] ] -#slide( - title: [Bayesian Workflow #footnote[ +#slide(title: [Bayesian Workflow #footnote[ adapted from #link("https://github.com/elizavetasemenova")[Elizaveta Semenova]. - ]], -)[ + ]])[ - Understand the domain and problem. - Formulate the model mathematically. - Implement model, test, and debug. @@ -83,28 +72,24 @@ efficient models. ] -#slide( - title: "Actual Bayesian Workflow", -)[ +#slide(title: "Actual Bayesian Workflow")[ #figure( - image("images/workflow/workflow_overview.svg", height: 80%), caption: [Bayesian workflow by #cite(, form: "prose").], + image("images/workflow/workflow_overview.svg", height: 80%), + caption: [Bayesian workflow by #cite(, form: "prose").], ) ] -#slide( - title: [Not a "new idea"], -)[ +#slide(title: [Not a "new idea"])[ #figure( - image("images/workflow/box_loop.png", height: 80%), caption: [Box's Loop from + image("images/workflow/box_loop.png", height: 80%), + caption: [Box's Loop from #cite(, form: "prose") but taken from #cite(, form: "prose").], ) ] -#slide( - title: "Prior Predictive Check", -)[ +#slide(title: "Prior Predictive Check")[ Before we feed data into our model, we need to check all of our priors. #v(1em) @@ -120,12 +105,8 @@ understanding of the prior influence onto the posterior. ] -#slide( - title: "Posterior Predictive Check", -)[ - #text( - size: 18pt, - )[ +#slide(title: "Posterior Predictive Check")[ + #text(size: 18pt)[ We need to make sure that the posterior distribution of $bold(y)$, namely $bold(tilde(y))$, can capture all the nuances of the real distribution density/mass of $bold(y)$. 
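In Turing.jl both checks are mechanical: draw from the priors with the `Prior()` sampler, then generate replicated data with `predict`. A minimal sketch with a throwaway intercept-only model (the model, data, and draw counts are placeholders, not course code):

```julia
using Turing

# Throwaway intercept-only model, just to illustrate the mechanics.
@model function toy(y, N = length(y))
    μ ~ Normal(0, 10)
    σ ~ Exponential(1)
    y ~ filldist(Normal(μ, σ), N)
end

y_obs = randn(20) .+ 3                                            # fake data

prior_chain = sample(toy(y_obs), Prior(), 1_000)                  # draws from the priors only
prior_pred  = predict(toy(missing, length(y_obs)), prior_chain)   # prior predictive ỹ

post_chain  = sample(toy(y_obs), NUTS(), 1_000)
post_pred   = predict(toy(missing, length(y_obs)), post_chain)    # posterior predictive ỹ
```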
@@ -145,18 +126,16 @@ ] ] -#slide( - title: "Examples of Posterior Predictive Checks", -)[ - #side-by-side( - columns: (1fr, 1fr), - )[ +#slide(title: "Examples of Posterior Predictive Checks")[ + #side-by-side(columns: (1fr, 1fr))[ #figure( - image("images/predictive_checks/pp_check_brms.svg", height: 80%), caption: [#text(size: 18pt)[Real versus Simulated Densities]], + image("images/predictive_checks/pp_check_brms.svg", height: 80%), + caption: [#text(size: 18pt)[Real versus Simulated Densities]], ) ][ #figure( - image("images/predictive_checks/pp_check_brms_ecdf.svg", height: 80%), caption: [#text(size: 18pt)[Real versus Simulated Empirical CDFs]], + image("images/predictive_checks/pp_check_brms_ecdf.svg", height: 80%), + caption: [#text(size: 18pt)[Real versus Simulated Empirical CDFs]], ) ] ] diff --git a/slides/05-linear_regression.typ b/slides/05-linear_regression.typ index a56bb34..cd0c1c0 100644 --- a/slides/05-linear_regression.typ +++ b/slides/05-linear_regression.typ @@ -5,9 +5,7 @@ #new-section-slide("Linear Regression") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose"): - Chapter 14: Introduction to regression models - Chapter 16: Generalized linear models @@ -24,25 +22,37 @@ #align(center)[#image("images/memes/linear_regression.jpg")] ] -#slide( - title: "What is Linear Regression?", -)[ - #let data_scatter = ((0.5, 0.7), (1, 0.7), (2, 2.4), (3, 2.6), (4, 4.6), (5, 4.2),) - #let data_graph = ((0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5),) +#slide(title: "What is Linear Regression?")[ + #let data_scatter = ( + (0.5, 0.7), + (1, 0.7), + (2, 2.4), + (3, 2.6), + (4, 4.6), + (5, 4.2), + ) + #let data_graph = ((0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) #let x_axis = axis(min: 0, max: 5, location: "bottom") #let y_axis = axis(min: 0, max: 5, location: "left") #let pl_scatter = pplot(data: data_scatter, axes: ((x_axis, y_axis))) - #let scatter_display = scatter_plot(pl_scatter, 70pt, stroke: 3pt, caption: none) + #let scatter_display = scatter_plot( + pl_scatter, + 70pt, + stroke: 3pt, + caption: none, + ) #let pl_graph = pplot(data: data_graph, axes: (x_axis, y_axis)) #let graph_display = graph_plot( - pl_graph, 70pt, stroke: 2pt + julia-blue, markings: none, caption: none, + pl_graph, + 70pt, + stroke: 2pt + julia-blue, + markings: none, + caption: none, ) #overlay((scatter_display, graph_display), (360pt, 280pt)) ] -#slide( - title: "What is Linear Regression?", -)[ +#slide(title: "What is Linear Regression?")[ The ideia here is to model a dependent variable as a linear combination of independent variables. @@ -61,9 +71,7 @@ #align(center)[#image("images/memes/assumptions.jpg")] ] -#slide( - title: "Linear Regression Assumptions", -)[ +#slide(title: "Linear Regression Assumptions")[ #v(3em) - model error $ε$ is independent of $bold(X)$ and $bold(y)$. @@ -77,24 +85,22 @@ - Observations are I.I.D #footnote[independent and identically distributed.]. ] -#slide( - title: "Linear Regression Specification", -)[ +#slide(title: "Linear Regression Specification")[ To estimate the intercept $α$ and coefficients $bold(β)$ we use a Gaussian/normal likelihood function. 
Mathematically speaking, Bayesian linear regression is: #v(2em) - $ bold(y) &tilde "Normal"(α + bold(X) bold(β), σ) \ - α &tilde "Normal"(μ_α, σ_α) \ - bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) \ - σ &tilde "Exponential"(λ_σ) $ + $ + bold(y) &tilde "Normal"(α + bold(X) bold(β), σ) \ + α &tilde "Normal"(μ_α, σ_α) \ + bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) \ + σ &tilde "Exponential"(λ_σ) + $ ] -#slide( - title: "Linear Regression Specification", -)[ +#slide(title: "Linear Regression Specification")[ What we are missing is the prior probabilities for the model's parameters: #v(2em) @@ -109,9 +115,7 @@ Knowledge that we have about the model's error. ] -#slide( - title: "Good Candidates for Prior Distributions", -)[ +#slide(title: "Good Candidates for Prior Distributions")[ First, center ($μ = 0$) and standardize ($σ = 1$) the independent variables. #v(2em) @@ -127,9 +131,7 @@ to positive values only. Exponential is a good candidate. ] -#slide( - title: "Posterior Computation", -)[ +#slide(title: "Posterior Computation")[ Our aim to is to *find the posterior distribution of the model's parameters of interest* ($α$ and $bold(β)$) by computing the full posterior distribution of: diff --git a/slides/06-logistic_regression.typ b/slides/06-logistic_regression.typ index 00bd63a..d62c0b0 100644 --- a/slides/06-logistic_regression.typ +++ b/slides/06-logistic_regression.typ @@ -5,9 +5,7 @@ #new-section-slide("Logistic Regression") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 16: Generalized linear models @@ -26,9 +24,7 @@ #align(center)[#image("images/memes/logistic_regression.jpg")] ] -#slide( - title: [Welcome to the Magical World of the Linear Generalized Models], -)[ +#slide(title: [Welcome to the Magical World of the Linear Generalized Models])[ Leaving the realm of the linear models, we start to adventure to the generalized linear models -- GLM. @@ -38,21 +34,17 @@ binomial regression). ] -#slide( - title: [Binary Data #footnote[ +#slide(title: [Binary Data #footnote[ also known as dichotomous, dummy, indicator variable, etc. ] - ], -)[ +])[ #v(5em) We use logistic regression when our dependent variable is *binary*. It only takes two distinct values, usually coded as $0$ and $1$. ] -#slide( - title: [What is Logistic Regression], -)[ +#slide(title: [What is Logistic Regression])[ Logistic regression behaves exactly as a linear model: it makes a prediction by simply computing a weighted sum of the independent variables $bold(X)$ using the estimated coefficients $bold(β)$, along with a constant term $α$. 
@@ -62,19 +54,23 @@ However, instead of outputting a continuous value $bold(y)$, it returns the *logistic function* of this value: - $ "logistic"(x) = 1/(1 + e^(-x)) $ + $ "logistic"(x) = 1 / (1 + e^(-x)) $ ] -#slide( - title: [Logistic Function], -)[ - #align( - center, - )[ +#slide(title: [Logistic Function])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $x$, y-label: $"logistic"(x)$, x-tick-step: 5, y-tick-step: 0.25, y-min: -0.01, y-max: 1.01, { + size: (16, 9), + x-label: $x$, + y-label: $"logistic"(x)$, + x-tick-step: 5, + y-tick-step: 0.25, + y-min: -0.01, + y-max: 1.01, + { plot.add( domain: (-10, 10), samples: 200, // label: $"logistic"(x)$, // FIXME: depends on unreleased cetz 2.0.0 @@ -87,27 +83,29 @@ ] ] -#slide( - title: [Probit Function], -)[ +#slide(title: [Probit Function])[ We can also opt to choose to use the *probit function* (usually represented by the Greek letter $Φ$) which is the CDF of a normal distribution: #v(2em) - $ Φ(x)= 1/(sqrt(2π)) ∫_(-oo)^(x)e^((-t^2)/2) dif t $ + $ Φ(x)= 1 / (sqrt(2π)) ∫_(-oo)^(x)e^((-t^2) / 2) dif t $ ] -#slide( - title: [Probit Function], -)[ - #align( - center, - )[ +#slide(title: [Probit Function])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $x$, y-label: $Φ(x)$, x-tick-step: 5, y-tick-step: 0.25, y-min: -0.01, y-max: 1.01, { + size: (16, 9), + x-label: $x$, + y-label: $Φ(x)$, + x-tick-step: 5, + y-tick-step: 0.25, + y-min: -0.01, + y-max: 1.01, + { plot.add( domain: (-10, 10), samples: 200, // label: $"probit"(x)$, // FIXME: depends on unreleased cetz 2.0.0 @@ -120,16 +118,20 @@ ] ] -#slide( - title: [Logistic Function versus Probit Function], -)[ - #align( - center, - )[ +#slide(title: [Logistic Function versus Probit Function])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $x$, y-label: $Φ(x)$, x-tick-step: 5, y-tick-step: 0.25, y-min: -0.01, y-max: 1.01, { + size: (16, 9), + x-label: $x$, + y-label: $Φ(x)$, + x-tick-step: 5, + y-tick-step: 0.25, + y-min: -0.01, + y-max: 1.01, + { plot.add( domain: (-10, 10), samples: 200, // label: $"logistic"(x)$, // FIXME: depends on unreleased cetz 2.0.0 @@ -147,9 +149,7 @@ ] ] -#slide( - title: [Comparison with Linear Regression], -)[ +#slide(title: [Comparison with Linear Regression])[ Linear regression follows the following mathematical expression: $ "linear" = α + β_1 x_1 + β_2 x_2 + dots + β_k x_k $ @@ -167,9 +167,7 @@ $bold(y)$'s predicted binary value. 
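Both link functions are one-liners in Julia; a small sketch comparing them at a few arbitrary points (the probit uses the standard normal CDF from Distributions.jl):

```julia
using Distributions

logistic(x) = 1 / (1 + exp(-x))       # inverse logit
probit(x)   = cdf(Normal(0, 1), x)    # Φ(x), the standard normal CDF

for x in (-2.0, 0.0, 2.0)
    println("x = $x   logistic = $(round(logistic(x); digits = 3))   Φ = $(round(probit(x); digits = 3))")
end
# Both map the real line into (0, 1); the logistic curve has slightly
# heavier tails, so extreme linear predictors give less extreme probabilities.
```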
] -#slide( - title: [Logistic Regression Specification], -)[ +#slide(title: [Logistic Regression Specification])[ We can model logistic regression using two approaches: #v(3em) @@ -187,8 +185,8 @@ #text(size: 16pt)[ $ bold(y) &tilde "Bernoulli"(p) \ - p &= "logistic/probit"(α + bold(X) bold(β)) \ - α &tilde "Normal"(μ_α, σ_α) \ + p &= "logistic/probit"(α + bold(X) bold(β)) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) $ @@ -208,8 +206,8 @@ #text(size: 16pt)[ $ bold(y) &tilde "Binomial"(n, p) \ - p &= "logistic/probit"(α + bold(X) bold(β)) \ - α &tilde "Normal"(μ_α, σ_α) \ + p &= "logistic/probit"(α + bold(X) bold(β)) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) $ @@ -226,9 +224,7 @@ ] ] -#slide( - title: [Posterior Computation], -)[ +#slide(title: [Posterior Computation])[ Our aim to is to *find the posterior distribution of the model's parameters of interest* ($α$ and $bold(β)$) by computing the full posterior distribution of: @@ -237,9 +233,7 @@ $ P(bold(θ) | bold(y)) = P(α, bold(β) | bold(y)) $ ] -#slide( - title: [How to Interpret Coefficients], -)[ +#slide(title: [How to Interpret Coefficients])[ If we revisit logistic transformation mathematical expression, we see that, in order to interpret coefficients $bold(β)$, we need to perform a transformation. @@ -249,12 +243,8 @@ its inverse function. ] -#slide( - title: [Probability versus Odds], -)[ - #text( - size: 17pt, - )[ +#slide(title: [Probability versus Odds])[ + #text(size: 17pt)[ But before that, we need to discern between *probability and odds* #footnote[mathematically speaking.]. - *Probability*: a real number between $0$ and $1$ @@ -274,9 +264,7 @@ ] ] -#slide( - title: [Probability versus Odds], -)[ +#slide(title: [Probability versus Odds])[ $ "odds" = p / (1 - p) $ where $p$ is the probability. @@ -288,22 +276,18 @@ - Odds over $1$ increase the probability of seeing a certain event. ] -#slide( - title: [Logodds], -)[ +#slide(title: [Logodds])[ If you revisit the logistic function, you'll se that the intercept $α$ and coefficients $bold(β)$ are literally the *log of the odds* (logodds): $ - p &= "logistic"(α + bold(X) bold(β) ) \ - p &= "logistic"(α) + "logistic"( bold(X) bold(β)) \ - p &= 1 / (1 + e^(-bold(β))) \ + p &= "logistic"(α + bold(X) bold(β)) \ + p &= "logistic"(α) + "logistic"(bold(X) bold(β)) \ + p &= 1 / (1 + e^(-bold(β))) \ bold(β) &= log("odds") $ ] -#slide( - title: [Logodds], -)[ +#slide(title: [Logodds])[ Hence, the coefficients of a logistic regression are expressed in logodds, in which $0$ is the neutral element, and any number above or below it increases or decreases, respectively, the changes of obtaining a "success" in $bold(y)$. 
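A tiny worked example of moving between the three scales (the coefficient value is made up for illustration):

```julia
β = 0.8                        # a coefficient on the logodds scale (hypothetical value)

odds = exp(β)                  # ≈ 2.23: each unit increase in x multiplies the odds by ≈ 2.2
p    = odds / (1 + odds)       # ≈ 0.69: probability implied by a logodds of 0.8
log(p / (1 - p))               # ≈ 0.8: the logit maps back to the original coefficient
```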
To @@ -313,7 +297,7 @@ values: $ - "odds"(α) & = e^α \ + "odds"(α) & = e^α \ "odds"(bold(β)) & = e^(bold(β)) $ ] diff --git a/slides/07-ordinal_regression.typ b/slides/07-ordinal_regression.typ index b27c4a6..8972b4e 100644 --- a/slides/07-ordinal_regression.typ +++ b/slides/07-ordinal_regression.typ @@ -5,9 +5,7 @@ #new-section-slide("Ordinal Regression") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 16: Generalized linear models, Section 16.2: Models for multivariate and multinomial responses @@ -26,9 +24,7 @@ #align(center)[#image("images/memes/categorical_data.jpg")] ] -#slide( - title: [What is Ordinal Regression?], -)[ +#slide(title: [What is Ordinal Regression?])[ #v(2em) *Ordinal regression* is a regression model for *discrete data* and, more @@ -41,9 +37,7 @@ agree-disagree, or a patient perception of pain score. ] -#slide( - title: [Why not just use Linear Regression?], -)[ +#slide(title: [Why not just use Linear Regression?])[ The main reason to not simply use linear regression with ordinal discrete outcomes is that the categories of the dependent variable could not be *equidistant*. @@ -69,9 +63,7 @@ Another *non-linear transformation*. ] -#slide( - title: [Cumulative Distribution Function -- CDF], -)[ +#slide(title: [Cumulative Distribution Function -- CDF])[ In the case of ordinal regression, first we need to transform the *dependent variable into a cumulative scale* @@ -86,12 +78,8 @@ $y$ ] -#slide( - title: [Log-cumulative-odds], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Log-cumulative-odds])[ + #text(size: 18pt)[ Still, this is not enough. We need to apply the *logit function onto the CDF*: $ op("logit")(x) = op("logistic")^(-1)(x) = ln(x / (1 -x)) $ @@ -107,9 +95,7 @@ ] ] -#slide( - title: [$K-1$ Intercepts], -)[ +#slide(title: [$K-1$ Intercepts])[ What do we do with this *log-cumulative-odds*? It allows us to construct *different intercepts for all possible values of the @@ -126,9 +112,7 @@ $Y$ *can take*. ] -#slide( - title: [Violation of the Equidistant Assumption], -)[ +#slide(title: [Violation of the Equidistant Assumption])[ #v(3em) Since each intercept implies a different CDF value for each $k ∈ K$, we can @@ -136,9 +120,7 @@ ordinal variables. ] -#slide( - title: [Cut Points], -)[ +#slide(title: [Cut Points])[ Each intercept implies in a log-cumulative-odds for each $k ∈ K$; We need also to *undo the cumulative nature of the $K - 1$ intercepts*. 
Firstly, we *convert the log-cumulative-odds back to a valid probability with the logistic @@ -153,53 +135,98 @@ $ P(Y = k) = P(Y <= k) - P(Y <= k - 1) $ ] -#slide( - title: [Example - Probability Mass Function of an Ordinal Variable], -)[ - #align( - center, - )[ - #let data = ((0.10, 1), (0.15, 2), (0.33, 3), (0.25, 4), (0.10, 5), (0.07, 6),) +#slide(title: [Example - Probability Mass Function of an Ordinal Variable])[ + #align(center)[ + #let data = ( + (0.10, 1), + (0.15, 2), + (0.33, 3), + (0.25, 4), + (0.10, 5), + (0.07, 6), + ) #let x_axis = axis( - min: 0, max: 6.1, step: 1, location: "bottom", helper_lines: false, title: "values", + min: 0, + max: 6.1, + step: 1, + location: "bottom", + helper_lines: false, + title: "values", ) #let y_axis = axis( - min: 0, max: 0.46, step: 0.1, location: "left", show_markings: true, helper_lines: true, value_formatter: "{:.1}", title: "PDF", + min: 0, + max: 0.46, + step: 0.1, + location: "left", + show_markings: true, + helper_lines: true, + value_formatter: "{:.1}", + title: "PDF", ) #let pl = pplot(data: data, axes: (x_axis, y_axis)) #bar_chart(pl, 80%, bar_width: 70%, caption: none) ] ] -#slide( - title: [Example - CDF versus log-cumulative-odds], -)[ +#slide(title: [Example - CDF versus log-cumulative-odds])[ #side-by-side[ - #align( - center, - )[ - #let data = ((0.10, 1), (0.25, 2), (0.58, 3), (0.83, 4), (0.93, 5), (1.00, 6),) + #align(center)[ + #let data = ( + (0.10, 1), + (0.25, 2), + (0.58, 3), + (0.83, 4), + (0.93, 5), + (1.00, 6), + ) #let x_axis = axis( - min: 0, max: 6.1, step: 1, location: "bottom", helper_lines: false, title: "values", + min: 0, + max: 6.1, + step: 1, + location: "bottom", + helper_lines: false, + title: "values", ) #let y_axis = axis( - min: 0, max: 1.2, step: 0.1, location: "left", show_markings: true, helper_lines: true, value_formatter: "{:.1}", title: "CDF", + min: 0, + max: 1.2, + step: 0.1, + location: "left", + show_markings: true, + helper_lines: true, + value_formatter: "{:.1}", + title: "CDF", ) #let pl = pplot(data: data, axes: (x_axis, y_axis)) #bar_chart(pl, 80%, bar_width: 70%, caption: none) ] ][ - #align( - center, - )[ + #align(center)[ #let data = ( - (1, -2.19722), (2, -1.09861), (3, 0.322773), (4, 1.58563), (5, 2.58669), (6, 10.0), + (1, -2.19722), + (2, -1.09861), + (3, 0.322773), + (4, 1.58563), + (5, 2.58669), + (6, 10.0), ) #let x_axis = axis( - min: 0, max: 6.1, step: 1, location: "bottom", helper_lines: false, title: "values", + min: 0, + max: 6.1, + step: 1, + location: "bottom", + helper_lines: false, + title: "values", ) #let y_axis = axis( - min: -3, max: 3, step: 1, location: "left", show_markings: true, helper_lines: true, value_formatter: "{:.1}", title: "log-cumulative-odds", + min: -3, + max: 3, + step: 1, + location: "left", + show_markings: true, + helper_lines: true, + value_formatter: "{:.1}", + title: "log-cumulative-odds", ) #let pl = pplot(data: data, axes: (x_axis, y_axis)) #scatter_plot(pl, 80%, stroke: 4pt, caption: none) @@ -207,9 +234,7 @@ ] ] -#slide( - title: [Adding Coefficients $bold(β)$], -)[ +#slide(title: [Adding Coefficients $bold(β)$])[ #v(3em) With the equidistant assumption solved with $K - 1$ intercepts, we can add @@ -217,9 +242,7 @@ regression model. ] -#slide( - title: [More Log-cumulative-odds], -)[ +#slide(title: [More Log-cumulative-odds])[ We've transformed all intercepts into log-cumulative-odds so that we can add effects as weighted sums of the independent variables to our basal rates (intercepts). 
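The cut points in the example plots above can be reproduced directly from the probability mass function; a short sketch (with `logit` defined inline):

```julia
logit(p) = log(p / (1 - p))

pmf = [0.10, 0.15, 0.33, 0.25, 0.10, 0.07]   # P(Y = k) from the example above
cum = cumsum(pmf)                            # P(Y ≤ k): 0.10, 0.25, 0.58, 0.83, 0.93, 1.00

# K - 1 cut points on the log-cumulative-odds scale (the last category needs none)
logit.(cum[1:end-1])
# ≈ [-2.197, -1.099, 0.323, 1.586, 2.587] — the values in the scatter plot
```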
@@ -234,9 +257,7 @@ Lastly, $φ_k$ represents the linear predictor for the $k$th intercept. ] -#slide( - title: [Matrix Notation], -)[ +#slide(title: [Matrix Notation])[ This can become more elegant and computationally efficient if we use matrix/vector notation: @@ -253,26 +274,20 @@ observation and every column an independent variable. ] -#slide( - title: [Ordinal Regression Specification], -)[ - #text( - size: 13pt, - )[ +#slide(title: [Ordinal Regression Specification])[ + #text(size: 13pt)[ $ bold(y) &tilde "Categorical"(bold(p)) \ bold(p) &= "logistic"(bold(φ)) \ bold(φ) &= bold(α) + bold(c) + bold(X) \cdot bold(β) \ - c_1 &= "logit"("CDF"(y_1)) \ - c_k &= "logit"("CDF"(y_k) - "CDF"(y_(k-1))) "for" 2 <= k <= K-1 \ - c_K &= "logit"(1 - "CDF"(y_(K-1))) \ + c_1 &= "logit"("CDF"(y_1)) \ + c_k &= "logit"("CDF"(y_k) - "CDF"(y_(k-1))) "for" 2 <= k <= K-1 \ + c_K &= "logit"(1 - "CDF"(y_(K-1))) \ bold(α) &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_(bold(β)), σ_{bold(β)}) $ ] - #text( - size: 12pt, - )[ + #text(size: 12pt)[ - $bold(y)$ -- ordinal discrete dependent variable. - $bold(p)$ -- probability vector of size $K$. - $K$: number of possible values that $bold(y)$ can take, i.e. number of ordered diff --git a/slides/08-poisson_regression.typ b/slides/08-poisson_regression.typ index 8af7bc5..fdce0f6 100644 --- a/slides/08-poisson_regression.typ +++ b/slides/08-poisson_regression.typ @@ -5,9 +5,7 @@ #new-section-slide("Poisson Regression") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 16: Generalized linear models @@ -23,18 +21,14 @@ #align(center)[#image("images/memes/poisson-distribution.jpg")] ] -#slide( - title: [Count Data], -)[ +#slide(title: [Count Data])[ #v(3em) Poisson regression is used when our dependent variable can only take *positive values*, usually in the context of *count data*. ] -#slide( - title: [What is Poisson Regression?], -)[ +#slide(title: [What is Poisson Regression?])[ Poisson regression behaves exactly like a linear model: it makes a prediction by simply computing a weighted sum of the independent variables $bold(X)$ with the estimated coefficients $bold(β)$: @@ -53,16 +47,19 @@ $ ] -#slide( - title: [Exponential Function], -)[ - #align( - center, - )[ +#slide(title: [Exponential Function])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: $x$, y-label: $e^x$, x-tick-step: 1, y-tick-step: 20, y-min: -0.01, { + size: (16, 9), + x-label: $x$, + y-label: $e^x$, + x-tick-step: 1, + y-tick-step: 20, + y-min: -0.01, + { plot.add( domain: (-1, 5), samples: 200, // label: $"exponential"(x)$, // FIXME: depends on unreleased cetz 2.0.0 @@ -75,9 +72,7 @@ ] ] -#slide( - title: [Comparison with Linear Regression], -)[ +#slide(title: [Comparison with Linear Regression])[ Linear regression has the following mathematical expression: $ @@ -95,9 +90,7 @@ - $log(y) = e^("Linear") = e^(α + β_1 x_1 + β_2 x_2 + dots + β_k x_k)$ ] -#slide( - title: [Poisson Regression Specification], -)[ +#slide(title: [Poisson Regression Specification])[ We can use Poisson regression if the dependent variable $bold(y)$ has count data, i.e., $bold(y)$ only takes positive values. 
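A minimal Turing.jl sketch of such a model, matching the specification shown next (priors, data, and the sampler call are placeholders):

```julia
using Turing

@model function poissonreg(X, y)
    α ~ Normal(0, 2)
    β ~ filldist(Normal(0, 2), size(X, 2))
    λ = exp.(α .+ X * β)          # inverse link: rates are strictly positive
    for i in eachindex(y)
        y[i] ~ Poisson(λ[i])
    end
end

# hypothetical usage:
# chain = sample(poissonreg(X, y), NUTS(), 2_000)
```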
@@ -109,14 +102,12 @@ $ bold(y) &tilde "Poisson"(e^((α + bold(X) bold(β)))) \ - α &tilde "Normal"(μ_α, σ_α) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_(bold(β)), σ_(bold(β))) $ ] -#slide( - title: [Interpreting the Coefficients], -)[ +#slide(title: [Interpreting the Coefficients])[ When we see the Poisson regression specification, we realize that the coefficient interpretation requires a transformation. What we need to do is undo the logarithm transformation: @@ -129,13 +120,13 @@ $ bold(y) &= e^((α + bold(X) bold(β))) \ - &= e^(α) dot e^(( X_((1)) dot β_((1)) )) dot e^(( X_((2)) dot β_((2)) )) dot dots dot e^(( X_((k)) dot β_((k)) )) + &= e^(α) dot e^((X_((1)) dot β_((1)))) dot e^(( + X_((2)) dot β_((2)) + )) dot dots dot e^((X_((k)) dot β_((k)))) $ ] -#slide( - title: [Interpreting the Coefficients], -)[ +#slide(title: [Interpreting the Coefficients])[ Finally, notice that, when transformed, our dependent variables is no more a "weighted sum of an intercept and independent variables": @@ -143,7 +134,9 @@ $ bold(y) &= e^((α + bold(X) bold(β))) \ - &= e^(α) dot e^(( X_((1)) dot β_((1)) )) dot e^(( X_((2)) dot β_((2)) )) dot dots dot e^(( X_((k)) dot β_((k)) )) + &= e^(α) dot e^((X_((1)) dot β_((1)))) dot e^(( + X_((2)) dot β_((2)) + )) dot dots dot e^((X_((k)) dot β_((k)))) $ #v(1em) diff --git a/slides/09-robust_regression.typ b/slides/09-robust_regression.typ index e2518c1..53e4ceb 100644 --- a/slides/09-robust_regression.typ +++ b/slides/09-robust_regression.typ @@ -5,9 +5,7 @@ #new-section-slide("Robust Regression") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 17: Models for robust inference @@ -23,9 +21,7 @@ #align(center)[#image("images/memes/not_normal_transparent.png")] ] -#slide( - title: [Robust Models], -)[ +#slide(title: [Robust Models])[ Almost always data from real world are really strange. #v(1em) @@ -39,9 +35,7 @@ - unrealistic model assumptions? ] -#slide( - title: [Outliers], -)[ +#slide(title: [Outliers])[ #v(2em) Models based on the *normal distribution are notoriously "non-robust" against @@ -50,9 +44,7 @@ with it. ] -#slide( - title: [Overdispersion], -)[ +#slide(title: [Overdispersion])[ Superdispersion and underdispersion #footnote[ rarer to find in the real world. ] @@ -65,9 +57,7 @@ *a single parameter* is added to allow for overdispersion @gelman2013bayesian. ] -#slide( - title: [Overdispersion Example], -)[ +#slide(title: [Overdispersion Example])[ Suppose you are analyzing data from car accidents. The model we generally use in this type of phenomena is *Poisson regression*. @@ -83,9 +73,7 @@ the desired phenomena. ] -#slide( - title: [Student's $t$ instead of Normal], -)[ +#slide(title: [Student's $t$ instead of Normal])[ Student's $t$ distribution has *wider #footnote[or "fatter".] tails* than the Normal distribution. @@ -104,16 +92,20 @@ function. 
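The "wider tails" claim is easy to verify numerically; a quick sketch with an arbitrary tail point:

```julia
using Distributions

x = 4.0                      # a point far out in the tail
pdf(Normal(0, 1), x)         # ≈ 1.3e-4
pdf(TDist(3), x)             # ≈ 9.2e-3, roughly 70× more density this far out
# An observation at x = 4 is wildly surprising under a normal likelihood,
# but entirely plausible under a Student's t with ν = 3.
```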
] -#slide( - title: [Student's $t$ instead of Normal], -)[ - #align( - center, - )[ +#slide(title: [Student's $t$ instead of Normal])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, y-min: -0.01, y-max: 0.42, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + y-min: -0.01, + y-max: 0.42, + { plot.add( domain: (-4, 4), samples: 200, // label: "Normal", // FIXME: depends on unreleased cetz 2.0.0 @@ -131,22 +123,18 @@ ] ] -#slide( - title: [Student's $t$ instead of Normal], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Student's $t$ instead of Normal])[ + #text(size: 16pt)[ By using a Student's $t$ distribution instead of the Normal distribution as likelihood functions, the model's error $σ$ does _not_ follow a Normal distribution, but a Student's $t$ distribution: $ bold(y) &tilde "Student"(ν, α + bold(X) bold(β), σ) \ - α &tilde "Normal"(μ_α, σ_α) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) \ - ν &tilde "Log-Normal"(2, 1) \ - σ &tilde "Exponential"(λ_σ) + ν &tilde "Log-Normal"(2, 1) \ + σ &tilde "Exponential"(λ_σ) $ Note that we are including an extra parameter $ν$, which represents the @@ -158,9 +146,7 @@ ] ] -#slide( - title: [Beta-Binomial instead of the Binomial], -)[ +#slide(title: [Beta-Binomial instead of the Binomial])[ The binomial distribution has a practical limitation that we only have one free parameter to estimate #footnote[since $n$ already comes from data.] ($p$). This implies in the *variance to determined by the mean*. Hence, the binomial @@ -174,9 +160,7 @@ against overdispersion*. ] -#slide( - title: [Beta-Binomial instead of Binomial], -)[ +#slide(title: [Beta-Binomial instead of Binomial])[ The *beta-binomial distribution* is a binomial distribution, where the probability of success $p$ is parameterized as a $"Beta"(α, β)$. @@ -190,18 +174,14 @@ Values of $β >= 1$ make the beta-binomial behave the same as a binomial. ] -#slide( - title: [Beta-Binomial instead of Binomial], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Beta-Binomial instead of Binomial])[ + #text(size: 18pt)[ $ bold(y) & tilde "Beta-Binomial"(n, p, φ) \ - p & tilde "Logistic/Probit"(α + bold(X) bold(β)) \ - α & tilde "Normal"(μ_α, σ_α) \ + p & tilde "Logistic/Probit"(α + bold(X) bold(β)) \ + α & tilde "Normal"(μ_α, σ_α) \ bold(β) & tilde "Normal"(μ_bold(β), σ_bold(β)) \ - φ & tilde "Exponential"(1) + φ & tilde "Exponential"(1) $ #v(1em) @@ -212,12 +192,8 @@ ] ] -#slide( - title: [Student's $t$ instead Binomial], -)[ - #text( - size: 15pt, - )[ +#slide(title: [Student's $t$ instead Binomial])[ + #text(size: 15pt)[ Also known as Robit #footnote[there is a great discussion between Gelman, Vehtari and Kurz at #link("https://discourse.mc-stan.org/t/robit-regression-not-robust/21245/")[ Stan's Discourse @@ -230,7 +206,7 @@ y_i &= cases(0 "if" z_i < 0, 1 "if" z_i > 0) \ z_i &= X_i bold(β) + ε_i \ ε_i &tilde "Student"(ν, 0, sqrt((ν - 2) / ν)) \ - ν &tilde "Gamma"(2, 0.1) ∈ [2, oo) + ν &tilde "Gamma"(2, 0.1) ∈ [2, oo) $ Here we are using the gamma distribution as a truncated Student's $t$ @@ -239,9 +215,7 @@ ] ] -#slide( - title: [Negative Binomial instead of Poisson], -)[ +#slide(title: [Negative Binomial instead of Poisson])[ This is the overdispersion example. The Poisson distribution uses a *single parameter for both its mean and variance*. 
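Distributions.jl parameterizes the negative binomial by number of successes and success probability, so a small helper is commonly used for the mean/dispersion form that appears next (a sketch; the name `NegativeBinomial2` is only a convention, not a library function):

```julia
using Distributions

# Mean–dispersion form: mean μ and "reciprocal dispersion" φ,
# so that the variance is μ + μ²/φ (smaller φ ⇒ more overdispersion).
function NegativeBinomial2(μ, φ)
    p = φ / (φ + μ)                  # convert to the (successes, probability) form
    return NegativeBinomial(φ, p)
end

d = NegativeBinomial2(10.0, 2.0)
mean(d), var(d)                      # (10.0, 60.0): variance far above the mean
```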
@@ -258,13 +232,11 @@ $φ$ is also known as a "reciprocal dispersion" parameter. ] -#slide( - title: [Negative Binomial instead of Poisson], -)[ +#slide(title: [Negative Binomial instead of Poisson])[ $ bold(y) &tilde "Negative Binomial"(e^((α + bold(X) bold(β))), φ) \ - φ &tilde "Gamma"(0.01, 0.01) \ - α &tilde "Normal"(μ_α, σ_α) \ + φ &tilde "Gamma"(0.01, 0.01) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) $ @@ -284,19 +256,17 @@ @mcelreath2020statistical. ] -#slide( - title: [Negative Binomial Mixture instead of Poisson], -)[ +#slide(title: [Negative Binomial Mixture instead of Poisson])[ Here, $S_i$ is a dummy variable, taking value $1$ if the $i$th observation has a value $≠ 0$. $S_i$ can be modeled using logistic regression: $ - bold(y) &cases( + bold(y) &cases( = 0 "if" S_i = 0, tilde "Negative Binomial"(e^((α + bold(X) bold(β))), φ ) "if" S_i = 1, ) \ P(S_i = 1) &= "Logistic/Probit"(bold(X) bold(γ)) \ - γ &tilde "Beta"(1, 1) + γ &tilde "Beta"(1, 1) $ #v(1em) @@ -304,9 +274,7 @@ $γ$ is a new coefficients which we give uniform prior of $"Beta"(1, 1)$. ] -#slide( - title: [Why Use Non-Robust Models?], -)[ +#slide(title: [Why Use Non-Robust Models?])[ The *central limit theorem* tells us that the *normal distribution* is an appropriate model for data that arises as a *sum of independent components*. diff --git a/slides/10-sparse_regression.typ b/slides/10-sparse_regression.typ index 0879937..6f1bd9e 100644 --- a/slides/10-sparse_regression.typ +++ b/slides/10-sparse_regression.typ @@ -5,9 +5,7 @@ #new-section-slide("Sparse Regression") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 12, Section 12.8: Models for regression coefficients @@ -22,9 +20,7 @@ #align(center)[#image("images/memes/horseshoe.jpg")] ] -#slide( - title: [What is Sparsity?], -)[ +#slide(title: [What is Sparsity?])[ #v(2em) Sparsity is a concept frequently encountered in statistics, signal processing, @@ -32,9 +28,7 @@ elements in a dataset or a vector are zero or close to zero. ] -#slide( - title: [How to Handle Sparsity?], -)[ +#slide(title: [How to Handle Sparsity?])[ Almost all techniques deal with some sort of *variable selection*, instead of altering data. @@ -44,9 +38,7 @@ don't want to throw information away. ] -#slide( - title: [Frequentist Approach], -)[ +#slide(title: [Frequentist Approach])[ The frequentist approach deals with sparse regression by staying in the "optimization" context but adding *Lagrangian constraints* #footnote[ this is called *LASSO* (least absolute shrinkage and selection operator) from #cite(, form: "prose") @@ -64,9 +56,7 @@ Here $|| dot ||_p$ is the $p$-norm. ] -#slide( - title: [Variable Selection Techniques], -)[ +#slide(title: [Variable Selection Techniques])[ #v(4em) - *discrete mixtures*: _spike-and-slab_ prior @@ -74,12 +64,8 @@ - *shrinkage priors*: _Laplace_ prior and _horseshoe_ prior @carvalho2009handling ] -#slide( - title: [Discrete Mixtures -- Spike-and-Slab Prior], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Discrete Mixtures -- Spike-and-Slab Prior])[ + #text(size: 16pt)[ Mixture of two distributions—one that is concentrated at zero (the "spike") and one with a much wider spread (the "slab"). 
This prior indicates that we believe most coefficients in our model are @@ -90,7 +76,7 @@ $ β_i | λ_i, c &tilde "Normal"(0, sqrt(λ_i^2 c^2)) \ - λ_i &tilde "Bernoulli"(p) + λ_i &tilde "Bernoulli"(p) $ where: @@ -103,55 +89,77 @@ ] ] -#slide( - title: [Discrete Mixtures -- Spike-and-Slab Prior], -)[ +#slide(title: [Discrete Mixtures -- Spike-and-Slab Prior])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.5, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.5, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => hs(x, 1, 0.05), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => hs(x, 1, 0.05), ) }, ) }, ) - }, caption: [$c = 1, λ = 0$], numbering: none, + }, + caption: [$c = 1, λ = 0$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.5, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.5, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => hs(x, 1, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => hs(x, 1, 1), ) }, ) }, ) - }, caption: [$c = 1, λ = 1$], numbering: none, + }, + caption: [$c = 1, λ = 1$], + numbering: none, ) ] ] ] -#slide( - title: [Shinkrage Priors -- Laplace Prior], -)[ +#slide(title: [Shinkrage Priors -- Laplace Prior])[ The Laplace distribution is a continuous probability distribution named after Pierre-Simon Laplace. It is also known as the double exponential distribution. @@ -169,36 +177,43 @@ It is a symmetrical exponential decay around $μ$ with scale governed by $b$. 
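A quick numerical check of that double-exponential shape (a sketch; Distributions.jl ships a `Laplace(μ, b)` type):

```julia
using Distributions

laplace_pdf(x, μ, b) = exp(-abs(x - μ) / b) / (2b)   # the double-exponential density

laplace_pdf(0.0, 0.0, 1.0), pdf(Laplace(0, 1), 0.0)  # both 0.5: sharp peak at μ
laplace_pdf(3.0, 0.0, 1.0), pdf(Laplace(0, 1), 3.0)  # both ≈ 0.025: exponential decay in the tails
```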
] -#slide( - title: [Shinkrage Priors -- Laplace Prior], -)[ - #align( - center, - )[ +#slide(title: [Shinkrage Priors -- Laplace Prior])[ + #align(center)[ #figure( { canvas( - length: 0.9cm, { + length: 0.9cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.55, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.55, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => laplace(x, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => laplace(x, 1), ) }, ) }, ) - }, caption: [$μ = 0, b = 1$], numbering: none, + }, + caption: [$μ = 0, b = 1$], + numbering: none, ) ] ] -#slide( - title: [Shinkrage Priors -- Horseshoe Prior], -)[ - #text( - size: 17pt, - )[ +#slide(title: [Shinkrage Priors -- Horseshoe Prior])[ + #text(size: 17pt)[ The horseshoe prior @carvalho2009handling assumes that each coefficient $β_i$ is conditionally independent with density $P_("HS")(β_i | τ )$, where $P_("HS")$ @@ -206,7 +221,7 @@ $ β_i | λ_i, τ &tilde "Normal"(0, sqrt(λ_i^2 τ^2)) \ - λ_i &tilde "Cauchy"^+ (0, 1) + λ_i &tilde "Cauchy"^+ (0, 1) $ where: @@ -219,55 +234,77 @@ ] ] -#slide( - title: [Discrete Mixtures -- Spike-and-Slab Prior], -)[ +#slide(title: [Discrete Mixtures -- Spike-and-Slab Prior])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.8, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.8, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => hs(x, 1, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => hs(x, 1, 1), ) }, ) }, ) - }, caption: [$τ = 1, λ = 1$], numbering: none, + }, + caption: [$τ = 1, λ = 1$], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: -4, x-max: 4, y-max: 0.8, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: -4, + x-max: 4, + y-max: 0.8, + y-min: 0, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => hs(x, 1, 1 / 2), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => hs(x, 1, 1 / 2), ) }, ) }, ) - }, caption: [$τ = 1, λ = 1 / 2$], numbering: none, + }, + caption: [$τ = 1, λ = 1 / 2$], + numbering: none, ) ] ] ] -#slide( - title: [Discrete Mixtures versus Shinkrage Priors], -)[ +#slide(title: [Discrete Mixtures versus Shinkrage Priors])[ *Discrete mixtures* offer the correct representation of sparse problems @carvalho2009handling by placing positive prior probability on $β_i = 0$ (regression coefficient), but pose several difficulties: mostly @@ -279,12 +316,8 @@ be very attractive computationally: again due to the *continuous property*. 
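The horseshoe's behavior is easiest to see by simulating coefficients from the prior (a sketch; the half-Cauchy comes from truncation, and the draw count is arbitrary):

```julia
using Distributions, Random, Statistics

Random.seed!(1)
τ = 1.0
half_cauchy = truncated(Cauchy(0, 1); lower = 0)

# β | λ ~ Normal(0, λτ),  λ ~ Cauchy⁺(0, 1)
λ = rand(half_cauchy, 10_000)
β = rand.(Normal.(0, λ .* τ))

quantile(β, [0.25, 0.5, 0.75]), maximum(abs, β)
# most draws hug zero (aggressive shrinkage) while a handful are huge (heavy tails)
```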
] -#slide( - title: [Horseshoe versus Laplace], -)[ - #text( - size: 17pt, - )[ +#slide(title: [Horseshoe versus Laplace])[ + #text(size: 17pt)[ The advantages of the Horseshoe prior over the Laplace prior are primarily: - *shrinkage*: The Horseshoe prior has infinitely heavy tails and an infinite @@ -304,9 +337,7 @@ ] ] -#slide( - title: [Effective Shinkrage Comparison], -)[ +#slide(title: [Effective Shinkrage Comparison])[ Makes more sense to compare the shinkrage effects of the proposed approaches so far. Assume for now that $σ^2 = τ^2 = 1$, and define $κ_i = 1 / (1 + λ_i^2)$. @@ -326,59 +357,79 @@ $ ] -#slide( - title: [Effective Shinkrage Comparison #footnote[spike-and-slab with $p = 1 / 2$ - would be very similar to Horseshoe but with discontinuities.]], -)[ +#slide(title: [Effective Shinkrage Comparison #footnote[spike-and-slab with $p = 1 / 2$ + would be very similar to Horseshoe but with discontinuities.]])[ #side-by-side[ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 1, y-max: 0.8, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 1, + y-max: 0.8, + y-min: 0, + { plot.add( - domain: (0.01, 0.99), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => beta(x, 0.5, 0.5), + domain: (0.01, 0.99), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => beta(x, 0.5, 0.5), ) }, ) }, ) - }, caption: [Laplace], numbering: none, + }, + caption: [Laplace], + numbering: none, ) ] ][ - #align( - center, - )[ + #align(center)[ #figure( { canvas( - length: 0.75cm, { + length: 0.75cm, + { plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, x-min: 0, x-max: 1, y-max: 0.8, y-min: 0, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + x-min: 0, + x-max: 1, + y-max: 0.8, + y-min: 0, + { plot.add( - domain: (0.1, 0.9), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => shinkragelaplace(x, 1 / 2), + domain: (0.1, 0.9), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => shinkragelaplace(x, 1 / 2), ) }, ) }, ) - }, caption: [Horseshoe], numbering: none, + }, + caption: [Horseshoe], + numbering: none, ) ] ] ] -#slide( - title: [Shinkrage Priors -- Horseshoe+], -)[ - #text( - size: 17pt, - )[ +#slide(title: [Shinkrage Priors -- Horseshoe+])[ + #text(size: 17pt)[ Natural extension from the Horseshoe that has improved performance with highly sparse data @bhadra2015horseshoe. @@ -386,8 +437,8 @@ $ β_i | λ_i, η_i, τ &tilde "Normal"(0, λ_i) \ - λ_i | η_i, τ &tilde "Cauchy"^+(0, τ η_i) \ - η_i &tilde "Cauchy"^+ (0, 1) + λ_i | η_i, τ &tilde "Cauchy"^+(0, τ η_i) \ + η_i &tilde "Cauchy"^+ (0, 1) $ where: @@ -399,9 +450,7 @@ ] ] -#slide( - title: [Shinkrage Priors -- Regularized Horseshoe], -)[ +#slide(title: [Shinkrage Priors -- Regularized Horseshoe])[ The Horseshoe and Horseshoe+ guarantees that the strong signals will not be overshrunk. However, this property can also be harmful, especially when the parameters are weakly identified. @@ -413,16 +462,12 @@ coefficient. 
] -#slide( - title: [Shinkrage Priors -- Regularized Horseshoe], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Shinkrage Priors -- Regularized Horseshoe])[ + #text(size: 16pt)[ $ β_i | λ_i, τ, c &tilde "Normal" (0, sqrt(τ^2 tilde(λ_i)^2)) \ - tilde(λ_i)^2 &= (c^2 λ_i^2) / (c^2 + τ^2 λ_i^2) \ - λ_i &tilde "Cauchy"^+(0, 1) + tilde(λ_i)^2 &= (c^2 λ_i^2) / (c^2 + τ^2 λ_i^2) \ + λ_i &tilde "Cauchy"^+(0, 1) $ where: @@ -438,9 +483,7 @@ ] ] -#slide( - title: [Shinkrage Priors -- R2-D2], -)[ Still, we can do better. The *R2-D2* #footnote[ +#slide(title: [Shinkrage Priors -- R2-D2])[ Still, we can do better. The *R2-D2* #footnote[ $R^2$-induced Dirichlet Decomposition ] prior @zhang2022bayesian has heavier tails and higher concentration around zero than the previous approaches. @@ -452,16 +495,12 @@ coefficient between the dependent variable and its modeled expectation.}. Then using that prior to "distribute" throughout the $bold(β)$. ] -#slide( - title: [Shinkrage Priors -- R2-D2], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Shinkrage Priors -- R2-D2])[ + #text(size: 16pt)[ $ - R^2 &tilde "Beta"(μ_(R^2) σ_(R^2), (1 - μ_(R^2)) σ_(R^2)) \ + R^2 &tilde "Beta"(μ_(R^2) σ_(R^2), (1 - μ_(R^2)) σ_(R^2)) \ bold(φ) &tilde "Dirichlet"(J, 1) \ - τ^2 &= (R^2) / (1 - R^2) \ + τ^2 &= (R^2) / (1 - R^2) \ bold(β) &= Z dot sqrt(bold(φ) τ^2) $ diff --git a/slides/11-hierarchical_models.typ b/slides/11-hierarchical_models.typ index 054b1b1..83fd05d 100644 --- a/slides/11-hierarchical_models.typ +++ b/slides/11-hierarchical_models.typ @@ -5,9 +5,7 @@ #new-section-slide("Hierarchical Models") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose"): - Chapter 5: Hierarchical models - Chapter 15: Hierarchical linear models @@ -29,9 +27,7 @@ #align(center)[#image("images/memes/hierarchical_models.jpg")] ] -#slide( - title: [I have many names...], -)[ +#slide(title: [I have many names...])[ Hierarchical models are also known for several names #footnote[ for the whole full list #link( @@ -50,9 +46,7 @@ - Nested Data Models ] -#slide( - title: [What are hierarchical models?], -)[ +#slide(title: [What are hierarchical models?])[ Statistical model specified in multiple levels that estimates parameters from the posterior distribution using a Bayesian approach. @@ -67,24 +61,21 @@ values. ] -#slide( - title: [What are hierarchical models?], -)[ - #text( - size: 16pt, - )[ +#slide(title: [What are hierarchical models?])[ + #text(size: 16pt)[ Hyperparameter $φ$ that parameterizes $θ_1, θ_2, dots, θ_K$, that are used to infer the posterior density of some random variable $bold(y) = y_1, y_2, dots, y_K$ ] - #align( - center, - )[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { import draw: * set-style( - mark: (end: ">", fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 1, + mark: (end: ">", fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 1, ) circle((6, 0)) content((6, 0), [#align(center)[$φ$]]) @@ -133,25 +124,22 @@ ] ] -#slide( - title: [What are hierarchical models?], -)[ - #text( - size: 14pt, - )[ +#slide(title: [What are hierarchical models?])[ + #text(size: 14pt)[ Even that the observations directly inform only a single set of parameters, a hierarchical model couples individual parameters, and provides a "backdoor" for information flow. 
] - #align( - center, - )[ + #align(center)[ #side-by-side[ #canvas( - length: 0.7cm, { + length: 0.7cm, + { import draw: * set-style( - mark: (end: ">", fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 1, + mark: (end: ">", fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 1, ) circle((6, 0)) content((6, 0), [#align(center)[$φ$]]) @@ -195,10 +183,13 @@ ) ][ #canvas( - length: 0.7cm, { + length: 0.7cm, + { import draw: * set-style( - mark: (end: ">", fill: black, size: 0.3), stroke: (thickness: 2pt), radius: 1, + mark: (end: ">", fill: black, size: 0.3), + stroke: (thickness: 2pt), + radius: 1, ) circle((6, 0)) content((6, 0), [#align(center)[$φ$]]) @@ -245,9 +236,7 @@ ) ] ] - #text( - size: 14pt, - )[ + #text(size: 14pt)[ For example, the observations from the $k$th group, $y_k$, informs directly the parameters that quantify the $k$th group's behavior, $θ_k$. These parameters, however, inform directly the population-level @@ -259,9 +248,7 @@ ] ] -#slide( - title: [When to Use Hierarchical Models?], -)[ +#slide(title: [When to Use Hierarchical Models?])[ #v(3em) *Hierarchical models* are used when information is available in *several levels @@ -270,12 +257,8 @@ also performing a crucial role in the development of *computational strategies*. ] -#slide( - title: [When to Use Hierarchical Models?], -)[ - #text( - size: 16pt, - )[ +#slide(title: [When to Use Hierarchical Models?])[ + #text(size: 16pt)[ Hierarchical models are particularly appropriate for research projects where participant data can be organized in more than one level #footnote[ also known as nested data. @@ -295,9 +278,7 @@ ] ] -#slide( - title: [When to Use Hierarchical Models?], -)[ +#slide(title: [When to Use Hierarchical Models?])[ Another good use case is *big data* @gelman2013bayesian. - simple nonhierarchical models are usually inappropriate for hierarchical data: @@ -310,9 +291,7 @@ thereby *avoiding problems of overfitting*. ] -#slide( - title: [When to Use Hierarchical Models?], -)[ +#slide(title: [When to Use Hierarchical Models?])[ #v(2em) Most important is *not to violate* the *exchangeability assumption* @@ -323,9 +302,7 @@ This assumption stems from the principle that *groups are _exchangeable_*. ] -#slide( - title: [Hyperprior], -)[ +#slide(title: [Hyperprior])[ In hierarchical models, we have a hyperprior, which is a prior's prior: #v(1em) @@ -333,7 +310,7 @@ $ bold(y) &tilde "Normal"(10, bold(θ)) \ bold(θ) &tilde "Normal"(0, φ) \ - φ &tilde "Exponential(1)" + φ &tilde "Exponential(1)" $ #v(2em) @@ -343,12 +320,8 @@ their own prior (which becomes a hyperprior) $φ$. ] -#slide( - title: [Frequentist versus Bayesian Approaches], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Frequentist versus Bayesian Approaches])[ + #text(size: 18pt)[ There are also hierarchical models in frequentist statistics. They are mainly available in the lme4 package @lme4, and also in MixedModels.jl @MixedModels. @@ -370,9 +343,7 @@ ] ] -#slide( - title: [Frequentist versus Bayesian Approaches], -)[ +#slide(title: [Frequentist versus Bayesian Approaches])[ #v(3em) To sum up, *frequentist approach for hierarchical models is not robust* in both @@ -381,9 +352,7 @@ provide $p$-values due to *strong assumptions that are almost always violated*). ] -#slide( - title: [Approaches to Hierarchical Modeling], -)[ +#slide(title: [Approaches to Hierarchical Modeling])[ #v(1em) - *Varying-intercept* model: One group-level intercept besides the @@ -400,9 +369,7 @@ group-level coefficient(s). 
] -#slide( - title: [Mathematical Specification of Hierarchical Models], -)[ +#slide(title: [Mathematical Specification of Hierarchical Models])[ #v(4em) We have $N$ observations organized in $J$ groups with $K$ independent variables. @@ -413,40 +380,34 @@ $ bold(y) &tilde "Normal"(α_j + bold(X) dot bold(β), σ) \ - α_j &tilde "Normal"(α, τ) \ - α &tilde "Normal"(μ_α, σ_α) \ + α_j &tilde "Normal"(α, τ) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_{bold(β)}, σ_(bold(β))) \ - τ &tilde "Cauchy"^+(0, ψ_(α)) \ - σ &tilde "Exponential"(λ_σ) + τ &tilde "Cauchy"^+(0, ψ_(α)) \ + σ &tilde "Exponential"(λ_σ) $ ] -#slide( - title: [Mathematical Specification -- Varying-Intercept Model], -)[ +#slide(title: [Mathematical Specification -- Varying-Intercept Model])[ - #text( - size: 14pt, - )[ + #text(size: 14pt)[ If you need to extend to more than one group, such as $J_1, J_2, dots$: $ - bold(y) &tilde "Normal"(α_j_1 + α_j_2 + bold(X) bold(β), σ) \ - α_(j_1) &tilde "Normal"(α_1, τ_α_j_1) \ - α_(j_2) &tilde "Normal"(α_2, τ_α_j_2) \ - α_1 &tilde "Normal"(μ_(α_1), σ_α_1) \ - α_2 &tilde "Normal"(μ_(α_2), σ_α_2) \ - bold(β) &tilde "Normal"(μ_(bold(β)), σ_(bold(β))) \ + bold(y) &tilde "Normal"(α_j_1 + α_j_2 + bold(X) bold(β), σ) \ + α_(j_1) &tilde "Normal"(α_1, τ_α_j_1) \ + α_(j_2) &tilde "Normal"(α_2, τ_α_j_2) \ + α_1 &tilde "Normal"(μ_(α_1), σ_α_1) \ + α_2 &tilde "Normal"(μ_(α_2), σ_α_2) \ + bold(β) &tilde "Normal"(μ_(bold(β)), σ_(bold(β))) \ τ_(α_j_1) &tilde "Cauchy"^+(0, ψ_α_j_1) \ τ_(α_j_2) &tilde "Cauchy"^+(0, ψ_α_j_2) \ - σ &tilde "Exponential"(λ_σ) + σ &tilde "Exponential"(λ_σ) $ ] ] -#slide( - title: [Mathematical Specification -- Varying-(Intercept-)Slope Model], -)[ +#slide(title: [Mathematical Specification -- Varying-(Intercept-)Slope Model])[ If we want a varying intercept, we just insert a column filled with $1$s in the data matrix $bold(X)$. @@ -459,12 +420,10 @@ intercept. ] -#slide( -title: [Mathematical Specification -- Varying-(Intercept-)Slope Model], -)[ -Hence, we have as a data matrix: +#slide(title: [Mathematical Specification -- Varying-(Intercept-)Slope Model])[ + Hence, we have as a data matrix: -#v(2em) + #v(2em) // typstfmt::off $ @@ -480,17 +439,15 @@ Hence, we have as a data matrix: // typstfmt::on ] -#slide( - title: [Mathematical Specification -- Varying-(Intercept-)Slope Model], -)[ +#slide(title: [Mathematical Specification -- Varying-(Intercept-)Slope Model])[ This example is for linear regression: $ - bold(y) &tilde "Normal"(bold(X) bold(β)_{j}, σ) \ + bold(y) &tilde "Normal"(bold(X) bold(β)_{j}, σ) \ bold(β)_j &tilde "Multivariate Normal"(bold(μ)_j, bold(Σ)) "for" j ∈ {1, dots, J} \ - bold(Σ) &tilde "LKJ"(η) \ - σ &tilde "Exponential"(λ_σ) + bold(Σ) &tilde "LKJ"(η) \ + σ &tilde "Exponential"(λ_σ) $ #v(1em) @@ -500,26 +457,22 @@ Hence, we have as a data matrix: filled with $1$s (intercept). 
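A minimal Turing.jl sketch of the varying-intercept specification above (priors are placeholders, and `idx` is a hypothetical vector giving each observation's group):

```julia
using Turing

@model function varying_intercept(X, idx, y, J = maximum(idx))
    # population-level parameters (weakly informative placeholder priors)
    α ~ Normal(0, 10)
    β ~ filldist(Normal(0, 2), size(X, 2))
    σ ~ Exponential(1)

    # group-level intercepts and their hyperprior on the (non-negative) scale τ
    τ   ~ truncated(Cauchy(0, 2); lower = 0)
    α_j ~ filldist(Normal(0, τ), J)          # group offsets around the common α

    η = α .+ α_j[idx] .+ X * β               # linear predictor, one entry per observation
    for i in eachindex(y)
        y[i] ~ Normal(η[i], σ)
    end
end

# hypothetical usage, with idx[i] ∈ 1:J giving the group of observation i:
# chain = sample(varying_intercept(X, idx, y), NUTS(), 2_000)
```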
] -#slide( - title: [Mathematical Specification -- Varying-(Intercept-)Slope Model], -)[ +#slide(title: [Mathematical Specification -- Varying-(Intercept-)Slope Model])[ If you need to extend to more than one group, such as $J_1, J_2, dots$: $ - bold(y) &tilde "Normal"(α + bold(X) bold(β)_j_1 + bold(X) bold(β)_j_2, σ) \ + bold(y) &tilde "Normal"(α + bold(X) bold(β)_j_1 + bold(X) bold(β)_j_2, σ) \ bold(β)_j_1 &tilde "Multivariate Normal"(bold(μ)_j_1, bold(Σ)_1) "for" j_1 ∈ {1, dots, J_1} \ bold(β)_j_2 &tilde "Multivariate Normal"(bold(μ)_j_2, bold(Σ)_2) "for" j_2 ∈ {1, dots, J_2} \ - bold(Σ)_1 &tilde "LKJ"(η_1) \ - bold(Σ)_2 &tilde "LKJ"(η_2) \ - σ &tilde "Exponential"(λ_σ) + bold(Σ)_1 &tilde "LKJ"(η_1) \ + bold(Σ)_2 &tilde "LKJ"(η_2) \ + σ &tilde "Exponential"(λ_σ) $ ] -#slide( - title: [Priors for Covariance Matrices], -)[ +#slide(title: [Priors for Covariance Matrices])[ We can specify a prior for a covariance matrix $bold(Σ)$. #v(1em) @@ -538,20 +491,14 @@ Hence, we have as a data matrix: $bold(Σ)$ (is is the $bold(Σ)$'s diagonal). ] -#slide( - title: [Priors for Covariance Matrices], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Priors for Covariance Matrices])[ + #text(size: 16pt)[ Additionally, the correlation matrix $bold(Ω)$ can be decomposed once more for greater computational efficiency. Since all correlations matrices are symmetric and positive definite (all of its eigenvalues are real numbers $RR$ and positive $>0$), we can use the - #link( - "https://en.wikipedia.org/wiki/Cholesky_decomposition", - )[Cholesky Decomposition] + #link("https://en.wikipedia.org/wiki/Cholesky_decomposition")[Cholesky Decomposition] to decompose it into a triangular matrix (which is much more computational efficient to handle): diff --git a/slides/12-mcmc.typ b/slides/12-mcmc.typ index 8398bd2..54734ef 100644 --- a/slides/12-mcmc.typ +++ b/slides/12-mcmc.typ @@ -6,12 +6,8 @@ #new-section-slide("Markov Chain Monte Carlo (MCMC) and Model Metrics") -#slide( - title: "Recommended References", -)[ - #text( - size: 18pt, - )[ +#slide(title: "Recommended References")[ + #text(size: 18pt)[ - #cite(, form: "prose"): - Chapter 10: Introduction to Bayesian computation - Chapter 11: Basics of Markov chain simulation @@ -37,12 +33,8 @@ #align(center)[#image("images/memes/computation.png")] ] -#slide( - title: [Monte Carlo Methods], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [Monte Carlo Methods])[ + #side-by-side(columns: (4fr, 1fr))[ - #link("http://mc-stan.org/")[Stan] is named after the mathematician Stanislaw Ulam, who was involved in the Manhattan project, and while trying to calculate @@ -57,18 +49,12 @@ ] ] -#slide( - title: [ - History Behind the Monte Carlo Methods - #footnote[those who are interested, should read #cite(, form: "prose").] - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #text( - size: 17pt, - )[ +#slide(title: [ + History Behind the Monte Carlo Methods + #footnote[those who are interested, should read #cite(, form: "prose").] +])[ + #side-by-side(columns: (4fr, 1fr))[ + #text(size: 17pt)[ - The idea came when Ulam was playing Solitaire while recovering from surgery. Ulam was trying to calculate the deterministic, i.e. analytical solution, of the probability of being dealt an already-won game. 
The calculations where almost @@ -86,12 +72,8 @@ ] ] -#slide( - title: [Why Do We Need MCMC?], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Why Do We Need MCMC?])[ + #text(size: 18pt)[ The main computation barrier for Bayesian statistics is the denominator in Bayes' theorem, $P("data")$: @@ -115,9 +97,7 @@ ] ] -#slide( - title: [Why Do We Need MCMC?], -)[ +#slide(title: [Why Do We Need MCMC?])[ However, in the case of continuous values, the denominator $P("data")$ turns into a very big and nasty integral: @@ -135,9 +115,7 @@ *This is where Monte Carlo methods comes into play!* ] -#slide( - title: [Why Do We Need the Denominator $P("data")$?], -)[ +#slide(title: [Why Do We Need the Denominator $P("data")$?])[ To normalize the posterior with the intent of making it a *valid probability*. This means that the probability for all possible parameters' values must be $1$: @@ -152,9 +130,7 @@ $ ] -#slide( - title: [What If We Remove the Denominator $P("data")$?], -)[ +#slide(title: [What If We Remove the Denominator $P("data")$?])[ By removing the denominator $("data")$, we conclude that the posterior $P(θ | "data")$ is *proportional* to the product of the prior and the likelihood $P(θ) dot P("data" | θ)$: @@ -166,9 +142,7 @@ $ ] -#slide( - title: [Markov Chain Monte Carlo (MCMC)], -)[ +#slide(title: [Markov Chain Monte Carlo (MCMC)])[ Here is where *Markov Chain Monte Carlo* comes in: MCMC is an ample class of computational tools to approximate integrals and @@ -189,14 +163,10 @@ $P("data")$. ] -#slide( - title: [ - Markov Chains - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [ + Markov Chains +])[ + #side-by-side(columns: (4fr, 1fr))[ #v(2em) - We proceed by defining an *ergodic Markov chain* #footnote[ @@ -215,14 +185,10 @@ ] ] -#slide( - title: [ - Markov Chains - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [ + Markov Chains +])[ + #side-by-side(columns: (4fr, 1fr))[ - Markov chains have a property that the probability distribution of the next state *depends only on the current state and not in the sequence of events that @@ -241,18 +207,19 @@ ] ] -#slide( - title: [ - Example of a Markov Chain - ], -)[ - #align( - center, - )[ +#slide(title: [ + Example of a Markov Chain +])[ + #align(center)[ #canvas( - length: 0.9cm, { + length: 0.9cm, + { import draw: * - set-style(mark: (end: ">", fill: black), stroke: (thickness: 2pt), radius: 2) + set-style( + mark: (end: ">", fill: black), + stroke: (thickness: 2pt), + radius: 2, + ) circle((0, 0)) content((0, 0), [#align(center)[Sun]]) bezier-through((0, 2), (4, 4), (8, 2)) @@ -273,12 +240,8 @@ ] ] -#slide( - title: [Markov Chains], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Markov Chains])[ + #text(size: 18pt)[ The efficacy of this approach depends on: - *how big $r$ must be* to guarantee an *adequate sample*. @@ -297,9 +260,7 @@ ] ] -#slide( - title: [MCMC Algorithms], -)[ +#slide(title: [MCMC Algorithms])[ We have *TONS* of MCMC algorithms #footnote[ see the #link( @@ -319,9 +280,7 @@ @neal2011mcmc @betancourtConceptualIntroductionHamiltonian2017. ] -#slide( - title: [MCMC Algorithms -- Metropolis-Hastings], -)[ +#slide(title: [MCMC Algorithms -- Metropolis-Hastings])[ These are the first MCMC algorithms. They use an *acceptance/rejection rule for the proposals*. They are characterized by proposals originated from a random walk in the parameter space. The *Gibbs algorithm* can be seen as a *special @@ -335,9 +294,7 @@ the parameter space @beskosOptimalTuningHybrid2013. 
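Ahead of the Metropolis pseudocode that follows, here is a minimal plain-Julia sketch of a random-walk Metropolis sampler for a one-dimensional target; the function name, proposal width, and seed are illustrative assumptions. Note that only the unnormalized log-posterior is needed, since the evidence P(data) cancels in the acceptance ratio.

```julia
using Distributions, Random

# Random-walk Metropolis for a 1-D target. Only the unnormalized log-posterior
# is needed: the evidence P(data) cancels in the acceptance ratio r.
function metropolis(logtarget, θ0; iters=10_000, width=1.0, rng=MersenneTwister(123))
    θ = θ0
    draws = Vector{Float64}(undef, iters)
    accepted = 0
    for t in 1:iters
        θstar = θ + width * randn(rng)          # symmetric random-walk proposal
        logr = logtarget(θstar) - logtarget(θ)  # log acceptance ratio
        if log(rand(rng)) < logr                # accept with probability min(1, r)
            θ = θstar
            accepted += 1
        end
        draws[t] = θ                            # on rejection, the chain repeats its state
    end
    return draws, accepted / iters
end

# Example: sampling a standard normal target.
draws, rate = metropolis(θ -> logpdf(Normal(0, 1), θ), 0.0)
```

Tuning `width` trades off acceptance rate against step size, which is exactly the trade-off behind the optimal acceptance rates quoted above.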
] -#slide( - title: [MCMC Algorithms -- Hamiltonian Monte Carlo], -)[ +#slide(title: [MCMC Algorithms -- Hamiltonian Monte Carlo])[ The current most efficient MCMC algorithms. They try to *avoid the random walk behavior by introducing an auxiliary vector of momenta using Hamiltonian dynamics*. The proposals are "guided" to higher density @@ -351,17 +308,11 @@ dimension in the parameter space @beskosOptimalTuningHybrid2013. ] -#slide( - title: [ - Metropolis Algorithm - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ - #text( - size: 18pt, - )[ +#slide(title: [ + Metropolis Algorithm +])[ + #side-by-side(columns: (4fr, 1fr))[ + #text(size: 18pt)[ The first broadly used MCMC algorithm to generate samples from a Markov chain was originated in the physics literature in the 1950s and is called Metropolis @metropolisEquationStateCalculations1953, in honor of the first author @@ -386,12 +337,8 @@ ] ] -#slide( - title: [Metropolis Algorithm], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Metropolis Algorithm])[ + #text(size: 18pt)[ Metropolis is a random walk through the parameter sample space, where the probability of the Markov chain changing its state is defined as: @@ -415,12 +362,8 @@ ] ] -#slide( - title: [Metropolis Algorithm], -)[ - #algo( - line-numbers: false, - )[ +#slide(title: [Metropolis Algorithm])[ + #algo(line-numbers: false)[ Define an initial set $bold(θ)^0 ∈ RR^p$ that $P(bold(θ)^0 | bold(y)) > 0$ \ for $t = 1, 2, dots$ #i \ Sample a proposal of $bold(θ)^*$ from a proposal distribution in time @@ -440,20 +383,26 @@ ] ] -#slide( - title: [Visual Intuition -- Metropolis], -)[ - #align( - center, - )[ +#slide(title: [Visual Intuition -- Metropolis])[ + #align(center)[ #import draw: * #canvas( - length: 0.9cm, { + length: 0.9cm, + { set-style(stroke: (thickness: 2pt)) plot.plot( - size: (16, 9), x-label: none, y-label: "PDF", x-tick-step: 1, y-tick-step: 0.1, y-max: 0.55, { + size: (16, 9), + x-label: none, + y-label: "PDF", + x-tick-step: 1, + y-tick-step: 0.1, + y-max: 0.55, + { plot.add( - domain: (-4, 4), samples: 200, style: (stroke: (paint: julia-purple, thickness: 2pt)), x => gaussian(x, 0, 1), + domain: (-4, 4), + samples: 200, + style: (stroke: (paint: julia-purple, thickness: 2pt)), + x => gaussian(x, 0, 1), ) }, ) @@ -473,14 +422,10 @@ ] ] -#slide( - title: [ - Metropolis-Hastings Algorithm - ], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [ + Metropolis-Hastings Algorithm +])[ + #side-by-side(columns: (4fr, 1fr))[ In the 1970s emerged a generalization of the Metropolis algorithm, which *does not need that the proposal distributions be symmetric*: @@ -499,12 +444,8 @@ ] ] -#slide( - title: [Metropolis-Hastings Algorithm], -)[ - #algo( - line-numbers: false, - )[ +#slide(title: [Metropolis-Hastings Algorithm])[ + #algo(line-numbers: false)[ Define an initial set $bold(θ)^0 ∈ RR^p$ that $P(bold(θ)^0 | bold(y)) > 0$ \ for $t = 1, 2, dots$ #i \ Sample a proposal of $bold(θ)^*$ from a proposal distribution in time @@ -512,7 +453,11 @@ As an acceptance/rejection rule, compute the proportion of the probabilities: \ $ - r = ((P(bold(θ)^* | bold(y))) / (J_t (bold(θ)^* | bold(θ)^(t - 1)))) / ((P(bold(θ)^(t - 1) | bold(y))) / (J_t (bold(θ)^(t - 1) | bold(θ)^*))) + r = ((P(bold(θ)^* | bold(y))) / (J_t ( + bold(θ)^* | bold(θ)^(t - 1) + ))) / ((P(bold(θ)^(t - 1) | bold(y))) / (J_t ( + bold(θ)^(t - 1) | bold(θ)^* + ))) $ Assign: @@ -524,25 +469,19 @@ ] ] -#slide( - title: [Metropolis-Hastings Animation], -)[ -#v(4em) +#slide(title: [Metropolis-Hastings 
Animation])[ + #v(4em) -#align( - center, -)[ -See Metropolis-Hastings in action at #link( + #align(center)[ + See Metropolis-Hastings in action at #link( "https://chi-feng.github.io/mcmc-demo/app.html?algorithm=RandomWalkMH&target=banana", )[ `chi-feng/mcmc-demo` ]. -] + ] ] -#slide( - title: [Limitations of the Metropolis Algorithms], -)[ +#slide(title: [Limitations of the Metropolis Algorithms])[ The limitations of the Metropolis-Hastings algorithms are mainly *computational*: @@ -558,12 +497,8 @@ See Metropolis-Hastings in action at #link( overcome the low efficiency. ] -#slide( - title: [Gibbs Algorithm], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [Gibbs Algorithm])[ + #side-by-side(columns: (4fr, 1fr))[ To circumvent Metropolis' low acceptance rate, the Gibbs algorithm was conceived. Gibbs *do not have an acceptance/rejection rule* for the Markov chain state change: *all proposals are accepted!* @@ -579,9 +514,7 @@ See Metropolis-Hastings in action at #link( ] ] -#slide( - title: [Gibbs Algorithm], -)[ +#slide(title: [Gibbs Algorithm])[ The Gibbs algorithm is very useful in multidimensional sample spaces. It is also known as *alternating conditional sampling*, because we always sample a parameter *conditioned* on the probability of the other model's parameters. @@ -598,12 +531,8 @@ See Metropolis-Hastings in action at #link( $ ] -#slide( - title: [Gibbs Algorithm], -)[ - #algo( - line-numbers: false, - )[ +#slide(title: [Gibbs Algorithm])[ + #algo(line-numbers: false)[ Define an initial set $bold(θ)^0 ∈ RR^p$ that $P(bold(θ)^0 | bold(y)) > 0$ \ for $t = 1, 2, dots$ #i \ Assign: @@ -618,25 +547,19 @@ See Metropolis-Hastings in action at #link( ] ] -#slide( - title: [Gibbs Animation], -)[ -#v(4em) +#slide(title: [Gibbs Animation])[ + #v(4em) -#align( - center, -)[ -See Gibbs in action at #link( + #align(center)[ + See Gibbs in action at #link( "https://chi-feng.github.io/mcmc-demo/app.html?algorithm=GibbsSampling&target=banana", )[ `chi-feng/mcmc-demo` ]. -] + ] ] -#slide( - title: [Limitations of the Gibbs Algorithm], -)[ +#slide(title: [Limitations of the Gibbs Algorithm])[ The main limitation of Gibbs algorithm is with relation to *alternating conditional sampling*: @@ -650,12 +573,8 @@ See Gibbs in action at #link( movements*, and never multidimensional diagonal movements. ] -#slide( - title: [Hamiltonian Monte Carlo (HMC)], -)[ - #side-by-side( - columns: (4fr, 1fr), - )[ +#slide(title: [Hamiltonian Monte Carlo (HMC)])[ + #side-by-side(columns: (4fr, 1fr))[ Metropolis' low acceptance rate and Gibbs' low performance in multidimensional problems (where the posterior geometry is highly complex) made a new class of MCMC algorithms to emerge. @@ -670,12 +589,8 @@ See Gibbs in action at #link( ] ] -#slide( - title: [HMC Algorithm], -)[ - #text( - size: 18pt, - )[ +#slide(title: [HMC Algorithm])[ + #text(size: 18pt)[ HMC algorithm is an adaptation of the MH algorithm, and employs a guidance scheme to the generation of new proposals. It boosts the acceptance rate, and, consequently, has a better efficiency. @@ -694,9 +609,7 @@ See Gibbs in action at #link( ] ] -#slide( - title: [History of HMC Algorithm], -)[ +#slide(title: [History of HMC Algorithm])[ HMC was originally described in the physics literature #footnote[where is called "Hybrid" Monte Carlo (HMC)] @duaneHybridMonteCarlo1987. @@ -714,9 +627,7 @@ See Gibbs in action at #link( and #cite(, form: "prose"). 
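To make the "alternating conditional sampling" idea described above concrete, here is a minimal plain-Julia sketch of a Gibbs sampler for a bivariate standard normal with correlation ρ, whose full conditionals are known in closed form; the target distribution and seed are illustrative assumptions.

```julia
using Distributions, Random

# Gibbs sampling for a bivariate standard normal with correlation ρ: every draw
# comes from a full conditional, so there is no accept/reject step, but the
# chain can only move parallel to one coordinate axis at a time.
function gibbs_binormal(ρ; iters=10_000, rng=MersenneTwister(123))
    x, y = 0.0, 0.0
    draws = Matrix{Float64}(undef, iters, 2)
    for t in 1:iters
        x = rand(rng, Normal(ρ * y, sqrt(1 - ρ^2)))  # x | y
        y = rand(rng, Normal(ρ * x, sqrt(1 - ρ^2)))  # y | x
        draws[t, :] .= (x, y)
    end
    return draws
end

draws = gibbs_binormal(0.8)
```

The axis-aligned moves are what make Gibbs struggle when parameters are highly correlated, which is the limitation discussed above.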
] -#slide( - title: [What Changes With HMC?], -)[ +#slide(title: [What Changes With HMC?])[ HMC uses Hamiltonian dynamics applied to particles efficiently exploring a posterior probability geometry, while also being robust to complex posterior's geometries. @@ -727,12 +638,8 @@ See Gibbs in action at #link( Gibbs' parameters correlation issues ] -#slide( - title: [Intuition Behind the HMC Algorithm], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Intuition Behind the HMC Algorithm])[ + #text(size: 18pt)[ For every parameter $θ_j$, HMC adds a momentum variable $φ_j$. The posterior density $P(bold(θ) | y)$ is incremented by an independent momenta distribution $P(bold(φ))$, hence defining the following joint probability: @@ -757,9 +664,7 @@ See Gibbs in action at #link( ] ] -#slide( - title: [Momenta Distribution -- $P(bold(φ))$], -)[ +#slide(title: [Momenta Distribution -- $P(bold(φ))$])[ Generally we give $bold(φ)$ a multivariate normal distribution with mean $0$ and covariance $bold(M)$, a "mass matrix". @@ -772,15 +677,9 @@ See Gibbs in action at #link( $ φ_j tilde "Normal"(0, M_(j j)) $ ] -#slide( - title: [HMC Algorithm], -)[ - #text( - size: 13pt, - )[ - #algo( - line-numbers: false, - )[ +#slide(title: [HMC Algorithm])[ + #text(size: 13pt)[ + #algo(line-numbers: false)[ Define an initial set $bold(θ)^0 ∈ RR^p$ that $P(bold(θ)^0 | bold(y)) > 0$ \ Sample $bold(φ)$ from a $"Multivariate Normal"(bold(0),bold(M))$ \ Simultaneously sample $bold(θ)^*$ and $bold(φ)$ with $L$ steps and step-size $ε$ \ @@ -788,14 +687,20 @@ See Gibbs in action at #link( $bold(θ)^* <- bold(θ)$ \ for $1, 2, dots, L$ #i \ Use the $log$ of the posterior's gradient $bold(θ)^*$ to produce a half-step of $bold(φ)$: - $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P(bold(θ)^* | bold(y))) / (dif θ)$ \ + $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P( + bold(θ)^* | bold(y) + )) / (dif θ)$ \ Use $bold(φ)$ to update $bold(θ)^*$: $bold(θ)^* <- bold(θ)^* + ε bold(M)^(-1) bold(φ)$ \ Use again $bold(θ)^*$ $log$ gradient to produce a half-step of $bold(φ)$: - $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P(bold(θ)^* | bold(y))) / (dif θ)$ #d \ + $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P( + bold(θ)^* | bold(y) + )) / (dif θ)$ #d \ As an acceptance/rejection rule, compute: \ $ - r = (P (bold(θ)^* | bold(y) ) P (bold(φ)^*))/ (P (bold(θ)^(t - 1) | bold(y)) P(bold(φ)^(t - 1))) + r = (P (bold(θ)^* | bold(y)) P (bold(φ)^*)) / (P ( + bold(θ)^(t - 1) | bold(y) + ) P(bold(φ)^(t - 1))) $ Assign: @@ -807,25 +712,19 @@ See Gibbs in action at #link( ] ] -#slide( - title: [HMC Animation], -)[ -#v(4em) +#slide(title: [HMC Animation])[ + #v(4em) -#align( - center, -)[ -See HMC in action at #link( + #align(center)[ + See HMC in action at #link( "https://chi-feng.github.io/mcmc-demo/app.html?algorithm=HamiltonianHMC&target=banana", )[ `chi-feng/mcmc-demo` ]. -] + ] ] -#slide( - title: [An Interlude into Numerical Integration], -)[ +#slide(title: [An Interlude into Numerical Integration])[ In the field of ordinary differential equations (ODE), we have the idea of "discretizing" a system of ODEs by applying a small step-size $ε$ #footnote[sometimes also called $h$]. Such approaches are called "numerical integrators" and are composed by an ample @@ -838,12 +737,8 @@ See HMC in action at #link( future time $t$ from specific initial conditions. 
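The HMC pseudocode above is short enough to sketch directly. Below is a minimal plain-Julia version of a single HMC transition with an identity mass matrix; `logtarget` and `gradlogtarget` stand in for the log-posterior and its gradient, and the values of ε and L are illustrative assumptions. The integrator used here is the leapfrog scheme discussed next.

```julia
using LinearAlgebra, Random

# One HMC transition: leapfrog half-steps of the momenta φ around full steps of
# θ, followed by a Metropolis-style accept/reject on the joint density of (θ, φ).
function hmc_step(logtarget, gradlogtarget, θ; ε=0.1, L=20, rng=MersenneTwister(123))
    φ = randn(rng, length(θ))                      # resample momenta ~ Normal(0, I)
    θstar, φstar = copy(θ), copy(φ)
    for _ in 1:L
        φstar .+= 0.5 * ε .* gradlogtarget(θstar)  # half-step of the momenta
        θstar .+= ε .* φstar                       # full step of the parameters
        φstar .+= 0.5 * ε .* gradlogtarget(θstar)  # half-step of the momenta
    end
    # log r = log P(θ* | y) + log P(φ*) − log P(θ | y) − log P(φ), with P(φ) standard normal
    logr = (logtarget(θstar) - 0.5 * dot(φstar, φstar)) -
           (logtarget(θ) - 0.5 * dot(φ, φ))
    return log(rand(rng)) < logr ? θstar : θ
end

# Example: one transition targeting a 2-D standard normal.
θnew = hmc_step(θ -> -0.5 * dot(θ, θ), θ -> -θ, [1.0, -1.0])
```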
] -#slide( - title: [An Interlude into Numerical Integration], -)[ - #side-by-side( - columns: (3fr, 2fr), - )[ +#slide(title: [An Interlude into Numerical Integration])[ + #side-by-side(columns: (3fr, 2fr))[ The problem is that Euler method, when applied to Hamiltonian dynamics, *does not preserve volume*. @@ -858,23 +753,20 @@ See HMC in action at #link( ][ #figure( - image("images/mcmc/euler_0_3.jpg", height: 55%), caption: [HMC numerically integrated using Euler with $ε = 0.3$ and $L = 20$], + image("images/mcmc/euler_0_3.jpg", height: 55%), + caption: [HMC numerically integrated using Euler with $ε = 0.3$ and $L = 20$], ) ] ] -#slide( - title: [ - An Interlude into Numerical Integration - #footnote[ - An excellent textbook for numerical and symplectic integrator is - #cite(, form: "prose"). - ] - ], -)[ - #side-by-side( - columns: (3fr, 2fr), - )[ +#slide(title: [ + An Interlude into Numerical Integration + #footnote[ + An excellent textbook for numerical and symplectic integrator is + #cite(, form: "prose"). + ] +])[ + #side-by-side(columns: (3fr, 2fr))[ To preserve volume, we need a numerical *symplectic integrator*. #v(1em) @@ -888,17 +780,14 @@ See HMC in action at #link( ][ #figure( - image("images/mcmc/leapfrog_0_3.jpg", height: 55%), caption: [HMC numerically integrated using leapfrog with $ε = 0.3$ and $L = 20$], + image("images/mcmc/leapfrog_0_3.jpg", height: 55%), + caption: [HMC numerically integrated using leapfrog with $ε = 0.3$ and $L = 20$], ) ] ] -#slide( - title: [Limitations of the HMC Algorithm], -)[ - #side-by-side( - columns: (3fr, 2fr), - )[ +#slide(title: [Limitations of the HMC Algorithm])[ + #side-by-side(columns: (3fr, 2fr))[ As you can see, HMC algorithm is highly sensible to the choice of leapfrog steps $L$ and step-size $ε$, @@ -913,14 +802,13 @@ See HMC in action at #link( ][ #figure( - image("images/mcmc/leapfrog_1_2.jpg", height: 55%), caption: [HMC numerically integrated using leapfrog with $ε = 1.2$ and $L = 20$], + image("images/mcmc/leapfrog_1_2.jpg", height: 55%), + caption: [HMC numerically integrated using leapfrog with $ε = 1.2$ and $L = 20$], ) ] ] -#slide( - title: [No-U-Turn-Sampler (NUTS)], -)[ +#slide(title: [No-U-Turn-Sampler (NUTS)])[ In HMC, we can adjust $ε$ during the algorithm runtime. But, for $L$, we need to to "dry run" the HMC sampler to find a good candidate value for $L$. @@ -934,9 +822,7 @@ See HMC in action at #link( It will automatically find $ε$ and $L$. ] -#slide( - title: [No-U-Turn-Sampler (NUTS)], -)[ +#slide(title: [No-U-Turn-Sampler (NUTS)])[ More specifically, we need a criterion that informs that we performed enough Hamiltonian dynamics simulation. @@ -957,9 +843,7 @@ See HMC in action at #link( $ ] -#slide( - title: [No-U-Turn-Sampler (NUTS)], -)[ +#slide(title: [No-U-Turn-Sampler (NUTS)])[ #v(2em) This suggests an algorithms that does not allow proposals be guided infinitely @@ -971,12 +855,8 @@ See HMC in action at #link( This means that such algorithm will *not allow u-turns*. ] -#slide( - title: [No-U-Turn-Sampler (NUTS)], -)[ - #text( - size: 18pt, - )[ +#slide(title: [No-U-Turn-Sampler (NUTS)])[ + #text(size: 18pt)[ NUTS uses the leapfrog integrator to create a binary tree where each leaf node is a proposal of the momenta vector $bold(φ)$ tracing both a forward ($t + 1$) as well as a backward ($t - 1$) path in a determined fictitious time $t$. @@ -985,14 +865,13 @@ See HMC in action at #link( forward or backward. 
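The stopping rule described above can be written in a few lines. Assuming θ⁺ and θ⁻ are the positions at the forward and backward ends of the trajectory and φ⁺, φ⁻ their momenta, a minimal Julia sketch of the U-turn check is:

```julia
using LinearAlgebra

# NUTS-style U-turn check: stop doubling the trajectory once either end starts
# moving back toward the other, i.e. when the momentum at an endpoint has a
# negative projection onto the vector joining the two endpoints.
uturn(θplus, θminus, φplus, φminus) =
    dot(θplus .- θminus, φminus) < 0 || dot(θplus .- θminus, φplus) < 0
```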
#figure( - image("images/mcmc/nuts.jpg", height: 40%), caption: [NUTS growing leaf nodes forward], + image("images/mcmc/nuts.jpg", height: 40%), + caption: [NUTS growing leaf nodes forward], ) ] ] -#slide( - title: [No-U-Turn-Sampler (NUTS)], -)[ +#slide(title: [No-U-Turn-Sampler (NUTS)])[ #v(2em) NUTS also uses a procedure called Dual Averaging @nesterov2009primal to @@ -1005,19 +884,11 @@ See HMC in action at #link( $ε$ and $L$ are kept fixed during the sampling phase. ] -#slide( - title: [NUTS Algorithm], -)[ - #text( - size: 7pt, - )[ - #algo( - line-numbers: false, - )[ +#slide(title: [NUTS Algorithm])[ + #text(size: 7pt)[ + #algo(line-numbers: false)[ Define an initial set $bold(θ)^0 ∈ RR^p$ that $P(bold(θ)^0 | bold(y)) > 0$ \ - #text( - fill: julia-blue, - )[Instantiate an empty binary tree with $2^L$ leaf nodes] \ + #text(fill: julia-blue)[Instantiate an empty binary tree with $2^L$ leaf nodes] \ Sample $bold(φ)$ from a $"Multivariate Normal"(bold(0),bold(M))$ \ Simultaneously sample $bold(θ)^*$ and $bold(φ)$ with $L$ steps and step-size $ε$ \ Define the current value of $bold(θ)$ as the proposed value $bold(θ)^*$: @@ -1025,39 +896,37 @@ See HMC in action at #link( for $1, 2, dots, L$ #i \ #text(fill: julia-blue)[Choose a direction $v tilde "Uniform"({-1, 1})$] \ Use the $log$ of the posterior's gradient $bold(θ)^*$ to produce a half-step of $bold(φ)$: - $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P(bold(θ)^* | bold(y))) / (dif θ)$ \ + $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P( + bold(θ)^* | bold(y) + )) / (dif θ)$ \ Use $bold(φ)$ to update $bold(θ)^*$: $bold(θ)^* <- bold(θ)^* + ε bold(M)^(-1) bold(φ)$ \ Use again $bold(θ)^*$ $log$ gradient to produce a half-step of $bold(φ)$: - $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P(bold(θ)^* | bold(y))) / (dif θ)$ #d \ + $bold(φ) <- bold(φ) + 1 / 2 ε (dif log P( + bold(θ)^* | bold(y) + )) / (dif θ)$ #d \ #text(fill: julia-blue)[Define the node $L_t^v$ as the proposal $bold(θ)$] \ - #text( - fill: julia-blue, - )[ + #text(fill: julia-blue)[ if the difference between proposal vector $bold(θ)^*$ and current vector $bold(θ)$ in the direction $v$ is lower than zero: $v (dif) / (dif t) ((bold(θ)^* - bold(θ)^*) dot (bold(θ)^* - bold(θ)^*)) / 2 < 0$ or $L$ steps have been reached ] #i \ - #text( - fill: julia-red, - )[ + #text(fill: julia-red)[ Stop sampling $bold(θ)^*$ in the direction $v$ and continue sampling only in the direction $-v$ ] #i \ - #text( - fill: julia-blue, - )[ + #text(fill: julia-blue)[ The difference between proposal vector $bold(θ)^*$ and current vector $bold(θ)$ in the direction $-v$ is lower than zero: $-v (dif) / (dif t) ((bold(θ)^* - bold(θ)^*) dot (bold(θ)^* - bold(θ)^*)) / 2 < 0$ or $L$ steps have been reached ] #i \ #text(fill: julia-red)[Stop sampling $bold(θ)^*$] #d #d #d\ - #text( - fill: julia-blue, - )[Choose a random node from the binary tree as the proposal] \ + #text(fill: julia-blue)[Choose a random node from the binary tree as the proposal] \ As an acceptance/rejection rule, compute: \ $ - r = (P (bold(θ)^* | bold(y) ) P (bold(φ)^*))/ (P (bold(θ)^(t - 1) | bold(y)) P(bold(φ)^(t - 1))) + r = (P (bold(θ)^* | bold(y)) P (bold(φ)^*)) / (P ( + bold(θ)^(t - 1) | bold(y) + ) P(bold(φ)^(t - 1))) $ Assign: @@ -1069,26 +938,20 @@ See HMC in action at #link( ] ] -#slide( - title: [NUTS Animation], -)[ -#v(4em) +#slide(title: [NUTS Animation])[ + #v(4em) -#align( - center, -)[ -See NUTS in action at #link( + #align(center)[ + See NUTS in action at #link( 
"https://chi-feng.github.io/mcmc-demo/app.html?algorithm=EfficientNUTS&target=banana", )[ `chi-feng/mcmc-demo` ]. -] + ] ] -#slide( - title: [Limitations of HMC and NUTS Algorithms -- #cite(, form: "prose")'s - Funnel], -)[ +#slide(title: [Limitations of HMC and NUTS Algorithms -- #cite(, form: "prose")'s + Funnel])[ The famous "Devil's Funnel" #footnote[very common em hierarchical models.]. @@ -1102,13 +965,9 @@ See NUTS in action at #link( #align(center)[#image("images/funnel/funnel.png", height: 40%)] ] -#slide( - title: [#cite(, form: "prose")'s Funnel and Non-Centered - Parameterization (NCP)], -)[ - #text( - size: 18pt, - )[ +#slide(title: [#cite(, form: "prose")'s Funnel and Non-Centered + Parameterization (NCP)])[ + #text(size: 18pt)[ The funnel occurs when we have a variable that its variance depends on another variable variance in an exponential scale. A canonical example of a centered parameterization (CP) is: @@ -1126,8 +985,8 @@ See NUTS in action at #link( $ P(tilde(y),tilde(x)) &= "Normal"(tilde(y) | 0, 1) dot "Normal"(tilde(x) | 0, 1) \ - y &= tilde(y) dot 3 + 0 \ - x &= tilde(x) dot e^(y / 2) + 0 + y &= tilde(y) dot 3 + 0 \ + x &= tilde(x) dot e^(y / 2) + 0 $ ] ] @@ -1136,28 +995,26 @@ See NUTS in action at #link( This example is for linear regression: $ - bold(y) &tilde "Normal"(α_j + bold(X) dot bold(β), σ ) \ - α_j &= z_j dot τ + α \ - z_j &tilde "Normal"(0, 1) \ - α &tilde "Normal"(μ_α, σ_α) \ + bold(y) &tilde "Normal"(α_j + bold(X) dot bold(β), σ) \ + α_j &= z_j dot τ + α \ + z_j &tilde "Normal"(0, 1) \ + α &tilde "Normal"(μ_α, σ_α) \ bold(β) &tilde "Normal"(μ_bold(β), σ_bold(β)) \ - τ &tilde "Cauchy"^+(0, ψ_α) \ - σ &tilde "Exponential"(λ_σ) + τ &tilde "Cauchy"^+(0, ψ_α) \ + σ &tilde "Exponential"(λ_σ) $ ] -#slide( - title: [Non-Centered Parameterization -- Varying-(Intercept-)Slope Model], -)[ +#slide(title: [Non-Centered Parameterization -- Varying-(Intercept-)Slope Model])[ This example is for linear regression: $ - bold(y) &tilde "Normal"(bold(X) bold(β)_j, σ) \ + bold(y) &tilde "Normal"(bold(X) bold(β)_j, σ) \ bold(β)_j &= bold(γ)_j dot bold(Σ) dot bold(γ)_j \ bold(γ)_j &tilde "Multivariate Normal"(bold(0), bold(I)) "for" j ∈ {1, dots, J} \ - bold(Σ) &tilde "LKJ"(η) \ - σ &tilde "Exponential"(λ_σ) + bold(Σ) &tilde "LKJ"(η) \ + σ &tilde "Exponential"(λ_σ) $ Each coefficient vector $bold(β)_j$ represents the model columns $bold(X)$ coefficients @@ -1165,9 +1022,7 @@ See NUTS in action at #link( filled with $1$s (intercept). ] -#slide( - title: [Stan and NUTS], -)[ +#slide(title: [Stan and NUTS])[ Stan was the first MCMC sampler to implement NUTS. Besides that, it has an automatic optimized adjustment routine for values of $L$ and $ε$ during @@ -1189,9 +1044,7 @@ See NUTS in action at #link( - *max tree depth* (in powers of $2$): 10 (which means $2^(10) = 1024$) ] -#slide( - title: [Turing and NUTS], -)[ +#slide(title: [Turing and NUTS])[ Turing also implements NUTS which lives, along with other MCMC samplers, inside the package AdvancedHMC.jl. @@ -1211,9 +1064,7 @@ See NUTS in action at #link( - *max tree depth* (in powers of $2$): 10 (which means $2^(10) = 1024$) ] -#slide( - title: [Markov Chain Convergence], -)[ +#slide(title: [Markov Chain Convergence])[ MCMC has an interesting property that it will *asymptotically converge to the target distribution* #footnote[ @@ -1229,9 +1080,7 @@ See NUTS in action at #link( convergence to the target distribution. 
] -#slide( - title: [Convergence Metrics], -)[ +#slide(title: [Convergence Metrics])[ #v(2em) We have some options on how to measure if the Markov chains converged to the @@ -1248,9 +1097,7 @@ See NUTS in action at #link( Markov chain have mixed, and, potentially, converged. ] -#slide( - title: [Convergence Metrics -- Effective Sample Size @gelman2013bayesian], -)[ +#slide(title: [Convergence Metrics -- Effective Sample Size @gelman2013bayesian])[ $ hat(n)_"eff" = (m n) / (1 + sum_(t = 1)^T hat(ρ)_t) $ where: @@ -1262,12 +1109,8 @@ See NUTS in action at #link( - $hat(ρ)_t$: an autocorrelation estimate. ] -#slide( - title: [Convergence Metrics -- Rhat @gelman2013bayesian], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Convergence Metrics -- Rhat @gelman2013bayesian])[ + #text(size: 18pt)[ $ hat(R) = sqrt((hat("var")^+ (ψ | y)) / W) $ where $hat("var")^+ (ψ | y)$ is the Markov chains' sample variance for a certain @@ -1284,26 +1127,20 @@ See NUTS in action at #link( ] ] -#slide( - title: [Traceplot -- Convergent Markov Chains], -)[ - #align( - center, - )[ +#slide(title: [Traceplot -- Convergent Markov Chains])[ + #align(center)[ #figure( - image("images/funnel/good_chains_traceplot.svg", height: 80%), caption: [A convergent Markov chains traceplot], + image("images/funnel/good_chains_traceplot.svg", height: 80%), + caption: [A convergent Markov chains traceplot], ) ] ] -#slide( - title: [Traceplot -- Divergent Markov Chains], -)[ - #align( - center, - )[ +#slide(title: [Traceplot -- Divergent Markov Chains])[ + #align(center)[ #figure( - image("images/funnel/bad_chains_traceplot.svg", height: 80%), caption: [A divergent Markov chains traceplot], + image("images/funnel/bad_chains_traceplot.svg", height: 80%), + caption: [A divergent Markov chains traceplot], ) ] ] @@ -1316,58 +1153,52 @@ See NUTS in action at #link( )[Stan's #text(fill: julia-red)[warnings] guide]. ] ])[ -#fit-to-height(1fr)[ -```shell -Warning messages: -1: There were 275 divergent transitions after warmup. See -http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup -to find out why this is a problem and how to eliminate them. -2: Examine the pairs() plot to diagnose sampling problems - -3: The largest R-hat is 1.12, indicating chains have not mixed. -Running the chains for more iterations may help. See -http://mc-stan.org/misc/warnings.html#r-hat -4: Bulk Effective Samples Size (ESS) is too low, indicating posterior -means and medians may be unreliable. -Running the chains for more iterations may help. See -http://mc-stan.org/misc/warnings.html#bulk-ess -5: Tail Effective Samples Size (ESS) is too low, indicating posterior -variances and tail quantiles may be unreliable. -Running the chains for more iterations may help. See -http://mc-stan.org/misc/warnings.html#tail-ess -``` -] -] - -#slide( - title: [ - Turing's Warning Messages - ], -)[ -#fit-to-height( - 1fr, -)[ -*Turing does not give warning messages!* -But you can check divergent transitions with `summarize(chn; -sections=[:internals])`: + #fit-to-height(1fr)[ + ```shell + Warning messages: + 1: There were 275 divergent transitions after warmup. See + http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup + to find out why this is a problem and how to eliminate them. + 2: Examine the pairs() plot to diagnose sampling problems + + 3: The largest R-hat is 1.12, indicating chains have not mixed. + Running the chains for more iterations may help. 
See + http://mc-stan.org/misc/warnings.html#r-hat + 4: Bulk Effective Samples Size (ESS) is too low, indicating posterior + means and medians may be unreliable. + Running the chains for more iterations may help. See + http://mc-stan.org/misc/warnings.html#bulk-ess + 5: Tail Effective Samples Size (ESS) is too low, indicating posterior + variances and tail quantiles may be unreliable. + Running the chains for more iterations may help. See + http://mc-stan.org/misc/warnings.html#tail-ess + ``` + ] +] -```shell -Summary Statistics - parameters mean std naive_se mcse ess rhat ess_per_sec - Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64 +#slide(title: [ + Turing's Warning Messages +])[ + #fit-to-height(1fr)[ + *Turing does not give warning messages!* + But you can check divergent transitions with `summarize(chn; +sections=[:internals])`: - lp -3.9649 1.7887 0.0200 0.1062 179.1235 1.0224 6.4133 - n_steps 9.1275 11.1065 0.1242 0.7899 38.3507 1.3012 1.3731 - acceptance_rate 0.5944 0.4219 0.0047 0.0322 40.5016 1.2173 1.4501 - tree_depth 2.2444 1.3428 0.0150 0.1049 32.8514 1.3544 1.1762 - numerical_error 0.1975 0.3981 0.0045 0.0273 59.8853 1.1117 2.1441 -``` -] + ```shell + Summary Statistics + parameters mean std naive_se mcse ess rhat ess_per_sec + Symbol Float64 Float64 Float64 Float64 Float64 Float64 Float64 + + lp -3.9649 1.7887 0.0200 0.1062 179.1235 1.0224 6.4133 + n_steps 9.1275 11.1065 0.1242 0.7899 38.3507 1.3012 1.3731 + acceptance_rate 0.5944 0.4219 0.0047 0.0322 40.5016 1.2173 1.4501 + tree_depth 2.2444 1.3428 0.0150 0.1049 32.8514 1.3544 1.1762 + numerical_error 0.1975 0.3981 0.0045 0.0273 59.8853 1.1117 2.1441 + ``` + ] ] -#slide( - title: [What To Do If the Markov Chains Do Not Converge?], -)[ +#slide(title: [What To Do If the Markov Chains Do Not Converge?])[ *First*: before making any fine adjustments in the number of Markov chains or the number of iterations per chain, etc. @@ -1383,21 +1214,18 @@ Summary Statistics 99% of the time. ] -#slide( - title: [What To Do If the Markov Chains Do Not Converge?], -)[ +#slide(title: [What To Do If the Markov Chains Do Not Converge?])[ #v(4em) #quote( - block: true, attribution: [#cite(, form: "prose")], + block: true, + attribution: [#cite(, form: "prose")], )[ When you have computational problems, often there’s a problem with your model. ] ] -#slide( - title: [What To Do If the Markov Chains Do Not Converge?], -)[ +#slide(title: [What To Do If the Markov Chains Do Not Converge?])[ If you experiencing convergence issues, *and you've discarded that something is wrong with you model*, here is a few steps to try #footnote[ @@ -1415,9 +1243,7 @@ Summary Statistics is 2,000 iterations and 4 chains). ] -#slide( - title: [What To Do If the Markov Chains Do Not Converge?], -)[ +#slide(title: [What To Do If the Markov Chains Do Not Converge?])[ 2. *Change the HMC's warmup adaptation routine*: make the HMC sampler to be more conservative in the proposals. This can be changed by increasing the hyperparameter *target acceptance rate of Metropolis proposals* @@ -1430,9 +1256,7 @@ Summary Statistics (CP) and non-centered parameterization (NCP). ] -#slide( - title: [What To Do If the Markov Chains Do Not Converge?], -)[ +#slide(title: [What To Do If the Markov Chains Do Not Converge?])[ 4. *Collect more data*: sometimes the model is too complex and we need a higher sample size for stable estimates. 
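In code, steps 1 and 2 usually amount to changing a single sampling call, and step 3 is the funnel reparameterization shown earlier in this section. Below are minimal Turing.jl sketches; the model name, iteration counts, and the 0.9 target acceptance are illustrative assumptions, not prescribed values.

```julia
using Turing

# Steps 1–2: assuming `model` is an instantiated Turing model that showed
# divergences or a high R-hat with the defaults, run more iterations, several
# chains, and a more conservative NUTS target acceptance rate (e.g. 0.9
# instead of the 0.65 default).
chn = sample(model, NUTS(1_000, 0.9), MCMCThreads(), 2_000, 4)
```

And step 3, written as the centered versus non-centered funnel from the earlier slides:

```julia
# Centered parameterization: the scale of x depends on y, producing the
# funnel geometry that trips up HMC/NUTS.
@model function funnel_cp()
    y ~ Normal(0, 3)
    x ~ Normal(0, exp(y / 2))
end

# Non-centered parameterization: sample standard normals z_y, z_x (the ỹ and x̃
# of the earlier slides) and transform deterministically, which gives the
# sampler a much friendlier geometry.
@model function funnel_ncp()
    z_y ~ Normal(0, 1)
    z_x ~ Normal(0, 1)
    y = 3 * z_y
    x = z_x * exp(y / 2)
    return (x=x, y=y)
end
```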
diff --git a/slides/13-model_comparison.typ b/slides/13-model_comparison.typ index 3fc278e..f222310 100644 --- a/slides/13-model_comparison.typ +++ b/slides/13-model_comparison.typ @@ -4,9 +4,7 @@ #new-section-slide("Model Comparison") -#slide( - title: "Recommended References", -)[ +#slide(title: "Recommended References")[ - #cite(, form: "prose") - Chapter 7: Evaluating, comparing, and expanding models - #cite(, form: "prose") - Chapter 11, Section 11.8: Cross @@ -26,18 +24,14 @@ #align(center)[#image("images/memes/model_comparison.jpg")] ] -#slide( - title: [Why Compare Models?], -)[ +#slide(title: [Why Compare Models?])[ #v(3em) After model parameters estimation, many times we want to measure its *predictive accuracy* by itself, or for *model comparison*, *model selection*, or computing a *model performance metric* @geisser1979predictive. ] -#slide( - title: [But What About Visual Posterior Predictive Checks?], -)[ +#slide(title: [But What About Visual Posterior Predictive Checks?])[ To analyze and compare models using visual posterior predictive checks is a *subjective and arbitrary approach*. @@ -54,9 +48,7 @@ @gelmanBayesianWorkflow2020. ] -#slide( - title: [Model Comparison Techniques], -)[ +#slide(title: [Model Comparison Techniques])[ We have several model comparison techniques that use *predictive accuracy*, but the main ones are: - Leave-one-out cross-validation (LOO) @vehtariPracticalBayesianModel2015. @@ -70,12 +62,8 @@ and it is asymptotically equal to LOO @vehtariPracticalBayesianModel2015. ] -#slide( - title: [Historical Interlude], -)[ - #text( - size: 16pt, - )[ +#slide(title: [Historical Interlude])[ + #text(size: 16pt)[ In the past, we did not have computational power and data abundance. Model comparison was done based on a theoretical divergence metric originated from information theory's entropy: @@ -88,7 +76,9 @@ so lower values are preferable: $ - D(y, bold(θ)) = -2 dot underbrace( + D( + y, bold(θ) + ) = -2 dot underbrace( sum^N_(i = 1) log 1 / S sum^S_(s = 1) P(y_i | bold(θ)^s), "log pointwise predictive density - lppd", ) @@ -98,9 +88,7 @@ ] ] -#slide( - title: [Historical Interlude -- AIC @akaike1998information], -)[ +#slide(title: [Historical Interlude -- AIC @akaike1998information])[ $ "AIC" = D(y, bold(θ)) + 2k = -2 "lppd"_("mle") + 2k $ @@ -122,9 +110,7 @@ parameters $k$: $N >> k$ ] -#slide( - title: [Historical Interlude -- DIC @spiegelhalter2002bayesian], -)[ +#slide(title: [Historical Interlude -- DIC @spiegelhalter2002bayesian])[ A generalization of the AIC, where we replace the maximum likelihood estimate for the posterior mean and $k$ by a data-based bias correction: @@ -141,9 +127,7 @@ and that $N >> k$. ] -#slide( - title: [Predictive Accuracy], -)[ +#slide(title: [Predictive Accuracy])[ With current computational power, we do not need approximations #footnote[AIC, DIC etc.]. #v(2em) @@ -155,9 +139,7 @@ But, first, let's define what is predictive accuracy. ] -#slide( - title: [Predictive Accuracy], -)[ +#slide(title: [Predictive Accuracy])[ Bayesian approaches measure predictive accuracy using posterior draws $tilde(y)$ from the model. For that we have the predictive posterior distribution: @@ -176,14 +158,14 @@ $p(tilde(y) | y)$, the *better* will be the model's predictive accuracy. 
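Given posterior draws, the log pointwise predictive density behind these definitions is straightforward to estimate. Below is a minimal plain-Julia sketch; the S × N matrix `log_lik` of pointwise log-likelihood evaluations is an assumed input, and the variance-based penalty included here anticipates the WAIC discussed next.

```julia
using Statistics

# log_lik: S × N matrix where entry (s, i) is log p(y_i | θ^s) for posterior
# draw s and observation i.
function lppd_waic(log_lik)
    # log of the Monte Carlo average of the pointwise likelihoods
    # (a log-sum-exp would be used in practice for numerical stability)
    lppd = sum(log.(vec(mean(exp.(log_lik); dims=1))))
    # penalty: posterior variance of the pointwise log-likelihoods
    p_waic = sum(vec(var(log_lik; dims=1)))
    return (lppd=lppd, p_waic=p_waic, waic=-2 * (lppd - p_waic))
end
```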
] -#slide( - title: [Predictive Accuracy], -)[ +#slide(title: [Predictive Accuracy])[ To make samples comparable, we calculate the expectation of this measure for each one of the $N$ sample observations: $ - op("elpd") = sum_(i=1)^N ∫ p_t(tilde(y)_i) log p(tilde(y)_i | y) dif tilde(y) + op("elpd") = sum_(i=1)^N ∫ p_t(tilde(y)_i) log p( + tilde(y)_i | y + ) dif tilde(y) $ where $op("elpd")$ is the expected log pointwise predictive density, and $p_t(tilde(y)_i)$ is @@ -194,12 +176,8 @@ approximations to estimate $op("elpd")$. ] -#slide( - title: [Leave-One-Out Cross-Validation (LOO)], -)[ - #text( - size: 18pt, - )[ +#slide(title: [Leave-One-Out Cross-Validation (LOO)])[ + #text(size: 18pt)[ We can compute the $op("elpd")$ using LOO @vehtariPracticalBayesianModel2015: $ @@ -218,12 +196,8 @@ ] ] -#slide( - title: [Widely Applicable Information Criteria (WAIC)], -)[ - #text( - size: 12pt, - )[ +#slide(title: [Widely Applicable Information Criteria (WAIC)])[ + #text(size: 12pt)[ WAIC @watanabe2010asymptotic, like LOO, is also an alternative approach to compute the $op("elpd")$, @@ -254,9 +228,7 @@ ] ] -#slide( - title: [$K$-fold Cross-Validation ($K$-fold CV)], -)[ +#slide(title: [$K$-fold Cross-Validation ($K$-fold CV)])[ In the same manner that we can compute the $op("elpd")$ using LOO with $N-1$ sample partitions, we can also compute it with any desired partition number. @@ -272,9 +244,7 @@ which almost involves a *high computational cost*. ] -#slide( - title: [Pareto Smoothed Importance Sampling LOO (PSIS-LOO)], -)[ +#slide(title: [Pareto Smoothed Importance Sampling LOO (PSIS-LOO)])[ PSIS uses *importance sampling* #footnote[another class of MCMC algorithm that we did not cover yet.], which means a importance weighting scheme approach. @@ -284,9 +254,7 @@ reliability. ] -#slide( - title: [Importance Sampling], -)[ +#slide(title: [Importance Sampling])[ If the $N$ samples are conditionally independent #footnote[ that is, they are independent if conditioned on the model's parameters, which is a basic assumption in any Bayesian (and frequentist) model @@ -307,13 +275,13 @@ $ ] -#slide( - title: [Importance Sampling], -)[ +#slide(title: [Importance Sampling])[ However, the posterior $P(θ | y$ often has low variance and shorter tails than the LOO distributions $P(θ | y_(-1))$. Hence, if we use: $ - P(tilde(y)_i | y_(-i)) ≈ (sum_(s=1)^S r_i^s P(tilde(y)_i|θ^s)) / (sum_(s=1)^S r_i^s) + P(tilde(y)_i | y_(-i)) ≈ (sum_(s=1)^S r_i^s P( + tilde(y)_i|θ^s + )) / (sum_(s=1)^S r_i^s) $ #v(1em) @@ -322,9 +290,7 @@ variance*. ] -#slide( - title: [Pareto Smoothed Importance Sampling], -)[ +#slide(title: [Pareto Smoothed Importance Sampling])[ We can enhance the IS-LOO estimate using a *Pareto Smoothed Importance Sampling* @vehtariPracticalBayesianModel2015. @@ -350,12 +316,8 @@ where $w$ is the truncated weights. ] -#slide( - title: [Pareto Smoothed Importance Sampling LOO (PSIS-LOO)], -)[ - #text( - size: 17pt, - )[ +#slide(title: [Pareto Smoothed Importance Sampling LOO (PSIS-LOO)])[ + #text(size: 17pt)[ We use the importance weights Pareto distribution's estimated shape parameter $hat(k)$ to assess its reliability: diff --git a/slides/backup_slides.typ b/slides/backup_slides.typ index d33caae..5229188 100644 --- a/slides/backup_slides.typ +++ b/slides/backup_slides.typ @@ -4,21 +4,17 @@ #new-section-slide("Backup Slides") -#slide( - title: [How the Normal distribution arose - #footnote[Origins can be traced back to Abraham de Moivre in 1738. 
A better explanation - can be found by - #link( +#slide(title: [How the Normal distribution arose + #footnote[Origins can be traced back to Abraham de Moivre in 1738. A better explanation + can be found by + #link( "http://www.stat.yale.edu/~pollard/Courses/241.fall2014/notes2014/Bin.Normal.pdf", - )[clicking here].]], -)[ - #text( - size: 16pt, - )[ + )[clicking here].]])[ + #text(size: 16pt)[ $ - "Binomial"(n, k) &= binom(n, k) p^k (1-p)^(n-k) \ - n! &≈ sqrt(2 π n) (n / e)^n \ + "Binomial"(n, k) &= binom(n, k) p^k (1-p)^(n-k) \ + n! &≈ sqrt(2 π n) (n / e)^n \ lim_(n → oo) binom(n, k) p^k (1-p)^(n-k) &= 1 / (sqrt(2 π n p q)) e^(-((k - n p)^2) / (2 n p q)) $ @@ -27,17 +23,15 @@ replacing $op("E")$ by $μ$ and $op("Var")$ by $σ^2$: $ - lim_(n → oo) binom(n, k) p^k (1-p)^(n-k) = 1 / (σ sqrt(2 π)) e^(-((k - μ)^2) / (σ^2)) + lim_(n → oo) binom(n, k) p^k (1-p)^(n-k) = 1 / (σ sqrt(2 π)) e^(-(( + k - μ + )^2) / (σ^2)) $ ] ] -#slide( - title: [QR Decomposition], -)[ - #text( - size: 13pt, - )[ +#slide(title: [QR Decomposition])[ + #text(size: 13pt)[ In Linear Algebra 101, we learn that any matrix (even non-square ones) can be decomposed into a product of two matrices: @@ -52,13 +46,13 @@ numerical stable. Mathematically speaking, the thing QR decomposition is: $ - bold(X) &= bold(Q)^* bold(R)^* \ + bold(X) &= bold(Q)^* bold(R)^* \ bold(Q)^* &= bold(Q) dot sqrt(n - 1) \ bold(R)^* &= 1 / (sqrt(n - 1)) dot bold(R) \ - bold(μ) &= α + bold(X) dot bold(β) + σ \ - &= α + bold(Q)^* dot bold(R)^* dot bold(β) + σ \ - &= α + bold(Q)^* dot (bold(R)^* dot bold(β)) + σ \ - &= α + bold(Q)^* dot tilde(bold(β)) + σ + bold(μ) &= α + bold(X) dot bold(β) + σ \ + &= α + bold(Q)^* dot bold(R)^* dot bold(β) + σ \ + &= α + bold(Q)^* dot (bold(R)^* dot bold(β)) + σ \ + &= α + bold(Q)^* dot tilde(bold(β)) + σ $ ] ] diff --git a/slides/slides.typ b/slides/slides.typ index e033a53..495d251 100644 --- a/slides/slides.typ +++ b/slides/slides.typ @@ -3,7 +3,10 @@ #import "utils.typ": * #show: clean-theme.with( - footer: [Bayesian Statistics, Jose Storopoli], short-title: [Bayesian Statistics], logo: image("images/logos/juliadatascience_white.png"), color: julia-purple, + footer: [Bayesian Statistics, Jose Storopoli], + short-title: [Bayesian Statistics], + logo: image("images/logos/juliadatascience_white.png"), + color: julia-purple, ) // customizations @@ -26,19 +29,13 @@ #align(center)[#image("images/memes/main.jpg")] ] -#slide( - title: "License", -)[ +#slide(title: "License")[ #v(2em) - #align( - center, - )[ + #align(center)[ The text and images from these slides have a - #link( - "https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en", - )[ + #link("https://creativecommons.org/licenses/by-nc-sa/4.0/deed.en")[ Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) ] diff --git a/slides/utils.typ b/slides/utils.typ index 9cb0f7e..3dafa8c 100644 --- a/slides/utils.typ +++ b/slides/utils.typ @@ -5,33 +5,81 @@ #let julia-red = rgb("#cb3c33") // functions -#let binormal(x, y, μ_a, σ_a, μ_b, σ_b, ρ) = calc.exp( - -( - calc.pow((x - μ_a) / σ_a, 2) + calc.pow((y - μ_b) / σ_b, 2) - (2 * ρ) * ((x - μ_a) / σ_a) * ((y - μ_b) / σ_b) - ) / (2 * (1 - calc.pow(ρ, 2))), -) / (2 * calc.pi * σ_a * σ_b * calc.pow(1 - calc.pow(ρ, 2)), 0.5) -#let conditionalbinormal(x, y_c, μ_a, σ_a, μ_b, σ_b, ρ) = calc.exp( - -( - calc.pow((x - μ_a) / σ_a, 2) + calc.pow((y_c - μ_b) / σ_b, 2) - (2 * ρ) * ((x - μ_a) / σ_a) * ((y_c - μ_b) / σ_b) - ) / (2 * (1 - calc.pow(ρ, 2))), -) / (2 * calc.pi * σ_a * σ_b * calc.pow(1 - calc.pow(ρ, 2)), 
0.5) -#let sumtwonormals(x_a, x_b, μ_a, σ_a, w_a, μ_b, σ_b, w_b) = (w_a * gaussian(x_a, μ_a, σ_a)) + (w_b * gaussian(x_b, μ_b, σ_b)) +#let binormal(x, y, μ_a, σ_a, μ_b, σ_b, ρ) = ( + calc.exp(-( + calc.pow((x - μ_a) / σ_a, 2) + calc.pow((y - μ_b) / σ_b, 2) - (2 * ρ) * ( + (x - μ_a) / σ_a + ) * ((y - μ_b) / σ_b) + ) / (2 * (1 - calc.pow(ρ, 2)))) / ( + 2 * calc.pi * σ_a * σ_b * calc.pow(1 - calc.pow(ρ, 2)), + 0.5, + ) +) +#let conditionalbinormal(x, y_c, μ_a, σ_a, μ_b, σ_b, ρ) = ( + calc.exp(-( + calc.pow((x - μ_a) / σ_a, 2) + calc.pow((y_c - μ_b) / σ_b, 2) - (2 * ρ) * ( + (x - μ_a) / σ_a + ) * ((y_c - μ_b) / σ_b) + ) / (2 * (1 - calc.pow(ρ, 2)))) / ( + 2 * calc.pi * σ_a * σ_b * calc.pow(1 - calc.pow(ρ, 2)), + 0.5, + ) +) +#let sumtwonormals(x_a, x_b, μ_a, σ_a, w_a, μ_b, σ_b, w_b) = ( + (w_a * gaussian(x_a, μ_a, σ_a)) + (w_b * gaussian(x_b, μ_b, σ_b)) +) #let discreteuniform(a, b) = 1 / (b - a + 1) -#let binomial(x, n, p) = calc.fact(n) / (calc.fact(x) * calc.fact(n - x)) * calc.pow(p, x) * calc.pow(1 - p, n - x) +#let binomial(x, n, p) = ( + calc.fact(n) / (calc.fact(x) * calc.fact(n - x)) * calc.pow(p, x) * calc.pow( + 1 - p, + n - x, + ) +) #let poisson(x, λ) = calc.pow(λ, x) * calc.exp(-λ) / calc.fact(x) -#let negativebinomial(x, r, p) = calc.fact(x + r - 1) / (calc.fact(r - 1) * calc.fact(x)) * (calc.pow(1 - p, x) * calc.pow(p, r)) +#let negativebinomial(x, r, p) = ( + calc.fact(x + r - 1) / (calc.fact(r - 1) * calc.fact(x)) * ( + calc.pow(1 - p, x) * calc.pow(p, r) + ) +) #let continuousuniform(a, b) = 1 / (b - a) -#let gaussian(x, μ, σ) = 1 / (σ * calc.sqrt(2 * calc.pi)) * calc.exp(-(calc.pow(x - μ, 2)) / (2 * calc.pow(σ, 2))) -#let lognormal(x, μ, σ) = 1 / (x * σ * calc.sqrt(2 * calc.pi)) * calc.exp(-(calc.pow(calc.ln(x) - μ, 2)) / (2 * calc.pow(σ, 2))) +#let gaussian(x, μ, σ) = ( + 1 / (σ * calc.sqrt(2 * calc.pi)) * calc.exp(-(calc.pow(x - μ, 2)) / ( + 2 * calc.pow(σ, 2) + )) +) +#let lognormal(x, μ, σ) = ( + 1 / (x * σ * calc.sqrt(2 * calc.pi)) * calc.exp(-( + calc.pow(calc.ln(x) - μ, 2) + ) / (2 * calc.pow(σ, 2))) +) #let exponential(x, λ) = λ * calc.exp(-λ * x) -#let gamma(z) = (2.506628274631 * calc.sqrt(1 / z)) + (0.20888568 * calc.pow(1 / z, 1.5)) + (0.00870357 * calc.pow(1 / z, 2.5)) - ((174.2106599 * calc.pow(1 / z, 3.5)) / 25920) - ((715.6423511 * calc.pow(1 / z, 4.5)) / 1244160) * (calc.exp(-calc.ln(1 / z) - 1) * z) -#let gammadist(x, α, θ) = calc.pow(x, α - 1) * calc.exp(-x / θ) * gamma(α) * calc.pow(θ, α) -#let student(x, ν) = gamma((ν + 1) / 2) / (calc.sqrt(ν * calc.pi) * gamma(ν / 2)) * calc.pow(1 + (calc.pow(x, 2) / ν), (-(ν + 1) / 2)) +#let gamma(z) = ( + (2.506628274631 * calc.sqrt(1 / z)) + (0.20888568 * calc.pow(1 / z, 1.5)) + ( + 0.00870357 * calc.pow(1 / z, 2.5) + ) - ((174.2106599 * calc.pow(1 / z, 3.5)) / 25920) - ( + (715.6423511 * calc.pow(1 / z, 4.5)) / 1244160 + ) * (calc.exp(-calc.ln(1 / z) - 1) * z) +) +#let gammadist(x, α, θ) = ( + calc.pow(x, α - 1) * calc.exp(-x / θ) * gamma(α) * calc.pow(θ, α) +) +#let student(x, ν) = ( + gamma((ν + 1) / 2) / (calc.sqrt(ν * calc.pi) * gamma(ν / 2)) * calc.pow( + 1 + (calc.pow(x, 2) / ν), + (-(ν + 1) / 2), + ) +) #let cauchy(x, μ, σ) = 1 / (calc.pi * σ * (1 + (calc.pow((x - μ) / σ, 2)))) -#let beta(x, α, β) = calc.pow(x, α - 1) * calc.pow(1 - x, β - 1) / ((gamma(α) * gamma(β)) / gamma(α + β)) +#let beta(x, α, β) = ( + calc.pow(x, α - 1) * calc.pow(1 - x, β - 1) / ( + (gamma(α) * gamma(β)) / gamma(α + β) + ) +) #let logistic(x) = 1 / (1 + calc.exp(-x)) -#let normcdf(x, μ, σ) = 1 / ( - 1 + calc.exp(-0.07056 * 
calc.pow(((x - μ) / σ), 3) - 1.5976 * (x - μ) / σ) +#let normcdf(x, μ, σ) = ( + 1 / ( + 1 + calc.exp(-0.07056 * calc.pow(((x - μ) / σ), 3) - 1.5976 * (x - μ) / σ) + ) ) #let logodds(p) = calc.ln(p / (1 - p)) #let laplace(x, b) = (1 / (2 * b)) * calc.exp(-calc.abs(x) / b) diff --git a/treefmt.nix b/treefmt.nix index 49a8893..8d8e221 100644 --- a/treefmt.nix +++ b/treefmt.nix @@ -7,4 +7,13 @@ # Enable formatters programs.nixpkgs-fmt.enable = true; programs.prettier.enable = true; + + # Custom typstyle + settings.formatter.typstyle = { + command = "${pkgs.typstyle}/bin/typstyle"; + options = [ + "--check" + ]; + includes = [ "*.typ" "*.typst" ]; + }; } diff --git a/typstfmt-config.toml b/typstfmt-config.toml deleted file mode 100644 index e001094..0000000 --- a/typstfmt-config.toml +++ /dev/null @@ -1,4 +0,0 @@ -indent_space = 2 -max_line_length = 100 -experimental_args_breaking_consecutive = false -line_wrap = true