[methods] general notes additions

diff --git a/spec/statistics.tex b/spec/statistics.tex

index 323ec2ac410e6e775239a53452756cc08d8f8a8d..fc4165c7a8631ca23a7d4db8382643e3fde52b0b 100644 (file)
--- a/spec/statistics.tex
+++ b/spec/statistics.tex
@@ -167,40 +167,41 @@
      Note hypotheses are always expressed in terms of population parameters
    \end{warning}
  
-  \subsection*{Null hypothesis \(H_0\)}
+  \subsection*{Null hypothesis \(\textbf{H}_0\)}
  
    Sample drawn from population has same mean as control population, and any difference can be explained by sample variations.
  
-  \subsection*{Alternative hypothesis \(H_1\)}
+  \subsection*{Alternative hypothesis \(\textbf{H}_1\)}
  
    Amount of variation from control is significant, despite standard sample variations.
  
    \subsection*{\(p\)-value}
  
+  Probability of observing a value of the sample statistic at least as extreme as the one observed, assuming the null hypothesis is true.
  
+  For one-tail tests:
    \begin{align*}
-    p &= \Pr(\overline{X} \lessgtr \mu(H_1)) \\
-    &= 2 \cdot \Pr(\overline{X} <> \mu(H_1) | \mu = 8)
+    p\text{-value} &= \Pr\left( \> \overline{X} \lessgtr \mu(\textbf{H}_1) \> \given \> \mu = \mu(\textbf{H}_0)\> \right) \\
+    &= \Pr\left( Z \lessgtr \dfrac{\left( \mu(\textbf{H}_1) - \mu(\textbf{H}_0) \right) \cdot \sqrt{n} }{\operatorname{sd}(X)} \right) \\
+    &\text{then use \texttt{normCdf} with std. norm.}
    \end{align*}
  
-  Probability of observing a value of the sample statistic as significant as the one observed, assuming null hypothesis is true.
-
    \vspace{0.5em}
    \begin{tabularx}{23em}{|l|X|}
      \hline
      \rowcolor{cas}
      \(\boldsymbol{p}\) & \textbf{Conclusion} \\
      \hline
-    \(> 0.05\) & insufficient evidence against \(H_0\) \\
-    \(< 0.05\) (5\%) & good evidence against \(H_0\) \\
-    \(< 0.01\) (1\%) & strong evidence against \(H_0\) \\
-    \(< 0.001\) (0.1\%) & very strong evidence against \(H_0\) \\
+    \(> 0.05\) & insufficient evidence against \(\textbf{H}_0\) \\
+    \(< 0.05\) (5\%) & good evidence against \(\textbf{H}_0\) \\
+    \(< 0.01\) (1\%) & strong evidence against \(\textbf{H}_0\) \\
+    \(< 0.001\) (0.1\%) & very strong evidence against \(\textbf{H}_0\) \\
      \hline
    \end{tabularx}
  
-  \subsection*{Statistical significance}
+  \subsection*{Significance level \(\alpha\)}
  
-  Significance level is denoted by \(\alpha\).
+  The threshold that the \(p\)-value is compared against when deciding whether to reject the null hypothesis.
  
    \-\hspace{1em} If \(p<\alpha\), null hypothesis is \textbf{rejected} \\
    \-\hspace{1em} If \(p>\alpha\), null hypothesis is \textbf{not rejected}
@@ -213,7 +214,7 @@
    Menu \(\rightarrow\) Statistics \(\rightarrow\) Calc \(\rightarrow\) Test. \\
    Select \textit{One-Sample Z-Test} and \textit{Variable}, then input:
      \begin{description}[nosep, style=multiline, labelindent=0.5cm, leftmargin=2cm, font=\normalfont]
-    \item[\(\mu\) cond:] same operator as \(H_1\)
+    \item[\(\mu\) cond:] same operator as \(\textbf{H}_1\)
      \item[\(\mu_0\):] expected sample mean (null hypothesis)
      \item[\(\sigma\):] standard deviation (null hypothesis)
      \item[\(\overline{x}\):] sample mean
@@ -222,27 +223,36 @@
    \end{cas}
  
    \subsection*{One-tail and two-tail tests}
+  
+  \[ p\text{-value (two-tail)} = 2 \times p\text{-value (one-tail)} \]
  
    \subsubsection*{One tail}
  
    \begin{itemize}
      \item \(\mu\) has changed in one direction
-    \item State ``\(H_1: \mu \lessgtr \) known population mean''
+    \item State ``\(\textbf{H}_1: \mu \lessgtr \) known population mean''
    \end{itemize}
  
    \subsubsection*{Two tail}
  
    \begin{itemize}
      \item Direction of \(\Delta \mu\) is ambiguous
-    \item State ``\(H_1: \mu \ne\) known population mean''
+    \item State ``\(\textbf{H}_1: \mu \ne\) known population mean''
    \end{itemize}
  
-  For two tail tests:
    \begin{align*}
      p\text{-value} &= \Pr(|\overline{X} - \mu| \ge |\overline{x}_0 - \mu|) \\
-    &= \left( |Z| \ge \left|\dfrac{\overline{x}_0 - \mu}{\sigma \div \sqrt{n}} \right| \right)
+    &= \Pr\left( |Z| \ge \left|\dfrac{\overline{x}_0 - \mu}{\sigma \div \sqrt{n}} \right| \right)
    \end{align*}
  
+  where
+  \begin{description}[nosep, labelindent=0.5cm]
+    \item [\(\mu\)] is the population mean under \(\textbf{H}_0\)
+    \item [\(\overline{x}_0\)] is the observed sample mean
+    \item [\(\sigma\)] is the population s.d.
+    \item [\(n\)] is the sample size
+  \end{description}
+
    \subsection*{Modulus notation for two tail}
  
    \(\Pr(|\overline{X} - \mu| \ge a) \implies\) ``the probability that the distance between \(\overline{X}\) and \(\mu\) is \(\ge a\)''
@@ -256,10 +266,22 @@
    \subsection*{Errors}
  
    \begin{description}[labelwidth=2.5cm, labelindent=0.5cm]
-    \item [Type I error] \(H_0\) is rejected when it is \textbf{true}
-    \item [Type II error] \(H_0\) is \textbf{not} rejected when it is \textbf{false}
+    \item [Type I error] \(\textbf{H}_0\) is rejected when it is \textbf{true}
+    \item [Type II error] \(\textbf{H}_0\) is \textbf{not} rejected when it is \textbf{false}
    \end{description}
  
+  \begin{tabularx}{\columnwidth}{|X|l|l|}
+    \rowcolor{cas}\hline
+    \cellcolor{white}&\multicolumn{2}{c|}{\textbf{Actual result}} \\
+    \hline
+    \cellcolor{cas}\(\boldsymbol{z}\)\textbf{-test} & \cellcolor{light-gray}\(\textbf{H}_0\) true & \cellcolor{light-gray}\(\textbf{H}_0\) false \\
+    \hline
+    \cellcolor{light-gray}Reject \(\textbf{H}_0\) & Type I error & Correct \\
+    \hline
+    \cellcolor{light-gray}Do not reject \(\textbf{H}_0\) & Correct& Type II error \\
+    \hline
+  \end{tabularx}
+
  % \subsection*{Using c.i. to find \(p\)}
  % need more here