\begin{cas}
- \begin{itemize}
- \item Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); where \verb;n; is the number of samples. Show histogram with Histogram key in top left
- \item To calculate parameters of a dataset: Calc \(\rightarrow\) One-variable
- \end{itemize}
+ \hspace{1em} Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); \\
+ where \verb;n; is the number of samples. Show histogram with Histogram key in top left.
+
+ To calculate parameters of a dataset: \\
+ \-\hspace{1em}Calc \(\rightarrow\) One-variable
\end{cas}
Always express \(z\) as +ve. Express confidence \textit{interval} as ordered pair.
\end{warning}
-\pgfmathdeclarefunction{gauss}{2}{%
- \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}%
-}
- \pgfplotsset{every axis/.append style={
- axis x line=middle, % put the x axis in the middle
- axis y line=middle, % put the y axis in the middle
- }} \pgfkeys{/pgf/decoration/.cd,
- distance/.initial=10pt
-} \pgfdeclaredecoration{add dim}{final}{
-\state{final}{%
-\pgfmathsetmacro{\dist}{5pt*\pgfkeysvalueof{/pgf/decoration/distance}/abs(\pgfkeysvalueof{/pgf/decoration/distance})}
- \pgfpathmoveto{\pgfpoint{0pt}{0pt}}
- \pgfpathlineto{\pgfpoint{0pt}{2*\dist}}
- \pgfpathmoveto{\pgfpoint{\pgfdecoratedpathlength}{0pt}}
- \pgfpathlineto{\pgfpoint{(\pgfdecoratedpathlength}{2*\dist}}
- \pgfsetarrowsstart{latex}
- \pgfsetarrowsend{latex}
- \pgfpathmoveto{\pgfpoint{0pt}{\dist}}
- \pgfpathlineto{\pgfpoint{\pgfdecoratedpathlength}{\dist}}
- \pgfusepath{stroke}
- \pgfpathmoveto{\pgfpoint{0pt}{0pt}}
- \pgfpathlineto{\pgfpoint{\pgfdecoratedpathlength}{0pt}}
-}}
-\tikzset{dim/.style args={#1,#2}{decoration={add dim,distance=#2},
- decorate,
- postaction={decorate,decoration={text along path,
- raise=#2,
- text align={align=center},
- text={#1}}}}}
- \begin{figure*}[hb]
- \centering
- {\begin{center} \begin{tikzpicture}
- \pgfplotsset{set layers, axis x line=middle, axis y line=middle}
-\begin{axis}[every axis plot post/.append style={
- mark=none,domain=-3:3,samples=50,smooth},
- axis x line=bottom,
- axis y line=left,
- enlargelimits=upper,
- x=\textwidth/10,
- ytick={0.55},
- yticklabels={\(\frac{1}{\sigma \sqrt{2\pi}}\)},
- xtick={-2,-1,0,1,2},
- x tick label style = {font=\footnotesize},
- xticklabels={\((\mu-2\sigma)\), \((\mu-\sigma)\), \(\mu\), \((\mu+\sigma)\), \((\mu+2\sigma)\)},
- xlabel={\(x\)},
- every axis x label/.style={at={(current axis.right of origin)},anchor=north west},
- every axis y label/.style={at={(axis description cs:-0.02,0.2)}, anchor=south west, rotate=90},
- ylabel={\(\Pr(X=x)\)}]
- \addplot {gauss(0,0.75)};
-\fill[red!30] (-3,0) -- plot[id=f3,domain=-3:3,samples=50]
- function {1/(0.75*sqrt(2*pi))*exp(-((x)^2)/(2*0.75^2))} -- (3,0) -- cycle;
- \fill[darkgray!30] (3,0) -- plot[id=f3,domain=-3:3,samples=50] function {1/(0.75*sqrt(2*pi))*exp(-x*x*0.5/(0.75*0.75))} -- (3,0) -- cycle;
- \fill[lightgray!30] (-2,0) -- plot[id=f3,domain=-2:2,samples=50] function {1/(0.75*sqrt(2*pi))*exp(-x*x*0.5/(0.75*0.75))} -- (2,0) -- cycle;
- \fill[white!30] (-1,0) -- plot[id=f3,domain=-1:1,samples=50] function {1/(0.75*sqrt(2*pi))*exp(-x*x*0.5/(0.75*0.75))} -- (1,0) -- cycle;
- \begin{scope}[<->]
- \draw (-1,0.35) -- (1,0.35) node [midway, fill=white] {68.3\%};
- \draw (-2,0.25) -- (2,0.25) node [midway, fill=white] {95.5\%};
- \draw (-3,0.15) -- (3,0.15) node [midway, fill=white] {99.7\%};
- \end{scope}
- \begin{scope}[-, dashed, gray]
- \draw (-1,0) -- (-1, 0.35);
- \draw (1,0) -- (1, 0.35);
- \draw (-2,0) -- (-2, 0.25);
- \draw (2,0) -- (2, 0.25);
- \draw (-3,0) -- (-3, 0.15);
- \draw (3,0) -- (3, 0.15);
- \end{scope}
-\end{axis}
-\begin{axis}[every axis plot post/.append style={
- mark=none,domain=-3:3,samples=50,smooth},
- axis x line=bottom,
- enlargelimits=upper,
- x=\textwidth/10,
- xtick={-2,-1,0,1,2},
- axis x line shift=30pt,
- hide y axis,
- x tick label style = {font=\footnotesize},
- xlabel={\(Z\)},
- every axis x label/.style={at={(axis description cs:1,-0.25)},anchor=south west}]
- \addplot {gauss(0,0.75)};
-\end{axis}
-\end{tikzpicture}\end{center}}
- \end{figure*}
+ \begin{figure*}[hb]
+ \centering
+ \input{normal-dist-graph}
+ \end{figure*}
\subsection*{Central limit theorem}
- If \(X\) is randomly distributed with mean \(\mu\) and sd \(\sigma\), then with an adequate sample size \(n\) the distribution of the sample mean \(\overline{X}\) is approximately normal with mean \(E(\overline{X})\) and \(\operatorname{sd}(\overline{X}) = \frac{\sigma}{\sqrt{n}}\).
+ \begin{theorembox}{}
+ If \(X\) is randomly distributed with mean \(\mu\) and sd \(\sigma\), then with an adequate sample size \(n\) the distribution of the sample mean \(\overline{X}\) is approximately normal with mean \(E(\overline{X}) = \mu\) and \(\operatorname{sd}(\overline{X}) = \frac{\sigma}{\sqrt{n}}\).
+ \end{theorembox}
\subsection*{Confidence intervals}
Note hypotheses are always expressed in terms of population parameters
\end{warning}
- \subsection*{Null hypothesis \(H_0\)}
+ \subsection*{Null hypothesis \(\textbf{H}_0\)}
Sample drawn from population has same mean as control population, and any difference can be explained by sample variations.
- \subsection*{Alternative hypothesis \(H_1\)}
+ \subsection*{Alternative hypothesis \(\textbf{H}_1\)}
Amount of variation from control is significant, despite standard sample variations.
\subsection*{\(p\)-value}
+ Probability of observing a value of the sample statistic as extreme as or more extreme than the one observed, assuming null hypothesis is true.
+ For one-tail tests:
\begin{align*}
- p &= \Pr(\overline{X} \lessgtr \mu(H_1)) \\
- &= 2 \cdot \Pr(\overline{X} <> \mu(H_1) | \mu = 8)
+ p\text{-value} &= \Pr\left( \> \overline{X} \lessgtr \mu(\textbf{H}_1) \> \given \> \mu = \mu(\textbf{H}_0)\> \right) \\
+ &= \Pr\left( Z \lessgtr \dfrac{\left( \mu(\textbf{H}_1) - \mu(\textbf{H}_0) \right) \cdot \sqrt{n} }{\operatorname{sd}(X)} \right) \\
+ &\text{then use \texttt{normCdf} with std. norm.}
\end{align*}
- Probability of observing a value of the sample statistic as significant as the one observed, assuming null hypothesis is true.
-
\vspace{0.5em}
\begin{tabularx}{23em}{|l|X|}
\hline
\rowcolor{cas}
\(\boldsymbol{p}\) & \textbf{Conclusion} \\
\hline
- \(> 0.05\) & insufficient evidence against \(H_0\) \\
- \(< 0.05\) (5\%) & good evidence against \(H_0\) \\
- \(< 0.01\) (1\%) & strong evidence against \(H_0\) \\
- \(< 0.001\) (0.1\%) & very strong evidence against \(H_0\) \\
+ \(> 0.05\) & insufficient evidence against \(\textbf{H}_0\) \\
+ \(< 0.05\) (5\%) & good evidence against \(\textbf{H}_0\) \\
+ \(< 0.01\) (1\%) & strong evidence against \(\textbf{H}_0\) \\
+ \(< 0.001\) (0.1\%) & very strong evidence against \(\textbf{H}_0\) \\
\hline
\end{tabularx}
- \subsection*{Statistical significance}
+ \subsubsection*{Finding \(n\) for a given \(p\)-value}
+
+ Find \(c\) such that \(\Pr(Z \lessgtr c) = \alpha\) (use \texttt{invNormCdf} on CAS).
- Significance level is denoted by \(\alpha\).
+ \subsection*{Significance level \(\alpha\)}
+
+ The condition for rejecting the null hypothesis.
\-\hspace{1em} If \(p<\alpha\), null hypothesis is \textbf{rejected} \\
\-\hspace{1em} If \(p>\alpha\), null hypothesis is \textbf{not rejected}
Menu \(\rightarrow\) Statistics \(\rightarrow\) Calc \(\rightarrow\) Test. \\
Select \textit{One-Sample Z-Test} and \textit{Variable}, then input:
\begin{description}[nosep, style=multiline, labelindent=0.5cm, leftmargin=2cm, font=\normalfont]
- \item[\(\mu\) cond:] same operator as \(H_1\)
+ \item[\(\mu\) cond:] same operator as \(\textbf{H}_1\)
\item[\(\mu_0\):] expected sample mean (null hypothesis)
\item[\(\sigma\):] standard deviation (null hypothesis)
\item[\(\overline{x}\):] sample mean
\end{cas}
\subsection*{One-tail and two-tail tests}
+
+ \[ p\text{-value (two-tail)} = 2 \times p\text{-value (one-tail)} \]
\subsubsection*{One tail}
\begin{itemize}
\item \(\mu\) has changed in one direction
- \item State ``\(H_1: \mu \lessgtr \) known population mean''
+ \item State ``\(\textbf{H}_1: \mu \lessgtr \) known population mean''
\end{itemize}
\subsubsection*{Two tail}
\begin{itemize}
\item Direction of \(\Delta \mu\) is ambiguous
- \item State ``\(H_1: \mu \ne\) known population mean''
+ \item State ``\(\textbf{H}_1: \mu \ne\) known population mean''
\end{itemize}
- For two tail tests:
\begin{align*}
p\text{-value} &= \Pr(|\overline{X} - \mu| \ge |\overline{x}_0 - \mu|) \\
- &= \left( |Z| \ge \left|\dfrac{\overline{x}_0 - \mu}{\sigma \div \sqrt{n}} \right| \right)
+ &= \Pr\left( |Z| \ge \left|\dfrac{\overline{x}_0 - \mu}{\sigma \div \sqrt{n}} \right| \right)
\end{align*}
+ where
+ \begin{description}[nosep, labelindent=0.5cm]
+ \item [\(\mu\)] is the population mean under \(\textbf{H}_0\)
+ \item [\(\overline{x}_0\)] is the observed sample mean
+ \item [\(\sigma\)] is the population s.d.
+ \item [\(n\)] is the sample size
+ \end{description}
+
\subsection*{Modulus notation for two tail}
\(\Pr(|\overline{X} - \mu| \ge a) \implies\) ``the probability that the distance between \(\overline{X}\) and \(\mu\) is \(\ge a\)''
\subsection*{Errors}
\begin{description}[labelwidth=2.5cm, labelindent=0.5cm]
- \item [Type I error] \(H_0\) is rejected when it is \textbf{true}
- \item [Type II error] \(H_0\) is \textbf{not} rejected when it is \textbf{false}
+ \item [Type I error] \(\textbf{H}_0\) is rejected when it is \textbf{true}
+ \item [Type II error] \(\textbf{H}_0\) is \textbf{not} rejected when it is \textbf{false}
\end{description}
+ \begin{tabularx}{\columnwidth}{|X|l|l|}
+ \rowcolor{cas}\hline
+ \cellcolor{white}&\multicolumn{2}{c|}{\textbf{Actual result}} \\
+ \hline
+ \cellcolor{cas}\(\boldsymbol{z}\)\textbf{-test} & \cellcolor{light-gray}\(\textbf{H}_0\) true & \cellcolor{light-gray}\(\textbf{H}_0\) false \\
+ \hline
+ \cellcolor{light-gray}Reject \(\textbf{H}_0\) & Type I error & Correct \\
+ \hline
+ \cellcolor{light-gray}Do not reject \(\textbf{H}_0\) & Correct& Type II error \\
+ \hline
+ \end{tabularx}
+
% \subsection*{Using c.i. to find \(p\)}
% need more here