Andrew's git - notes.git/blob - spec/statistics.tex

   1\documentclass[a4paper]{article}
   2\usepackage[a4paper, margin=2cm]{geometry}
   3\usepackage{array}
   4\usepackage{amsmath}
   5\usepackage{amssymb}
   6\usepackage{tcolorbox}
   7\usepackage{fancyhdr}
   8\usepackage{pgfplots}
   9\usepackage{tabularx}
  10\usepackage{keystroke}
  11\usepackage{listings}
  12\usepackage{xcolor} % used only to show the phantomed stuff
  13\definecolor{cas}{HTML}{e6f0fe}
  14\usepackage{mathtools}
  15\pgfplotsset{compat=1.16}
  16
  17\pagestyle{fancy}
  18\fancyhead[LO,LE]{Unit 4 Specialist --- Statistics}
  19\fancyhead[CO,CE]{Andrew Lorimer}
  20
  21\setlength\parindent{0pt}
  22
  23\begin{document}
  24
  25  \title{Statistics}
  26  \author{}
  27  \date{}
  28  \maketitle
  29
  30  \section{Linear combinations of random variables}
  31
  32  \subsection*{Continuous random variables}
  33
  34  A continuous random variable \(X\) has a pdf \(f\) such that:
  35
  36  \begin{enumerate}
  37    \item \(f(x) \ge 0 \quad \forall x\)
  38    \item \(\int^\infty_{-\infty} f(x) \> dx = 1\)
  39  \end{enumerate}
  40
  41  \[ \Pr(X \le c) = \int^c_{-\infty} f(x) \> dx \]
  42
  43  \subsubsection*{Linear functions \(X \rightarrow aX+b\)}
  44
  45  \begin{align*}
  46    \Pr(Y \le y) &= \Pr(aX+b \le y) \\
  47    &= \Pr\left(X \le \dfrac{y-b}{a}\right) \quad \text{(for } a > 0 \text{)} \\
  48    &= \int^{\frac{y-b}{a}}_{-\infty} f(x) \> dx
  49  \end{align*}
  50
  51  \begin{align*}
  52    \textbf{Mean:} && \operatorname{E}(aX+b) & = a\operatorname{E}(X)+b \\
  53    \textbf{Variance:} && \operatorname{Var}(aX+b) &= a^2 \operatorname{Var}(X) \\
  54  \end{align*}
  55
  56  \subsection*{Linear combination of two random variables}
  57
  58  \begin{align*}
  59    \textbf{Mean:} && \operatorname{E}(aX+bY) & = a\operatorname{E}(X)+b\operatorname{E}(Y) \\
  60    \textbf{Variance:} && \operatorname{Var}(aX+bY) &= a^2 \operatorname{Var}(X) + b^2 \operatorname{Var}(Y) \tag{if \(X\) and \(Y\) are independent}\\
  61  \end{align*}
  62
  63  \section{Sample mean}
  64
  65  Approximation of the \textbf{population mean} determined experimentally.
  66
  67  \[ \overline{x} = \dfrac{\sum x}{n} \]
  68
  69  where \(n\) is the size of the sample (number of sample points) and \(x\) is the value of a sample point
  70
  71  \subsubsection*{\colorbox{cas}{On CAS:}}
  72
  73  \begin{enumerate}
  74    \item Spreadsheet
  75    \item In cell A1: \verb;mean(randNorm(sd, mean, sample size));
  76    \item Edit \(\rightarrow\) Fill \(\rightarrow\) Fill Range
  77    \item Input range as A1:An where \(n\) is the number of samples
  78    \item Graph \(\rightarrow\) Histogram
  79  \end{enumerate}
  80
  81  \subsubsection*{Sample size of \(n\)}
  82
  83  \[ \overline{X} = \sum_{i=1}^n \frac{x_i}{n} = \dfrac{\sum x}{n} \]
  84
  85  Sample mean is distributed with mean \(\mu\) and sd \(\frac{\sigma}{\sqrt{n}}\) (approaches these values for increasing sample size \(n\)).
  86
  87  \colorbox{cas}{On CAS:} Spreadsheet \(\rightarrow\) Catalog \(\rightarrow\) \verb;randNorm(sd, mean, n); where \verb;n; is the number of samples. Show histogram with Histogram key in top left \\
  88  To calculate parameters of a dataset: Calc \(\rightarrow\) One-variable
  89  
  90  \section{Normal distributions}
  91
  92  mean = mode = median
  93
  94  \[ Z = \frac{X - \mu}{\sigma} \]
  95
  96  Normal distributions must have area (total prob.) of 1 \(\implies \int^\infty_{-\infty} f(x) \> dx = 1\)
  97\pgfmathdeclarefunction{gauss}{2}{%
  98  \pgfmathparse{1/(#2*sqrt(2*pi))*exp(-((x-#1)^2)/(2*#2^2))}%
  99}
 100
 101\begin{tikzpicture}
 102\begin{axis}[every axis plot post/.append style={
 103  mark=none,domain=-3:3,samples=50,smooth}, % All plots: from -3:3, 50 samples, smooth, no marks
 104  axis x line*=bottom, % no box around the plot, only x and y axis
 105  axis y line*=left, % the * suppresses the arrow tips
 106  enlargelimits=upper,
 107  ytick={0.5319}, % peak of gauss(0,0.75) = 1/(0.75*sqrt(2*pi))
 108  yticklabels={\(\frac{1}{\sigma \sqrt{2\pi}}\)}, 
 109  xtick={-1.5,-0.75,0,0.75,1.5}, % multiples of the plotted sd (0.75)
 110  xticklabels={\(\mu-2\sigma\), \(\mu-\sigma\), \(\mu\), \(\mu+\sigma\), \(\mu+2\sigma\)},
 111  xlabel={\(x\)},
 112  every axis x label/.style={at={(current axis.right of origin)},anchor=north west},
 113  ylabel={\(f(x)\)}]
 114  \addplot {gauss(0,0.75)};
 115\end{axis}
 116\end{tikzpicture}
 117
 118  \section{Central limit theorem}
 119
 120  If \(X\) is randomly distributed with mean \(\mu\) and sd \(\sigma\), then with an adequate sample size \(n\) the distribution of the sample mean \(\overline{X}\) is approximately normal with mean \(\operatorname{E}(\overline{X}) = \mu\) and \(\operatorname{sd}(\overline{X}) = \frac{\sigma}{\sqrt{n}}\).
 121
 122\end{document}