#!/usr/bin/env python3

import argparse

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

ROOT_URL = "https://vcaa.vic.edu.au"
INDEX_URL = "https://vcaa.vic.edu.au/assessment/vce-assessment/" \
            "past-examinations/Pages/Index.aspx"


def print_output(items, level):
    # Drop empty strings/lists first so they don't break last-item detection
    filtered = [item for item in items if item]
    for i, item in enumerate(filtered):
        if isinstance(item, list):
            print_output(item, level + 1)
        else:
            # Use └─ for the last entry at this level, ├─ otherwise
            branch = "└─" if i == len(filtered) - 1 else "├─"
            print("│ " * level + branch + " ".join(item.split()))


def main():
    # Get arguments ("download" is accepted but not yet handled below)
    parser = argparse.ArgumentParser(description="Download VCAA exams and "
                                     "reports from their website")
    parser.add_argument("action", nargs="?", type=str,
                        choices=["list", "details", "download"])
    parser.add_argument("subjects", nargs=argparse.REMAINDER, type=str)
    args = parser.parse_args()

    # Parse index page
    index_page = requests.get(INDEX_URL)
    index_tree = BeautifulSoup(index_page.text, "html.parser")
    faculties = index_tree.find_all(class_="card")

    # Build {faculty: {subject: url}}. Some subject links wrap their text in
    # a <span> for some reason, so fall back to the span's contents when the
    # anchor's first child isn't usable text.
    faculties_dict = {}
    for f in faculties:
        faculties_dict[f.div.strong.contents[0]] = {
            (s.contents[0] if len(s.contents[0]) > 2 else s.span.contents[0]):
                urljoin(INDEX_URL, s.get("href"))
            for s in f.find(class_="links list-unstyled").find_all("a")
        }

    # Remove NHT link
    faculties_dict.pop("Northern Hemisphere Timetable", None)

    output = []
    # Iterate through all faculties
    for faculty, subjects in faculties_dict.items():
        # Check for subject match
        matching_subjects = set(args.subjects).intersection(subjects.keys())

        if args.action == "list":
            if matching_subjects and faculty not in args.subjects:
                output.append(faculty)
                output.append(sorted(matching_subjects))
            elif (not args.subjects) or (faculty in args.subjects):
                output.append(faculty)
                output.append(list(subjects.keys()))

        elif args.action == "details":
            # Print the available documents for requested subjects.
            # output nests as [subject, [h2, [h3, [document, ...]]]].
            if matching_subjects and faculty not in args.subjects:
                for subject in matching_subjects:
                    output.append(subject)
                    output.append([])
                    subject_page = requests.get(subjects[subject])
                    subject_tree = BeautifulSoup(subject_page.text,
                                                 "html.parser")
                    content = subject_tree.find(class_="main-section") \
                                          .find(class_="container-fluid")
                    for element in content.find_all():
                        if element.name == "h2":
                            # New document group
                            output[-1].append(element.get_text())
                            output[-1].append([])
                        elif element.name == "h3":
                            # New sub-group under the current h2
                            output[-1][-1].append(element.get_text())
                            output[-1][-1].append([])
                        elif element.name == "p" and \
                                element.find("a", recursive=False):
                            # Document link; make sure a leaf list exists
                            if not (isinstance(output[-1][-1], list)
                                    and len(output[-1][-1]) > 0):
                                output[-1][-1].append([])
                            output[-1][-1][-1].append(
                                "".join(element.get_text().split("\n")))
                        elif element.name == "table" and \
                                element.get("class") == ["past-examtable"]:
                            for row in element.find_all("tr"):
                                if row.find_next().name == "th":
                                    continue  # Skip header row
                                columns = list(row.children)
                                year = columns[0].get_text()
                                for exam in columns[1].find_all("a"):
                                    output[-1][-1][-1].append(
                                        year + " " +
                                        "".join(exam.get_text().split("\n")))
                                for report in columns[2].find_all("a"):
                                    # Splice "Report" in before the bracketed
                                    # file info in the link text
                                    output[-1][-1][-1].append(
                                        year + " " + " Report (".join(
                                            report.get_text().split("(")))

    print_output(output, 0)


if __name__ == "__main__":
    main()
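
# Example invocations, assuming the script is saved as vcaa.py (the file name
# and the quoted faculty/subject names below are illustrative; subject names
# must match the headings on the VCAA index page exactly, and multi-word
# names need quoting since "subjects" is parsed as separate arguments):
#
#   python vcaa.py list                              # every faculty and its subjects
#   python vcaa.py list "Mathematics"                # subjects of one faculty
#   python vcaa.py details "Specialist Mathematics"  # documents for one subject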