#!/usr/bin/env python3

""" 

Task: 
Make a comparison matrix of CI jobs and the components that are tested
https://issues.redhat.com/browse/OSPRH-17693

1. for each yaml file in rdo-jobs/zuul.d
2. Look at every job and get the required information
3. We want to capture the following information:
    - job name
    - job parent (if available)
    - which component is being tested (Function)
    - job type
        - We will do this in two ways
            - pull expected type from the name (Function)
            - pull the expected type from the parent value (Function)
    - required projects (if available)
    - nodeset (If available)
    - scenario being tested (filled with job name for now)
        - idea for this is to split based on what's defined in the name
    - playbooks in the pre run (if available)
    - playbook run
    - what vars are included in the playbook run

output the values to a row on a csv file

------------
Quick guide: 
------------

parse_yaml_jobs() helper functions
    - component_in_job()
    - classify_by_name()
    - classify_by_parent()
    - classify_if_periodic()
    - get_scenario_from_name(name):

main() helper function
    - parse_yaml_jobs()

"""

import csv
import os
import yaml
import glob
import sys
import json

# function for getting the component from the job name
# Not entirely happy with the functionality of this right now
def component_in_job(job_name, job_vars):
    """
    Detect which OpenStack component is involved in a job.

    Priority:
    1. Explicit job vars ("component" / "openstack_component")
    2. Scanning the job name for known service names
    3. Multi-service heuristics on the job name
    4. Fallback: "unknown"

    :param job_name: the Zuul job name
    :param job_vars: the job's "vars" mapping (may be None or empty)
    :returns: a component label string
    """

    if not job_vars:
        job_vars = {}

    # An explicit declaration in the job vars wins over any name-based guess.
    if "component" in job_vars:
        return job_vars["component"]
    if "openstack_component" in job_vars:
        return job_vars["openstack_component"]

    # Known OpenStack services to look for in the job name.
    known_services = [
        "adoption", "integration", "neutron", "cinder", "glance", "keystone",
        "swift", "heat", "ironic", "barbican", "designate",
        "manila", "octavia", "magnum", "sahara", "trove",
        "senlin", "zun", "nova", "master", "ceph", "cloudops", "compute",
        "container"]

    # Lower-case once so every name test below is case-insensitive.
    lowered = job_name.lower()

    for service in known_services:
        if service in lowered:
            return service

    # Multi-service/adoption scenarios
    if "architecture" in lowered:
        return "multi (core services)"
    # BUG FIX: this test previously used the raw job name, unlike every other
    # name check, so e.g. "Scenario" in a job name was silently missed.
    # (The "integration" half is unreachable — the loop above catches it —
    # but it is kept for clarity and safety.)
    if "integration" in lowered or "scenario" in lowered:
        return "integration"
    if "edpm" in lowered:
        return "multi (adoption scenario)"

    # Fallback
    return "unknown"



# We want to classify the job based on its name
# e.g. unit/lint, kubernetes operator test, component-specific, etc.
def classify_by_name(name):
    """
    Classify a job into a broad type from keywords in its name.

    The first matching rule wins; a job matching nothing falls back to
    the generic 'component' bucket.
    """
    # Validation jobs are recognised by prefix rather than substring.
    if name.startswith('validat'):
        return 'Validation'

    # Ordered (keywords, label) rules — earlier rules take precedence,
    # exactly mirroring the original if/elif chain.
    rules = [
        (('lint', 'pep8', 'flake8'), 'Lint'),
        (('unit', 'tox-py'), 'Unit Test'),
        (('tcib', 'build'), 'Build'),
        (('adoption', 'edpm'), 'EDPM / Data Plane Adoption Deploy'),
        (('tempest', 'integration', 'scenario'), 'Integration'),
        (('promote', 'promotion'), 'Promotion'),
    ]
    for keywords, label in rules:
        if any(keyword in name for keyword in keywords):
            return label
    return 'component'



# We want to get an alternative classification for the job based on its parent
# e.g. unit/lint, kubernetes operator test, component-specific, etc.
# Same as classify_by_name() but keys off the parent value instead of the name
def classify_by_parent(parent):
    """
    Classify a job from its parent job name.

    Offers an alternative signal to classify_by_name(); returns
    "Unknown" when no known parent pattern matches.
    """
    parent_rules = (
        ("tox", "Lint/Unit"),
        ("adoption", "EDPM/Adoption"),
        ("tripleo-ci-base", "Architecture"),
    )
    for marker, label in parent_rules:
        if marker in parent:
            return label
    return "Unknown"



# scenario being tested (filled with job name for now)
# Idea for this is to split based on what's defined in the name
def get_scenario_from_name(name):
    """
    Return the scenario identifier for a job.

    Placeholder: the job name itself currently stands in for the
    scenario; the intent is to later split the name into its
    meaningful parts.
    """
    return name



def classify_if_periodic(name):
    """Return "yes" when the job name marks it as periodic, else "no"."""
    return "yes" if "periodic" in name else "no"



# Function will be used to look at each file in the directory and append the found values to a created CSV
# This will get used in main()
def parse_yaml_jobs(filename):
    """
    Parse one zuul.d YAML file and extract metadata for every job entry.

    :param filename: path to a YAML file of Zuul configuration entries
    :returns: list of dicts, one per "job" entry, with the columns used
              for the JSON/CSV output
    """
    jobs_data = []
    with open(filename, encoding="utf-8") as f:
        # BUG FIX: yaml.safe_load() returns None for an empty file; fall
        # back to an empty list so the loop below doesn't raise TypeError.
        docs = yaml.safe_load(f) or []

    for entry in docs:
        # Zuul config files mix jobs with other entry kinds
        # (project-templates, secrets, ...); only "job" entries matter here.
        if "job" not in entry:
            continue
        job = entry["job"]

        # "name" is mandatory for a Zuul job; everything else is optional.
        name = job["name"]
        description = job.get('description', '')
        parent = job.get("parent", "")
        required_projects = job.get("required-projects", [])
        job_vars = job.get("vars", {})

        # Derived columns — see the helper functions above.
        component = component_in_job(name, job_vars)
        dependencies = job.get("dependencies", [])
        periodic = classify_if_periodic(name)
        job_type_1 = classify_by_name(name)
        # job_type_2 = classify_by_parent(parent)
        scenario = get_scenario_from_name(name)
        pre_run = job.get("pre-run", [])
        run_playbook = job.get("run", "")
        nodeset = job.get("nodeset", "")
        # Not extracted yet; kept so the CSV column exists.
        logs_location = "PLACEHOLDER"

        jobs_data.append({
            "File": os.path.basename(filename),
            "Job_name": name,
            "Description": description,
            "Parent": parent,
            "Component": component,
            "Periodic": periodic,
            "Job_type": job_type_1,
            # "job_type_2": job_type_2,
            "Dependencies": dependencies,
            "Scenario": scenario,
            "Pre_run": pre_run,
            "Run_playbook": run_playbook,
            "Required_projects": required_projects,
            "Nodeset": nodeset,
            "Vars": job_vars,
            "Logs_location": logs_location
        })

    return jobs_data



# Main entry point: parse every zuul.d YAML file and emit JSON + CSV summaries
def main():
    """
    Scan ./zuul.d for YAML files, parse every job definition, and write
    the collected rows to parsed_jobs_output.json and .csv.

    Exits with status 1 when the directory is missing or holds no YAML.
    """
    zuul_dir = os.path.join(os.getcwd(), "zuul.d")

    if not os.path.isdir(zuul_dir):
        print(f"Couldn't find directory {zuul_dir}")
        sys.exit(1)

    print(f"Looking for yaml files in {zuul_dir}")

    all_jobs = []

    # BUG FIX: sort the file list so the output row order is deterministic
    # across runs — glob order is filesystem-dependent.
    yaml_files = sorted(glob.glob(os.path.join(zuul_dir, "*.yaml")))

    if not yaml_files:
        print("No Yaml files found in zuul directory")
        sys.exit(1)

    for file_path in yaml_files:
        all_jobs.extend(parse_yaml_jobs(file_path))

    output_file = "parsed_jobs_output.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_jobs, f, indent=2)

    csv_file = "parsed_jobs_output.csv"
    if all_jobs:
        # Take the keys from the first job dict as column headers
        fieldnames = list(all_jobs[0].keys())

        with open(csv_file, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_jobs)

# Run only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    main()