# Source code for taxbrain.report

import shutil
import pandas as pd
import behresp
import taxbrain
import taxcalc as tc
from pathlib import Path
from .report_utils import (
    form_intro,
    form_baseline_intro,
    write_text,
    date,
    largest_tax_change,
    notable_changes,
    behavioral_assumptions,
    consumption_assumptions,
    policy_table,
    convert_table,
    growth_assumptions,
    md_to_pdf,
    DIFF_TABLE_ROW_NAMES,
    dollar_str_formatting,
)

# Absolute directory containing this module; used to build paths to the
# bundled "report_files" resources.
CUR_PATH = Path(__file__).resolve().parents[0]



# [docs]
def report(
    tb,
    name=None,
    change_threshold=0.05,
    description=None,
    outdir=None,
    author="",
    css=None,
    verbose=False,
    clean=False,
):
    """
    Create a PDF report based on TaxBrain results

    The TaxBrain object is run if it has not been run yet. Tables, text
    snippets and two PNG graphs are then assembled into a markdown report
    from the bundled template, and the markdown is converted to a PDF.

    Parameters
    ----------
    tb: TaxBrain object
        instance of a TaxBrain object
    name: str
        Name you want used for the title of the report. Defaults to
        "Policy Report-<date>"
    change_threshold: float
        Percentage change (expressed as a decimal fraction) in
        an aggregate variable for it to be considered notable
    description: str
        A description of the reform being run
    outdir: str
        Output directory. Defaults to ``name`` with spaces replaced by
        dashes and commas removed. Created (including missing parent
        directories) if it does not exist
    author: str
        Person or persons to be listed as the author of the report
    css: str
        Path to a CSS file used to format the final report.
        NOTE(review): this argument is accepted but never referenced in
        the function body -- confirm whether it should be forwarded to
        the PDF conversion step
    verbose: bool
        boolean indicating whether or not to write progress as report is
        created
    clean: bool
        boolean indicating whether all of the files written to create the
        report should be deleted and a byte representation of the PDF
        returned. Note that the whole output directory is removed, not
        only the files written by this function

    Returns
    --------
    files or None: dict or None
        returns either None (reports saved to disk) or, when ``clean`` is
        True, a dictionary mapping "<name>.md" to the markdown text and
        "<name>.pdf" to the bytes of the PDF

    """

    def format_table(df, int_cols, float_cols, float_perc=2):
        """
        Apply formatting to a given table

        The columns are replaced in place with formatted strings, so the
        DataFrame passed in is modified.

        Parameters
        ----------
        df: Pandas DataFrame
            DataFrame being formatted
        int_cols: list
            columns that need to be converted to integers (values are
            cast with ``astype(int)``, which truncates toward zero)
        float_cols: list
            columns that need to be converted to floats
        float_perc: int
            Number of digits after the decimal point for float columns
            in the table. Default is 2

        Returns
        --------
        df: Pandas DataFrame
            table of output
        """
        for col in int_cols:
            df[col] = df[col].astype(int).apply("{:,}".format)
        for col in float_cols:
            # The explicit "f" presentation type is required: without it
            # the precision is treated as a number of significant digits
            # and larger values are rendered in scientific notation.
            df[col] = (
                df[col]
                .astype(float)
                .apply("{:,.{}f}".format, args=(float_perc,))
            )
        return df

    def export_plot(plot, graph):
        """
        Export plot as a PNG

        Parameters
        -----------
        plot: Matplolib.pyplot plot object
            plot to export
        graph: str
            str to use in file name of plot to save

        Returns
        -------
        str
            name of the PNG file (without the directory part); the file
            itself is written inside the report output directory
        """
        # export graph as a PNG
        # we could get a higher quality image with an SVG, but the SVG plots
        # do not render correctly in the PDF document
        filename = f"{graph}_graph.png"
        full_filename = Path(output_path, filename)
        plot.savefig(full_filename, dpi=1200, bbox_inches="tight")

        return filename

    if not tb.has_run:
        tb.run()
    if not name:
        name = f"Policy Report-{date()}"
    if not outdir:
        outdir = name.replace(" ", "-").replace(",", "")
    if author:
        author = f"Report Prepared by {author.title()}"
    # create directory to hold report contents; parents=True lets callers
    # pass a nested outdir whose parent directories do not exist yet
    output_path = Path(outdir)
    output_path.mkdir(parents=True, exist_ok=True)
    # dictionary to hold pieces of the final text
    text_args = {
        "start_year": tb.start_year,
        "end_year": tb.end_year,
        "title": name,
        "date": date(),
        "author": author,
        "taxbrain": str(Path(CUR_PATH, "report_files", "taxbrain.png")),
    }
    if tb.stacked:
        # scale by 1e-9 before formatting with one decimal place
        stacked_table = tb.stacked_table * 1e-9
        stacked_table = format_table(
            stacked_table, [], list(stacked_table.columns), float_perc=1
        )
        stacked_table = convert_table(stacked_table)
        text_args["stacked_table"] = stacked_table
    if verbose:
        print("Writing Introduction")
    # find policy areas used in the reform
    pol_meta = tc.Policy().metadata()
    pol_areas = set()
    for var in tb.params["policy"]:
        # catch "{}-indexed" parameter changes: keep only the part of the
        # name before the first dash (a name without a dash is unchanged)
        base_var = var.split("-")[0]
        area = pol_meta[base_var]["section_1"].lower()
        if area == "social security taxability":
            area = "Social Security taxability"
        if area != "":
            pol_areas.add(area)
    # sort so the introduction text is identical from run to run (set
    # iteration order is not stable across interpreter sessions)
    pol_areas = sorted(pol_areas)
    # add policy areas to the intro text
    text_args["introduction"] = form_intro(pol_areas, description)
    # write final sentence of introduction
    current_law = tb.params["base_policy"]
    text_args["baseline_intro"] = form_baseline_intro(current_law)

    if verbose:
        print("Writing Summary")
    agg_table = tb.weighted_totals("combined", include_total=True).fillna(0)
    rev_change = agg_table.loc["Difference"].sum()
    rev_direction = "increase"
    if rev_change < 0:
        rev_direction = "decrease"
    text_args["rev_direction"] = rev_direction
    text_args["rev_change"] = dollar_str_formatting(rev_change)

    # create differences table
    if verbose:
        print("Creating differences table")
    with pd.option_context("future.no_silent_downcasting", True):
        diff_table = tb.differences_table(
            tb.start_year, "standard_income_bins", "combined"
        ).fillna(0)
    diff_table.index = DIFF_TABLE_ROW_NAMES

    decile_diff_table = tb.differences_table(
        tb.start_year, "weighted_deciles", "combined"
    ).fillna(0)
    # move the "ALL" row to the bottom of the DataFrame
    # NOTE(review): shift(-1) moves the data up by one row while the index
    # labels stay where they are -- confirm the labels still line up with
    # the intended rows after this step
    target_row = decile_diff_table.loc["ALL", :]
    decile_diff_table = decile_diff_table.shift(-1)
    decile_diff_table.iloc[-1] = target_row.squeeze()

    # find which income bin sees the largest change in tax liability
    largest_change = largest_tax_change(diff_table)
    text_args["largest_change_group"] = largest_change[0]
    text_args["largest_change_str"] = largest_change[1]
    decile_diff_table.columns = tc.DIFF_TABLE_LABELS
    # drop certain columns to save space
    # The count columns are labelled differently depending on the
    # Tax-Calculator release. Pick the label set by looking at the labels
    # actually present rather than comparing version strings, because a
    # plain string comparison orders e.g. "10.0.0" before "3.2.1".
    if "Number of Returns with Tax Cut" in decile_diff_table.columns:
        drop_cols = [
            "Share of Overall Change",
            "Number of Returns with Tax Cut",
            "Number of Returns with Tax Increase",
        ]
    else:
        drop_cols = [
            "Share of Overall Change",
            "Count with Tax Cut",
            "Count with Tax Increase",
        ]
    sub_diff_table = decile_diff_table.drop(columns=drop_cols)

    # convert DataFrame to Markdown table
    sub_diff_table.index.name = "_Income &nbsp; Decile_"
    diff_table = format_table(sub_diff_table, [], list(sub_diff_table.columns))
    diff_md = convert_table(diff_table)
    text_args["differences_table"] = diff_md

    # aggregate results
    if verbose:
        print("Compiling aggregate results")
    # format aggregate table
    agg_table *= 1e-9
    agg_table = format_table(agg_table, list(agg_table.columns), [])
    agg_md = convert_table(agg_table)
    text_args["agg_table"] = agg_md

    # aggregate table by tax type
    tax_vars = ["iitax", "payrolltax", "combined"]
    agg_base = tb.multi_var_table(tax_vars, "base", include_total=True)
    agg_reform = tb.multi_var_table(tax_vars, "reform", include_total=True)
    agg_diff = agg_reform - agg_base
    agg_diff.index = ["Income Tax", "Payroll Tax", "Combined"]
    agg_diff *= 1e-9
    agg_diff = format_table(agg_diff, list(agg_diff.columns), [])
    text_args["agg_tax_type"] = convert_table(agg_diff)

    # summary of policy changes
    text_args["reform_summary"] = policy_table(tb.params["policy"])

    # policy baseline
    if tb.params["base_policy"]:
        text_args["policy_baseline"] = policy_table(tb.params["base_policy"])
    else:
        text_args["policy_baseline"] = (
            f"This report is based on current law as of {date()}."
        )

    # notable changes
    if verbose:
        print("Finding notable changes")
    text_args["notable_changes"] = notable_changes(tb, change_threshold)

    # behavioral assumptions
    if verbose:
        print("Compiling assumptions")
    text_args["behavior_assumps"] = behavioral_assumptions(tb)
    # consumption assumptions
    text_args["consump_assumps"] = consumption_assumptions(tb)
    # growth assumptions
    text_args["growth_assumps"] = growth_assumptions(tb)

    # determine model versions
    text_args["model_versions"] = [
        {"name": "Tax-Brain", "release": taxbrain.__version__},
        {"name": "Tax-Calculator", "release": tc.__version__},
        {"name": "Behavioral-Responses", "release": behresp.__version__},
    ]

    # create graphs
    if verbose:
        print("Creating graphs")
    dist_graph = taxbrain.distribution_plot(
        tb,
        tb.start_year,
        (5, 4),
        f"Fig. 2: Percentage Change in After-Tax Income - {tb.start_year}",
    )
    text_args["distribution_graph"] = export_plot(dist_graph, "dist")

    # differences graph
    diff_graph = taxbrain.differences_plot(
        tb,
        "combined",
        (6, 3),
        title="Fig. 1: Change in Aggregate Combined Tax Liability",
    )
    text_args["agg_graph"] = export_plot(diff_graph, "difference")

    # fill in the report template
    if verbose:
        print("Compiling report")
    template_path = Path(CUR_PATH, "report_files", "report_template.md")
    report_md = write_text(template_path, **text_args)

    # write PDF, markdown files
    filename = name.replace(" ", "-").replace(",", "")
    pdf_path = Path(output_path, f"{filename}.pdf")
    md_path = Path(output_path, f"{filename}.md")
    # explicit encoding so the markdown file does not depend on the
    # platform's default text encoding
    md_path.write_text(report_md, encoding="utf-8")
    md_to_pdf(report_md, str(pdf_path))

    if clean:
        # return PDF as bytes and the markdown text
        byte_pdf = pdf_path.read_bytes()
        files = {f"{filename}.md": report_md, f"{filename}.pdf": byte_pdf}
        # remove directory where everything was saved
        shutil.rmtree(output_path)
        assert not output_path.exists()
        return files
    # reports were left on disk; nothing to hand back
    return None