Source code for taxbrain.report_utils

Helper Functions for creating the automated reports

import json
import pypandoc
import numpy as np
import pandas as pd
import taxcalc as tc
from jinja2 import Template
from pathlib import Path
from datetime import datetime
from tabulate import tabulate
from collections import defaultdict, deque
from .utils import is_paramtools_format
from typing import Union

CUR_PATH = Path(__file__).resolve().parent

notable_vars = {
    "c00100": "AGI",
    "count_standard": "Number of standard deduction filers",
    "standard": "Standard deduction amount",
    "count_c04470": "Number of itemizers",
    "c04470": "Total itemized deductions",
    "c04600": "Personal exemptions",
    "c04800": "Taxable income",
    "c62100": "Total AMT income",
    "count_c09600": "Number of AMT filers",
    "c09600": "Total AMT",
    "c05800": "Total tax liability before credits",
    "c07100": "Total non-refundable credits",
    "refund": "Total refundable credits",
    "ubi": "Universal Basic Income",
    "benefit_cost_total": "Spending on benefit programs",
    "benefit_value_total": "Consumption value of benefits",
    "expanded_income": "Expanded income",
    "aftertax_income": "After-tax income",


[docs] def md_to_pdf(md_text, outputfile_path): """ Convert Markdown version of report to a PDF. Returns bytes that can be saved as a PDF Parameters ---------- md_text: str report template written in markdown outputfile_path: str path to where the final file sohould be written Returns ------- None Markdown text is saved as a PDF and the HTML used to create the report """ # convert markdown text to pdf with pandoc pypandoc.convert_text( md_text, "pdf", format="md", outputfile=outputfile_path, extra_args=["-V", "geometry:margin=1.5cm", "--pdf-engine", "pdflatex"], )
[docs] def convert_table(df, tablefmt: str = "pipe") -> str: """ Convert pandas DataFrame to Markdown style table Parameters ---------- df: Pandas DataFrame DataFrame to convert Returns ------- str String that is a formatted markdown table """ if isinstance(df, pd.DataFrame): return tabulate(df, headers="keys", tablefmt=tablefmt) else: return tabulate(df, headers="firstrow", tablefmt=tablefmt)
[docs] def policy_table(params): """ Create a table showing the policy parameters in a reform and their default value Parameters ---------- params: dict policy parameters being implemented Returns ------- md_tables: str String containing a markdown style table that summarized the given parameters """ # map out additional name information for vi indexed variables vi_map = { "MARS": [ "Single", "Married Filing Jointly", "Married Filing Separately", "Head of Household", "Widow", ], "idedtype": [ "Medical", "State & Local Taxes", "Real EState Taxes", "Casualty", "Miscellaneous", "Interest Paid", "Charitable Giving", ], "EIC": ["0 Kids", "1 Kid", "2 Kids", "3+ Kids"], } reform_years = set() reform_by_year = defaultdict(lambda: deque()) if is_paramtools_format(params): params = convert_params(params) pol = tc.Policy() # policy object used for getting original value # loop through all of the policy parameters in a given reform for param, meta in params.items(): # find all the years the parameter is updated years = set(meta.keys()) reform_years = reform_years.union(years) for yr in years: # find default information pol.set_year(yr) if param.endswith("-indexed"): _param = param.split("-")[0] pol_meta = pol.metadata()[_param] default_indexed = pol_meta["indexed"] new_indexed = meta[yr] name = pol_meta["title"] reform_by_year[yr].append( [ name, f"CPI Indexed: {default_indexed}", f"CPI Indexed: {new_indexed}", ] ) continue pol_meta = pol.metadata()[param] name = pol_meta["title"] default_val = getattr(pol, param) new_val = meta[yr] # skip any duplicated policy parameters if np.all(default_val == new_val): continue # create individual lines for indexed parameters # think parameters that vary by marital status, number of kids, etc if len(default_val.shape) != 1: # first find the indexed parameter we're working with for vi in vi_map.keys(): if vi in pol_meta["value"][0].keys(): vi_name = vi break vi_list = vi_map[vi_name] for i, val in enumerate(default_val[0]): # print(name, val, param, default_val) _name = f"{name} - {vi_list[i]}" _default_val = f"{val:,}" _new_val = f"{new_val[i]:,}" if _default_val == _new_val: continue reform_by_year[yr].append([_name, _default_val, _new_val]) else: reform_by_year[yr].append( [name, f"{default_val[0]:,}", f"{new_val:,}"] ) # convert all tables from CSV format to Markdown md_tables = {} for yr in reform_years: content = reform_by_year[yr] content.appendleft(["Policy", "Original Value", "New Value"]) md_tables[yr] = convert_table(content) return md_tables
[docs] def write_text(template_path, **kwargs): """ Fill in text with specified template Parameters ---------- template_path: str path to read template from Returns ------- rendered: str rendered template """ template_str = Path(template_path).open("r").read() template = Template(template_str) rendered = template.render(**kwargs) return rendered
[docs] def date(): """ Return formatted date Parameters ---------- None Returns -------- None """ today = month = today.strftime("%B") day = year = today.year date = f"{month} {day}, {year}" return date
[docs] def form_intro(pol_areas, description=None): """ Form the introduction line Parameters ---------- pol_areas: list list of all the policy areas included in the reform used to create a description of the reform description: str user provided description of the reform Returns ------- str introduction text for report """ # these are all of the possible strings used in the introduction sentance intro_text = { 1: "modifing the {} section of the tax code", 2: "modifing the {} and {} sections of the tax code", 3: "modifing the {}, {}, and {} sections of the tax code", 4: ( "modifing a number of areas of the tax code, " "including the {}, {}, and {} sections" ), } if not description: num_areas = min(len(pol_areas), 4) intro_line = intro_text[num_areas] if num_areas == 1: return intro_line.format(pol_areas[0]) elif num_areas == 2: return intro_line.format(pol_areas[0], pol_areas[1]) else: return intro_line.format(pol_areas[0], pol_areas[1], pol_areas[2]) else: return description
[docs] def form_baseline_intro(current_law): """ Form final sentence of introduction paragraph Parameters ----------- current_law: bool whether report is for the current law baseline Returns ------- str text or intro of baseline summary """ if not current_law: return f"{date()}" else: return ( f"{date()}, along with some modifications. A summary of these " 'modifications can be found in the "Summary of Baseline Policy" ' "section" )
[docs] def largest_tax_change(diff): """ Function to find the largest change in tax liability Parameters ---------- diff: Pandas DataFrame Differences table created by taxbrain Returns ------- largest_change_group: str string relating group with the largest change largest_change_str: str string relating size of largest change """ sub_diff = diff.drop(index="ALL") # remove total row # find the absolute largest change in total liability absolute_change = abs(sub_diff["tot_change"]) largest = sub_diff[max(absolute_change) == absolute_change] index_largest = largest.index.values[0] largest_change = largest["mean"].values[0] # index in case there"s a tie # split index to form sentance split_index = index_largest.split("-") if len(split_index) == 1: index_name = split_index[0][1:] direction = split_index[0][0] if direction == ">": direction_str = "greater than" elif direction == "<": direction_str = "less than" else: direction_str = "equal to" largest_change_group = f"{direction_str} {index_name}" else: largest_change_group = f"between {split_index[0]} and {split_index[1]}" formatted_change = dollar_str_formatting(largest_change) if largest_change < 0: largest_change_str = f"decrease by ${formatted_change}" elif largest_change > 0: largest_change_str = f"increase by ${formatted_change}" else: largest_change_str = "remain the same" return largest_change_group, largest_change_str
[docs] def notable_changes(tb, threshold: float) -> str: """ Find any notable changes in certain variables. "Notable" is defined as a percentage change above the given threshold. Parameters ---------- tb: TaxBrain object instance of TaxBrain object for analysis threshold: float Percentage change in an aggregate variable for it to be considered notable Returns ------- section: str text for section on notable changes """ notable_dict = defaultdict(list) # loop through all of the notable variables and see if there is a year # where they change more than the given threshold for var, desc in notable_vars.items(): if var.startswith("count_"): # count number of filers with a non-zero value for a given variable totals = [] years = [] for year in range(tb.start_year, tb.end_year + 1): _var = var.split("_")[1] base = tb.base_data[year] reform = tb.reform_data[year] base_total = np.where(base[_var] != 0, base["s006"], 0).sum() reform_total = np.where( reform[_var] != 0, reform["s006"], 0 ).sum() diff = reform_total - base_total totals.append( { "Base": base_total, "Reform": reform_total, "Difference": diff, } ) years.append(year) totals = pd.DataFrame(totals) totals.index = years else: totals = tb.weighted_totals(var).transpose() totals["pct_change"] = totals["Difference"] / totals["Base"] totals = totals.fillna(0.0) max_pct_change = max(totals["pct_change"]) max_yr = totals[totals["pct_change"] == max_pct_change].index.values[0] if abs(max_pct_change) >= threshold: if max_pct_change < 0: direction = "decreases" else: direction = "increases" pct_chng = max_pct_change * 100 notable_str = f"{desc} {direction} by {pct_chng:.2f}%" notable_dict[max_yr].append(notable_str) # add default message if no notable changes min_chng = threshold * 100 section = [f"No notable variables changed by more than {min_chng:.2f}%"] if notable_dict: section = [] for year, item in notable_dict.items(): section.append(f"_{year}_") for change in item: section.append(f"* {change}") return section
[docs] def behavioral_assumptions(tb): """ Return list of behavioral assumptions used Parameters ---------- tb: TaxBrain object instance of TaxBrain object for analysis Returns ------- assumptions: list list of behavioral assumptions used """ behavior_map = { "sub": "Substitution elasticity of taxable income: {}", "inc": "Income elasticity of taxable income: {}", "cg": "Semi-elasticity of long-term capital gains: {}", } assumptions = [] # if there are some behavioral assumptions, loop through them if tb.params["behavior"]: for param, val in tb.params["behavior"].items(): assumptions.append(behavior_map[param].format(val)) else: assumptions.append("No behavioral assumptions") return assumptions
[docs] def consumption_assumptions(tb): """ Create table of consumption assumptions used in analysis Parameters ---------- tb: TaxBrain object instance of TaxBrain object for analysis Returns ------- md_tables: str table of consumption assumptions for report """ if tb.params["consumption"]: params = tb.params["consumption"] # create table of consumption assumptions consump_years = set() consump_by_year = defaultdict(lambda: deque()) # read consumption.json from taxcalc package consump_json = ( Path(Path(tc.__file__).resolve().parent, "consumption.json") .open("r") .read() ) consump_meta = json.loads(consump_json) for param, meta in params.items(): # find all the years the parameter is updated years = set(meta.keys()) consump_years = consump_years.union(years) name = consump_meta[param]["title"] default_val = consump_meta[param]["value"][0] for yr in years: # find default information new_val = meta[yr] consump_by_year[yr].append([name, default_val, new_val]) md_tables = {} # hold markdown version of the tables for yr in consump_years: content = consump_by_year[yr] content.appendleft(["", "Default Value", "User Value"]) md_tables[yr] = convert_table(content) return md_tables else: msg = "No new consumption assumptions specified." return {"": msg}
[docs] def growth_assumptions(tb): """ Create a table with all of the growth assumptions used in the analysis Parameters ---------- tb: TaxBrain object instance of TaxBrain object for analysis Returns ------- md_tables: str table of growth assumptions for report """ growth_vars_map = { "ABOOK": "General Business and Foreign Tax Credit Growth Rate", "ACGNS": "Capital Gains Growth Rate", "ACPIM": "CPI - Medical", "ACPIU": "CPI - Urban Consumer", "ADIV": "Dividend Income Growth Rate", "AINTS": "Interest Income Growth Rate", "AIPD": "Interest Paid Deduction Growth Rate", "ASCHCI": "Schedule C Income Growth Rate", "ASCHCL": "Schedule C Losses Growth Rate", "ASCHEI": "Schedule E Income Growth Rate", "ASCHEL": "Schedule E Losses Growth Rate", "ASCHFI": "Schedule F Income Growth Rate", "ASCHFL": "Schedule F Losses Growth Rate", "ASOCSEC": "Social Security Benefit Growth Rate", "ATXPY": "Personal Income Growth Rate", "AUCOMP": "Unemployment Compensation Growth Rate", "AWAGE": "Wage Income Growth Rate", "ABENOTHER": "Other Benefits Growth Rate", "ABENMCARE": "Medicare Benefits Growth Rate", "ABENMCAID": "Medicaid Benfits Growth Rate", "ABENSSI": "SSI Benefits Growth Rate", "ABENSNAP": "SNAP Benefits Growth Rate", "ABENWIC": "WIC Benfits Growth Rate", "ABENHOUSING": "Housing Benefits Growth Rate", "ABENTANF": "TANF Benfits Growth Rates", "ABENVET": "Veteran's Benfits Growth Rates", } if tb.params["growdiff_response"]: params = tb.params["growdiff_response"] growdiff_years = set() growdiff_by_year = defaultdict(lambda: deque()) # base GrowFactor object to pull default values base_gf = tc.GrowFactors() # create GrowDiff and GrowFactors for the new assumptions reform_gd = tc.GrowDiff() reform_gd.update_growdiff(params) reform_gf = tc.GrowFactors() reform_gd.apply_to(reform_gf) # loop through all of the reforms for param, meta in params.items(): # find all years a new value is specified years = set(meta.keys()) growdiff_years = growdiff_years.union(years) name = growth_vars_map[param] for yr in years: # find default and new values default_val = base_gf.factor_value(param, yr) new_val = reform_gf.factor_value(param, yr) growdiff_by_year.append([name, default_val, new_val]) # create tables md_tables = {} for yr in growdiff_years: content = growdiff_by_year[yr] content.appendleft(["", "Default Value", "New Value"]) md_tables[yr] = convert_table(content) return md_tables else: return {"": "No new growth assumptions specified."}
[docs] def convert_params(params): """ Convert ParamTools style parameter inputs to traditional taxcalc style parameters for report policy table creation Parameters ---------- params: dict ParamTools style reform dictionary Returns ------- reform: dict a dictionary in traditional taxcalc style """ pol = tc.Policy() pol.adjust(params) indexed_params = [] reform = defaultdict(dict) first_yr = pol.LAST_BUDGET_YEAR for param in params.keys(): yrs = [] if param.endswith("-indexed"): indexed_params.append(param) continue for adj in params[param]: yrs.append(adj["year"]) yrs = set(yrs) values = getattr(pol, f"_{param}") for yr in yrs: idx = yr - pol.JSON_START_YEAR vals = values[idx] if isinstance(vals, np.ndarray): vals = list(vals) first_yr = min(yr, first_yr) reform[param][yr] = vals # add indexed parameters as being implemented in first year of reform for param in indexed_params: val = params[param][0]["value"] reform[param][first_yr] = val return reform
[docs] def dollar_str_formatting(val: Union[int, float]) -> str: """ Format dollar figures into $val trillion, $val billion, etc. Parameters ---------- val: float revenue amount Returns ------- str formatted dollar figure """ val = abs(val) if val >= 1e12: return f"{val * 1e-12:,.1f} trillion" elif 1e9 <= val < 1e12: return f"{val * 1e-9:,.1f} billion" elif 1e6 <= val < 1e9: return f"{val * 1e-6:,.1f} million" else: return f"{int(round(val, 0)):,}"