{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Columns\n",
    "\n",
    "[[Polars Documentation](https://docs.pola.rs/api/python/stable/reference/expressions/columns.html)]\n",
    "\n",
    "`pl.col(\"A\")` or `pl.col.A` starts an expression by selecting a column named \"A\".\n",
    "While the Polars Library allows for multiple columns to be selected simultaneously\n",
    "(via `pl.col(\"*\")`, `pl.col(\"A\", \"B\")`, `pl.col(pl.String)`, `pl.exclude`, and so on),\n",
    "the OpenDP Library currently only supports selection of one column at a time.\n",
    "The column name may be changed via `.alias`.\n",
    "\n",
    "Take for example the work hours dataset, where there are a collection of columns labeled `METHODX`, \n",
    "where `X` is an increasing alphabetic sequence."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import polars as pl\n",
    "import opendp.prelude as dp\n",
    "\n",
    "dp.enable_features(\"contrib\")\n",
    "\n",
    "# not recommended, OpenDP will reject this joint expression over multiple columns\n",
    "single_expr = pl.col([f\"METHOD{l}\" for l in \"ABCDE\"]).fill_null(0).dp.sum((0, 9))\n",
    "\n",
    "# build individual expressions for each query\n",
    "split_exprs = [pl.col(f\"METHOD{l}\").fill_null(0).dp.sum((0, 9)) for l in \"ABCDE\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Demonstration of use:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (1, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>METHODA</th><th>METHODB</th><th>METHODC</th><th>METHODD</th><th>METHODE</th></tr><tr><td>i64</td><td>i64</td><td>i64</td><td>i64</td><td>i64</td></tr></thead><tbody><tr><td>1704484</td><td>1699390</td><td>1702886</td><td>1703232</td><td>1705356</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (1, 5)\n",
       "┌─────────┬─────────┬─────────┬─────────┬─────────┐\n",
       "│ METHODA ┆ METHODB ┆ METHODC ┆ METHODD ┆ METHODE │\n",
       "│ ---     ┆ ---     ┆ ---     ┆ ---     ┆ ---     │\n",
       "│ i64     ┆ i64     ┆ i64     ┆ i64     ┆ i64     │\n",
       "╞═════════╪═════════╪═════════╪═════════╪═════════╡\n",
       "│ 1704484 ┆ 1699390 ┆ 1702886 ┆ 1703232 ┆ 1705356 │\n",
       "└─────────┴─────────┴─────────┴─────────┴─────────┘"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "context = dp.Context.compositor(\n",
    "    data=pl.scan_csv(dp.examples.get_france_lfs_path(), ignore_errors=True),\n",
    "    privacy_unit=dp.unit_of(contributions=36),\n",
    "    privacy_loss=dp.loss_of(epsilon=1.0),\n",
    "    split_evenly_over=1,\n",
    "    margins=[\n",
    "        dp.polars.Margin(max_partition_length=60_000_000 * 36)\n",
    "    ],\n",
    ")\n",
    "\n",
    "context.query().select(split_exprs).release().collect()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}