{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Operators\n",
    "[[Polars Documentation](https://docs.pola.rs/api/python/stable/reference/expressions/operators.html)]\n",
    "\n",
    "All Polars [conjunction](https://docs.pola.rs/api/python/stable/reference/expressions/operators.html#conjunction), \n",
    "[comparison](https://docs.pola.rs/api/python/stable/reference/expressions/operators.html#comparison), \n",
    "and [binary](https://docs.pola.rs/api/python/stable/reference/expressions/operators.html#binary) \n",
    "operators in the linked documentation are supported and are considered row-by-row.\n",
    "\n",
    "Even if you are in an aggregation context like `.select` or `.agg`,\n",
    "OpenDP enforces that inputs to binary operators are row-by-row.\n",
    "This is to ensure that the left and right arguments of binary operators have meaningful row alignment.\n",
    "\n",
    "These operators are particularly useful for building filtering predicates and grouping columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div><style>\n",
       ".dataframe > thead > tr,\n",
       ".dataframe > tbody > tr {\n",
       "  text-align: right;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "</style>\n",
       "<small>shape: (4, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>SEX</th><th>OVER_40</th><th>len</th></tr><tr><td>i64</td><td>bool</td><td>u32</td></tr></thead><tbody><tr><td>1</td><td>false</td><td>18045</td></tr><tr><td>1</td><td>true</td><td>22883</td></tr><tr><td>2</td><td>false</td><td>15838</td></tr><tr><td>2</td><td>true</td><td>21500</td></tr></tbody></table></div>"
      ],
      "text/plain": [
       "shape: (4, 3)\n",
       "┌─────┬─────────┬───────┐\n",
       "│ SEX ┆ OVER_40 ┆ len   │\n",
       "│ --- ┆ ---     ┆ ---   │\n",
       "│ i64 ┆ bool    ┆ u32   │\n",
       "╞═════╪═════════╪═══════╡\n",
       "│ 1   ┆ false   ┆ 18045 │\n",
       "│ 1   ┆ true    ┆ 22883 │\n",
       "│ 2   ┆ false   ┆ 15838 │\n",
       "│ 2   ┆ true    ┆ 21500 │\n",
       "└─────┴─────────┴───────┘"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import polars as pl\n",
    "import opendp.prelude as dp\n",
    "dp.enable_features(\"contrib\")\n",
    "# Fetch and unpack the data. \n",
    "![ -e ../sample_FR_LFS.csv ] || ( curl 'https://github.com/opendp/dp-test-datasets/blob/main/data/sample_FR_LFS.csv.zip?raw=true' --location --output sample_FR_LFS.csv.zip; unzip sample_FR_LFS.csv.zip -d ../ )\n",
    "\n",
    "context = dp.Context.compositor(\n",
    "    # Many columns contain mixtures of strings and numbers and cannot be parsed as floats,\n",
    "    # so we'll set `ignore_errors` to true to avoid conversion errors.\n",
    "    data=pl.scan_csv(\"../sample_FR_LFS.csv\", ignore_errors=True),\n",
    "    privacy_unit=dp.unit_of(contributions=36),\n",
    "    privacy_loss=dp.loss_of(epsilon=1.0, delta=1e-7),\n",
    "    split_evenly_over=1,\n",
    "    margins={(): dp.polars.Margin(max_partition_length=60_000_000 * 36)}\n",
    ")\n",
    "\n",
    "query = (\n",
    "    context.query()\n",
    "    .filter((pl.col.HWUSUAL > 0) & (pl.col.HWUSUAL != 99))  # using the .gt, .and_ and .ne operators\n",
    "    .with_columns(OVER_40=pl.col.AGE > 40)\n",
    "    .group_by(\"SEX\", \"OVER_40\")\n",
    "    .agg(dp.len())\n",
    ")\n",
    "query.release().collect().sort(\"SEX\", \"OVER_40\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}