{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/matt/envs/menv/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3418: DtypeWarning: Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n" ] } ], "source": [ "import pandas as pd\n", "url = 'https://github.com/mattharrison/datasets/raw/master'\\\n", " '/data/dirtydevil.txt'\n", "df = pd.read_csv(url, skiprows=lambda num: num <34 or num == 35,\n", " sep='\\t')\n", "def to_denver_time(df_, time_col, tz_col):\n", " \"\"\"Return time_col as tz-aware datetimes converted to America/Denver.\n", "\n", " 'MDT' codes in tz_col are mapped to the 'MST7MDT' zone name, rows are\n", " grouped by that code, and each group is parsed with pd.to_datetime,\n", " localized to its group's zone (ambiguous wall-clock times are flagged\n", " as DST via ambiguous=True) and converted to 'America/Denver'.\n", " \"\"\"\n", " return (df_\n", " .assign(**{tz_col: df_[tz_col].replace('MDT', 'MST7MDT')})\n", " .groupby(tz_col)\n", " [time_col]\n", " .transform(lambda s: pd.to_datetime(s)\n", " .dt.tz_localize(s.name, ambiguous=True)\n", " .dt.tz_convert('America/Denver'))\n", " )" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def tweak_river(df_):\n", " \"\"\"Clean the raw river frame.\n", "\n", " Replaces 'datetime' with the result of to_denver_time, renames\n", " '144166_00060' to 'cfs' and '144167_00065' to 'gage_height', and\n", " makes 'datetime' the index.\n", " \"\"\"\n", " return (df_\n", " .assign(datetime=to_denver_time(df_, 'datetime', 'tz_cd'))\n", " .rename(columns={'144166_00060': 'cfs',\n", " '144167_00065': 'gage_height'})\n", " .set_index('datetime')\n", " )" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>agency_cd</th>\n", " <th>site_no</th>\n", " <th>tz_cd</th>\n", " <th>cfs</th>\n", " <th>144166_00060_cd</th>\n", " <th>gage_height</th>\n", " <th>144167_00065_cd</th>\n", " </tr>\n", 
" <tr>\n", " <th>datetime</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2001-05-07 01:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:15:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:45:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 02:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 08:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.53</td>\n", " <td>P</td>\n", " <td>6.16</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 08:45:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " 
<tr>\n", " <th>2020-09-28 09:15:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>539305 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", "datetime \n", "2001-05-07 01:00:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:15:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:30:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:45:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", "2001-05-07 02:00:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", "... ... ... ... ... ... \n", "2020-09-28 08:30:00-06:00 USGS 9333500 MDT 9.53 P \n", "2020-09-28 08:45:00-06:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:00:00-06:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:15:00-06:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:30:00-06:00 USGS 9333500 MDT 9.20 P \n", "\n", " gage_height 144167_00065_cd \n", "datetime \n", "2001-05-07 01:00:00-06:00 NaN NaN \n", "2001-05-07 01:15:00-06:00 NaN NaN \n", "2001-05-07 01:30:00-06:00 NaN NaN \n", "2001-05-07 01:45:00-06:00 NaN NaN \n", "2001-05-07 02:00:00-06:00 NaN NaN \n", "... ... ... 
\n", "2020-09-28 08:30:00-06:00 6.16 P \n", "2020-09-28 08:45:00-06:00 6.15 P \n", "2020-09-28 09:00:00-06:00 6.15 P \n", "2020-09-28 09:15:00-06:00 6.15 P \n", "2020-09-28 09:30:00-06:00 6.15 P \n", "\n", "[539305 rows x 7 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd = tweak_river(df)\n", "dd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "dd.to_csv('/tmp/dd.csv') # doctest: +SKIP" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "datetime,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd\n", "2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.0,A:[91],,\n", "2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.0,A:[91],,\n", "2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.0,A:[91],,\n", "2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.0,A:[91],,\n", "2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.0,A:[91],,\n", "\n" ] } ], "source": [ "print(dd.head(5).to_csv())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/matt/envs/menv/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3418: DtypeWarning: Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n" ] } ], "source": [ "dd2 = pd.read_csv('/tmp/dd.csv', index_col='datetime')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "ename": "ValueError", "evalue": "Excel does not support datetimes with timezones. 
Please ensure that datetimes are timezone unaware before writing to Excel.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m<ipython-input-7-0ac6c2e2e196>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/tmp/dd.xlsx'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mto_excel\u001b[0;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[1;32m 2282\u001b[0m \u001b[0minf_rep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minf_rep\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2283\u001b[0m )\n\u001b[0;32m-> 2284\u001b[0;31m formatter.write(\n\u001b[0m\u001b[1;32m 2285\u001b[0m \u001b[0mexcel_writer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2286\u001b[0m \u001b[0msheet_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msheet_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/formats/excel.py\u001b[0m in \u001b[0;36mwrite\u001b[0;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[1;32m 838\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 839\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 840\u001b[0;31m 
writer.write_cells(\n\u001b[0m\u001b[1;32m 841\u001b[0m \u001b[0mformatted_cells\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[0msheet_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py\u001b[0m in \u001b[0;36mwrite_cells\u001b[0;34m(self, cells, sheet_name, startrow, startcol, freeze_panes)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[0mwks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfreeze_panes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfreeze_panes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 227\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mcell\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcells\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 228\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfmt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value_with_fmt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/formats/excel.py\u001b[0m in \u001b[0;36mget_formatted_cells\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_formatted_cells\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mIterable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mExcelCell\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
777\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcell\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitertools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_header\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_body\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 778\u001b[0;31m \u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 779\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/formats/excel.py\u001b[0m in \u001b[0;36m_format_value\u001b[0;34m(self, val)\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0mval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat_format\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"tzinfo\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 526\u001b[0;31m raise 
ValueError(\n\u001b[0m\u001b[1;32m 527\u001b[0m \u001b[0;34m\"Excel does not support datetimes with \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 528\u001b[0m \u001b[0;34m\"timezones. Please ensure that datetimes \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: Excel does not support datetimes with timezones. Please ensure that datetimes are timezone unaware before writing to Excel." ] } ], "source": [ "dd.to_excel('/tmp/dd.xlsx')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "(dd # doctest: +SKIP\n", " .reset_index()\n", " .assign(datetime=lambda df_: df_.datetime.dt.tz_convert(tz=None))\n", " .set_index('datetime')\n", " .to_excel('/tmp/dd.xlsx')\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "writer = pd.ExcelWriter('/tmp/dd2.xlsx')\n", "dd2 = (dd \n", " .reset_index()\n", " .assign(datetime=lambda df_: df_.datetime.dt.tz_convert(tz=None))\n", " .set_index('datetime')\n", ")\n", "(dd2 # doctest: +SKIP\n", " .loc['2010':'2010-12-31']\n", " .to_excel(writer, sheet_name='2010')\n", ")\n", "(dd2 # doctest: +SKIP\n", " .loc['2011':'2011-12-31'] \n", " .to_excel(writer, sheet_name='2011')\n", ")\n", "# close() writes both sheets to disk and releases the file handle\n", "writer.close() # doctest: +SKIP" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "lines_to_next_cell": 2, "scrolled": true }, "outputs": [ { "ename": "ValueError", "evalue": "feather does not support serializing <class 'pandas.core.indexes.datetimes.DatetimeIndex'> for the index; you can .reset_index() to make the index into column(s)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m<ipython-input-10-73127889b6d4>\u001b[0m in 
\u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_feather\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/tmp/dd.fea'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 207\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 208\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mto_feather\u001b[0;34m(self, path, **kwargs)\u001b[0m\n\u001b[1;32m 2517\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeather_format\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mto_feather\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2518\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2519\u001b[0;31m 
\u001b[0mto_feather\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2520\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2521\u001b[0m @doc(\n", "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/feather_format.py\u001b[0m in \u001b[0;36mto_feather\u001b[0;34m(df, path, storage_options, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mInt64Index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRangeIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtyp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34mf\"feather does not support serializing {typ} \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\"for the index; you can .reset_index() to make the index into column(s)\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mValueError\u001b[0m: feather does not support serializing <class 'pandas.core.indexes.datetimes.DatetimeIndex'> for the index; you can .reset_index() to make the index into column(s)" ] } ], "source": [ "dd.to_feather('/tmp/dd.fea')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "(dd\n", " 
.reset_index()\n", " .to_feather('/tmp/dd.fea')\n", ")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd2 = pd.read_feather('/tmp/dd.fea')\n", "dd2.set_index('datetime').equals(dd)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "import sqlite3\n", "con = sqlite3.connect('dd.db')\n", "dd.to_sql('dd', con, if_exists='replace') # doctest: +SKIP" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sqlalchemy as sa\n", "eng = sa.create_engine('sqlite:///dd.db')\n", "sa_con = eng.connect()\n", "dd2 = pd.read_sql('dd', sa_con, index_col='datetime') # doctest: +SKIP\n", "dd2.equals(dd) # doctest: +SKIP" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>agency_cd</th>\n", " <th>site_no</th>\n", " <th>tz_cd</th>\n", " <th>cfs</th>\n", " <th>144166_00060_cd</th>\n", " <th>gage_height</th>\n", " <th>144167_00065_cd</th>\n", " </tr>\n", " <tr>\n", " <th>datetime</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2001-05-07 01:00:00</th>\n", " 
<td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:15:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:30:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:45:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 02:00:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 08:30:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.53</td>\n", " <td>P</td>\n", " <td>6.16</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 08:45:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:00:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:15:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:30:00</th>\n", " <td>USGS</td>\n", " 
<td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>539305 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", "datetime \n", "2001-05-07 01:00:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:15:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:30:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:45:00 USGS 9333500 MDT 70.00 A:[91] \n", "2001-05-07 02:00:00 USGS 9333500 MDT 70.00 A:[91] \n", "... ... ... ... ... ... \n", "2020-09-28 08:30:00 USGS 9333500 MDT 9.53 P \n", "2020-09-28 08:45:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:00:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:15:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:30:00 USGS 9333500 MDT 9.20 P \n", "\n", " gage_height 144167_00065_cd \n", "datetime \n", "2001-05-07 01:00:00 NaN None \n", "2001-05-07 01:15:00 NaN None \n", "2001-05-07 01:30:00 NaN None \n", "2001-05-07 01:45:00 NaN None \n", "2001-05-07 02:00:00 NaN None \n", "... ... ... 
\n", "2020-09-28 08:30:00 6.16 P \n", "2020-09-28 08:45:00 6.15 P \n", "2020-09-28 09:00:00 6.15 P \n", "2020-09-28 09:15:00 6.15 P \n", "2020-09-28 09:30:00 6.15 P \n", "\n", "[539305 rows x 7 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd2" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(dd2 # doctest: +SKIP\n", " .reset_index()\n", " .assign(datetime=lambda df_: df_.datetime\n", " .dt.tz_localize('America/Denver', ambiguous=False))\n", " .set_index('datetime')\n", " .equals(dd)\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "obj = dd.to_dict()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd2 = pd.DataFrame.from_dict(obj)\n", "dd.equals(dd2)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>agency_cd</th>\n", " <th>site_no</th>\n", " <th>tz_cd</th>\n", " <th>cfs</th>\n", " <th>144166_00060_cd</th>\n", " <th>gage_height</th>\n", " <th>144167_00065_cd</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2001-05-07 07:00:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " 
<td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 07:15:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 07:30:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 07:45:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 08:00:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 14:30:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.53</td>\n", " <td>P</td>\n", " <td>6.16</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 14:45:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 15:00:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 15:15:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 15:30:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " 
<td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>539305 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", "2001-05-07 07:00:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 07:15:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 07:30:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 07:45:00 USGS 9333500 MDT 70.00 A:[91] \n", "2001-05-07 08:00:00 USGS 9333500 MDT 70.00 A:[91] \n", "... ... ... ... ... ... \n", "2020-09-28 14:30:00 USGS 9333500 MDT 9.53 P \n", "2020-09-28 14:45:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 15:00:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 15:15:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 15:30:00 USGS 9333500 MDT 9.20 P \n", "\n", " gage_height 144167_00065_cd \n", "2001-05-07 07:00:00 NaN None \n", "2001-05-07 07:15:00 NaN None \n", "2001-05-07 07:30:00 NaN None \n", "2001-05-07 07:45:00 NaN None \n", "2001-05-07 08:00:00 NaN None \n", "... ... ... \n", "2020-09-28 14:30:00 6.16 P \n", "2020-09-28 14:45:00 6.15 P \n", "2020-09-28 15:00:00 6.15 P \n", "2020-09-28 15:15:00 6.15 P \n", "2020-09-28 15:30:00 6.15 P \n", "\n", "[539305 rows x 7 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd.to_json('/tmp/dd.json.gz')\n", "dd2 = pd.read_json('/tmp/dd.json.gz')\n", "dd2" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd2.equals(dd)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "dd3 = (dd2\n", " .reset_index()\n", " .rename(columns={'index':'datetime'})\n", " .assign(datetime=lambda df_: df_.datetime.dt.tz_localize(tz='UTC')\n", " .dt.tz_convert('America/Denver'))\n", " .set_index('datetime')\n", ")" ] }, { "cell_type": 
"code", "execution_count": 22, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>agency_cd</th>\n", " <th>site_no</th>\n", " <th>tz_cd</th>\n", " <th>cfs</th>\n", " <th>144166_00060_cd</th>\n", " <th>gage_height</th>\n", " <th>144167_00065_cd</th>\n", " </tr>\n", " <tr>\n", " <th>datetime</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2001-05-07 01:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:15:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:45:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 02:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.00</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " 
<td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 08:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.53</td>\n", " <td>P</td>\n", " <td>6.16</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 08:45:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:15:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " <tr>\n", " <th>2020-09-28 09:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>9.20</td>\n", " <td>P</td>\n", " <td>6.15</td>\n", " <td>P</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>539305 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", "datetime \n", "2001-05-07 01:00:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:15:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:30:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", "2001-05-07 01:45:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", "2001-05-07 02:00:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", "... ... ... ... ... ... 
\n", "2020-09-28 08:30:00-06:00 USGS 9333500 MDT 9.53 P \n", "2020-09-28 08:45:00-06:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:00:00-06:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:15:00-06:00 USGS 9333500 MDT 9.20 P \n", "2020-09-28 09:30:00-06:00 USGS 9333500 MDT 9.20 P \n", "\n", " gage_height 144167_00065_cd \n", "datetime \n", "2001-05-07 01:00:00-06:00 NaN None \n", "2001-05-07 01:15:00-06:00 NaN None \n", "2001-05-07 01:30:00-06:00 NaN None \n", "2001-05-07 01:45:00-06:00 NaN None \n", "2001-05-07 02:00:00-06:00 NaN None \n", "... ... ... \n", "2020-09-28 08:30:00-06:00 6.16 P \n", "2020-09-28 08:45:00-06:00 6.15 P \n", "2020-09-28 09:00:00-06:00 6.15 P \n", "2020-09-28 09:15:00-06:00 6.15 P \n", "2020-09-28 09:30:00-06:00 6.15 P \n", "\n", "[539305 rows x 7 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd3" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd3.equals(dd)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd3.round(3).equals(dd)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>agency_cd</th>\n", " <th>site_no</th>\n", " <th>tz_cd</th>\n", " 
<th>cfs</th>\n", " <th>144166_00060_cd</th>\n", " <th>gage_height</th>\n", " <th>144167_00065_cd</th>\n", " </tr>\n", " <tr>\n", " <th>datetime</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2001-05-07 01:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.0</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:15:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.0</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:30:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>71.0</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 01:45:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.0</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2001-05-07 02:00:00-06:00</th>\n", " <td>USGS</td>\n", " <td>9333500</td>\n", " <td>MDT</td>\n", " <td>70.0</td>\n", " <td>A:[91]</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", "datetime \n", "2001-05-07 01:00:00-06:00 USGS 9333500 MDT 71.0 A:[91] \n", "2001-05-07 01:15:00-06:00 USGS 9333500 MDT 71.0 A:[91] \n", "2001-05-07 01:30:00-06:00 USGS 9333500 MDT 71.0 A:[91] \n", "2001-05-07 01:45:00-06:00 USGS 9333500 MDT 70.0 A:[91] \n", "2001-05-07 02:00:00-06:00 USGS 9333500 MDT 70.0 A:[91] \n", "\n", " gage_height 144167_00065_cd \n", "datetime \n", "2001-05-07 01:00:00-06:00 NaN NaN \n", "2001-05-07 01:15:00-06:00 NaN NaN \n", "2001-05-07 01:30:00-06:00 NaN NaN \n", "2001-05-07 01:45:00-06:00 NaN NaN \n", "2001-05-07 
02:00:00-06:00 NaN NaN " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "lines_to_next_cell": 2 }, "outputs": [ { "data": { "text/plain": [ "'{\"agency_cd\":{\"989218800000\":\"USGS\",\"989219700000\":\"USGS\",\"9'" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dd.head().to_json()[:60]" ] } ], "metadata": { "jupytext": { "cell_metadata_filter": "-all", "main_language": "python", "notebook_metadata_filter": "-all" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }