{ "cells": [
 { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [
  "import pandas as pd\n",
  "import numpy as np" ] },
 { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['name', 'datetime', 'temp', 'feelslike', 'dew', 'humidity', 'precip',\n", " 'precipprob', 'preciptype', 'snow', 'snowdepth', 'windgust',\n", " 'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',\n", " 'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'conditions',\n", " 'icon', 'stations'],\n", " dtype='object')" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Load the hourly weather export. A forward slash is used as the path separator so the\n",
  "# file resolves on every OS, matching the other CSV paths in this notebook.\n",
  "data_hist = pd.read_csv('data/in_data_hist.csv')\n",
  "data_hist.columns" ] },
 { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
outdoor_tempoutdoor_feels_likeoutdoor_humidityoutdoor_pressureoutdoor_weather
timestamp
2024-04-24 00:00:0030.738.374.751006.0Partially cloudy
2024-04-24 01:00:0030.340.083.881005.3Partially cloudy
2024-04-24 02:00:0030.037.479.191005.0Partially cloudy
2024-04-24 03:00:0030.237.576.901005.0Partially cloudy
2024-04-24 04:00:0030.339.281.571004.8Partially cloudy
..................
2024-05-06 19:00:0032.938.858.341007.0Partially cloudy
2024-05-06 20:00:0032.639.964.241007.0Rain, Partially cloudy
2024-05-06 21:00:0032.639.663.241007.0Partially cloudy
2024-05-06 22:00:0032.041.172.211008.0Partially cloudy
2024-05-06 23:00:0032.040.570.651008.6Partially cloudy
\n", "

312 rows × 5 columns

\n", "
" ], "text/plain": [ " outdoor_temp outdoor_feels_like outdoor_humidity \\\n", "timestamp \n", "2024-04-24 00:00:00 30.7 38.3 74.75 \n", "2024-04-24 01:00:00 30.3 40.0 83.88 \n", "2024-04-24 02:00:00 30.0 37.4 79.19 \n", "2024-04-24 03:00:00 30.2 37.5 76.90 \n", "2024-04-24 04:00:00 30.3 39.2 81.57 \n", "... ... ... ... \n", "2024-05-06 19:00:00 32.9 38.8 58.34 \n", "2024-05-06 20:00:00 32.6 39.9 64.24 \n", "2024-05-06 21:00:00 32.6 39.6 63.24 \n", "2024-05-06 22:00:00 32.0 41.1 72.21 \n", "2024-05-06 23:00:00 32.0 40.5 70.65 \n", "\n", " outdoor_pressure outdoor_weather \n", "timestamp \n", "2024-04-24 00:00:00 1006.0 Partially cloudy \n", "2024-04-24 01:00:00 1005.3 Partially cloudy \n", "2024-04-24 02:00:00 1005.0 Partially cloudy \n", "2024-04-24 03:00:00 1005.0 Partially cloudy \n", "2024-04-24 04:00:00 1004.8 Partially cloudy \n", "... ... ... \n", "2024-05-06 19:00:00 1007.0 Partially cloudy \n", "2024-05-06 20:00:00 1007.0 Rain, Partially cloudy \n", "2024-05-06 21:00:00 1007.0 Partially cloudy \n", "2024-05-06 22:00:00 1008.0 Partially cloudy \n", "2024-05-06 23:00:00 1008.6 Partially cloudy \n", "\n", "[312 rows x 5 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Raw weather columns to keep, mapped to the names used in the merged output.\n",
  "column_renames = {\n",
  "    'datetime': 'timestamp',\n",
  "    'temp': 'outdoor_temp',\n",
  "    'feelslike': 'outdoor_feels_like',\n",
  "    'humidity': 'outdoor_humidity',\n",
  "    'sealevelpressure': 'outdoor_pressure',\n",
  "    'conditions': 'outdoor_weather',\n",
  "}\n",
  "columns = list(column_renames)\n",
  "\n",
  "# Select and rename from the same mapping so the two lists cannot drift apart,\n",
  "# then parse the timestamps and use them as the index.\n",
  "data_hist = (\n",
  "    data_hist[columns]\n",
  "    .rename(columns=column_renames)\n",
  "    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))\n",
  "    .set_index('timestamp')\n",
  ")\n",
  "data_hist" ] },
 { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
  "# Date window kept from the particulate-matter files (both bounds inclusive).\n",
  "# It matches the 2024-04-24 .. 2024-05-06 span shown for data_hist above.\n",
  "PM_START = '2024-04-24'\n",
  "PM_END = '2024-05-06'\n",
  "\n",
  "\n",
  "def load_pm_window(path):\n",
  "    \"\"\"Read one particulate-matter CSV and keep the rows dated PM_START..PM_END.\n",
  "\n",
  "    Adds a parsed 'ts' column (from 'date') and a 'mean' column holding the unweighted\n",
  "    average of the 'q1', 'median' and 'q3' columns.\n",
  "    \"\"\"\n",
  "    frame = pd.read_csv(path)\n",
  "    frame['ts'] = pd.to_datetime(frame['date'])\n",
  "    # NOTE(review): rows are presumably one per day, stamped at midnight - confirm against\n",
  "    # the CSVs; rows later on PM_END than 00:00 would be excluded by this bound.\n",
  "    # .copy() makes the filtered frame independent, so adding 'mean' does not write into\n",
  "    # a slice of the frame that was read from disk.\n",
  "    frame = frame[frame['ts'].between(PM_START, PM_END)].copy()\n",
  "    # 'mean' is not read by the later cells of this notebook; kept for inspection.\n",
  "    frame['mean'] = (frame['q1'] + frame['q3'] + frame['median']) / 3\n",
  "    return frame\n",
  "\n",
  "\n",
  "pm10 = load_pm_window('data/pm10.csv')\n",
  "pm25 = load_pm_window('data/pm25.csv')\n" ] },
 { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [
  "def generate_hourly_values(min_val, max_val, count):\n",
  "    \"\"\"Return `count` evenly spaced values from min_val to max_val (both ends included).\"\"\"\n",
  "    return np.linspace(min_val, max_val, count)\n",
  "\n",
  "\n",
  "def generate_hourly_data(data):\n",
  "    \"\"\"Expand every row of `data` into 24 hourly rows.\n",
  "\n",
  "    Each input row contributes 24 timestamps, one hour apart and starting at row['ts'],\n",
  "    paired with 24 values linearly spaced from row['min'] to row['max'].\n",
  "\n",
  "    Returns a DataFrame with columns 'ts' and 'value' and a fresh 0..n-1 index;\n",
  "    an empty input gives an empty frame with the same two columns.\n",
  "    \"\"\"\n",
  "    # Build all per-row frames first and concatenate once: growing a frame inside the loop\n",
  "    # copies it on every pass, and concatenating onto an empty frame is deprecated in pandas.\n",
  "    hourly_frames = [\n",
  "        pd.DataFrame({\n",
  "            # A Timedelta step avoids the 'H' alias, which newer pandas versions deprecate.\n",
  "            'ts': pd.date_range(start=day_start, periods=24, freq=pd.Timedelta(hours=1)),\n",
  "            'value': generate_hourly_values(low, high, 24),\n",
  "        })\n",
  "        for day_start, low, high in zip(data['ts'], data['min'], data['max'])\n",
  "    ]\n",
  "    if not hourly_frames:\n",
  "        return pd.DataFrame(columns=['ts', 'value'])\n",
  "    return pd.concat(hourly_frames, ignore_index=True)\n",
  "\n",
  "\n",
  "pm10 = generate_hourly_data(pm10)\n",
  "pm25 = generate_hourly_data(pm25)" ] },
 { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
  "pm10 = pm10.rename(columns={'value': 'outdoor_pm10'})\n",
  "pm25 = pm25.rename(columns={'value': 'outdoor_pm25'})" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Combine" ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [
  "# Turn the timestamp index back into a regular column so it can be used as the join key.\n",
  "data_hist = data_hist.reset_index()\n",
  "\n",
  "# Convert every timestamp to timezone-aware UTC so the join keys share one dtype.\n",
  "pm10['ts'] = pd.to_datetime(pm10['ts'], utc=True)\n",
  "pm25['ts'] = pd.to_datetime(pm25['ts'], utc=True)\n",
  "data_hist['timestamp'] = pd.to_datetime(data_hist['timestamp'], utc=True)\n",
  "\n",
  "# Attach the hourly PM10 / PM2.5 values to the weather rows, matched on timestamp.\n",
  "data_hist = data_hist.join(pm10.set_index('ts'), on='timestamp')\n",
  "data_hist = data_hist.join(pm25.set_index('ts'), on='timestamp')\n",
  "\n",
  "# These columns have no historical source here and are written as NaN\n",
  "# (presumably to match a schema used elsewhere - confirm).\n",
  "data_hist['outdoor_description'] = np.nan\n",
  "data_hist['indoor_temp'] = np.nan\n",
  "data_hist['indoor_light'] = np.nan\n",
  "\n",
  "data_hist = data_hist[['timestamp', 'outdoor_temp', 'outdoor_feels_like', 'outdoor_pressure',\n",
  "                       'outdoor_humidity', 'outdoor_weather', 'outdoor_description', 'outdoor_pm25',\n",
  "                       'outdoor_pm10', 'indoor_temp', 'indoor_light']]" ] },
 { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [
  "from datetime import datetime\n",
  "\n",
  "# Output layout of the timestamp column: no UTC offset, second resolution.\n",
  "TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'\n",
  "\n",
  "\n",
  "def convert_datetime(datetime_str):\n",
  "    \"\"\"Re-format one ISO-8601 datetime string as 'YYYY-MM-DD HH:MM:SS'.\n",
  "\n",
  "    Any UTC offset in the input is omitted from the result. Single-value counterpart\n",
  "    of the vectorised conversion applied to the whole column below.\n",
  "    \"\"\"\n",
  "    return datetime.fromisoformat(datetime_str).strftime(TIMESTAMP_FORMAT)\n",
  "\n",
  "\n",
  "# Vectorised formatting of the whole column. pd.to_datetime is a no-op on the datetime\n",
  "# column and re-parses strings, so the cell can be re-run after the column became text.\n",
  "data_hist['timestamp'] = pd.to_datetime(data_hist['timestamp']).dt.strftime(TIMESTAMP_FORMAT)" ] },
 { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [
  "import os\n",
  "\n",
  "# to_csv does not create missing directories, so make sure out/ exists first.\n",
  "os.makedirs('out', exist_ok=True)\n",
  "data_hist.to_csv('out/data_hist.csv', index=False)" ] }
 ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.4" } }, "nbformat": 4, "nbformat_minor": 2 }