diff --git a/topic/timeseries/dask-weather-data-import.ipynb b/topic/timeseries/dask-weather-data-import.ipynb index 25d56293..963b79d3 100644 --- a/topic/timeseries/dask-weather-data-import.ipynb +++ b/topic/timeseries/dask-weather-data-import.ipynb @@ -50,15 +50,14 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "e0649e64", - "metadata": { - "scrolled": true - }, "outputs": [], "source": [ - "#!pip install dask pandas==2.0.0 'sqlalchemy[crate]'" - ] + "#!pip install dask pandas 'crate[sqlalchemy]'" + ], + "metadata": { + "collapsed": false + }, + "id": "a31d75fa072055fe" }, { "cell_type": "markdown", @@ -92,8 +91,16 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 37, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset URL: https://www.kaggle.com/datasets/guillemservera/global-daily-climate-data\n" + ] + } + ], "source": [ "from cratedb_toolkit.datasets import load_dataset\n", "\n", @@ -101,12 +108,17 @@ "dataset.acquire()" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-08T15:42:23.278771Z", + "start_time": "2024-04-08T15:42:22.381931Z" + } + }, + "id": "dd108f22d04130e4" }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 38, "outputs": [], "source": [ "from dask import dataframe as dd\n", @@ -117,25 +129,33 @@ "pbar.register()" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-08T15:42:23.358987Z", + "start_time": "2024-04-08T15:42:23.356811Z" + } + }, + "id": "56672457911d1bea" }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 39, "id": "a506f7c9", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:30.181619Z", + "start_time": "2024-04-08T15:42:24.164528Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 6.26 ss\n", - "[########################################] | 100% Completed | 6.37 s\n", - "[########################################] | 100% Completed | 6.47 s\n", - "[########################################] | 100% Completed | 6.47 s\n", + "[########################################] | 100% Completed | 3.22 ss\n", + "[########################################] | 100% Completed | 3.32 s\n", "\n", - "Index: 27635763 entries, 0 to 24220\n", + "Int64Index: 27635763 entries, 0 to 24220\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", @@ -155,163 +175,16 @@ "13 sunshine_total_min 1021461 non-null float64\n", "dtypes: category(3), datetime64[ns](1), float64(10)\n", "memory usage: 2.6 GB\n", - "[########################################] | 100% Completed | 5.37 ss\n", - "[########################################] | 100% Completed | 5.48 s\n", - "[########################################] | 100% Completed | 5.58 s\n", - "[########################################] | 100% Completed | 5.68 s\n" + "[########################################] | 100% Completed | 2.56 ss\n", + "[########################################] | 100% Completed | 2.66 s\n" ] }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
station_idcity_namedateseasonavg_temp_cmin_temp_cmax_temp_cprecipitation_mmsnow_depth_mmavg_wind_dir_degavg_wind_speed_kmhpeak_wind_gust_kmhavg_sea_level_pres_hpasunshine_total_min
041515Asadabad1957-07-01Summer27.021.135.60.0NaNNaNNaNNaNNaNNaN
141515Asadabad1957-07-02Summer22.818.932.20.0NaNNaNNaNNaNNaNNaN
241515Asadabad1957-07-03Summer24.316.735.61.0NaNNaNNaNNaNNaNNaN
341515Asadabad1957-07-04Summer26.616.137.84.1NaNNaNNaNNaNNaNNaN
441515Asadabad1957-07-05Summer30.820.041.70.0NaNNaNNaNNaNNaNNaN
\n", - "
" - ], - "text/plain": [ - " station_id city_name date season avg_temp_c min_temp_c max_temp_c \n", - "0 41515 Asadabad 1957-07-01 Summer 27.0 21.1 35.6 \\\n", - "1 41515 Asadabad 1957-07-02 Summer 22.8 18.9 32.2 \n", - "2 41515 Asadabad 1957-07-03 Summer 24.3 16.7 35.6 \n", - "3 41515 Asadabad 1957-07-04 Summer 26.6 16.1 37.8 \n", - "4 41515 Asadabad 1957-07-05 Summer 30.8 20.0 41.7 \n", - "\n", - " precipitation_mm snow_depth_mm avg_wind_dir_deg avg_wind_speed_kmh \n", - "0 0.0 NaN NaN NaN \\\n", - "1 0.0 NaN NaN NaN \n", - "2 1.0 NaN NaN NaN \n", - "3 4.1 NaN NaN NaN \n", - "4 0.0 NaN NaN NaN \n", - "\n", - " peak_wind_gust_kmh avg_sea_level_pres_hpa sunshine_total_min \n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN " - ] + "text/plain": " station_id city_name date season avg_temp_c min_temp_c max_temp_c \\\n0 41515 Asadabad 1957-07-01 Summer 27.0 21.1 35.6 \n1 41515 Asadabad 1957-07-02 Summer 22.8 18.9 32.2 \n2 41515 Asadabad 1957-07-03 Summer 24.3 16.7 35.6 \n3 41515 Asadabad 1957-07-04 Summer 26.6 16.1 37.8 \n4 41515 Asadabad 1957-07-05 Summer 30.8 20.0 41.7 \n\n precipitation_mm snow_depth_mm avg_wind_dir_deg avg_wind_speed_kmh \\\n0 0.0 NaN NaN NaN \n1 0.0 NaN NaN NaN \n2 1.0 NaN NaN NaN \n3 4.1 NaN NaN NaN \n4 0.0 NaN NaN NaN \n\n peak_wind_gust_kmh avg_sea_level_pres_hpa sunshine_total_min \n0 NaN NaN NaN \n1 NaN NaN NaN \n2 NaN NaN NaN \n3 NaN NaN NaN \n4 NaN NaN NaN ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
station_idcity_namedateseasonavg_temp_cmin_temp_cmax_temp_cprecipitation_mmsnow_depth_mmavg_wind_dir_degavg_wind_speed_kmhpeak_wind_gust_kmhavg_sea_level_pres_hpasunshine_total_min
041515Asadabad1957-07-01Summer27.021.135.60.0NaNNaNNaNNaNNaNNaN
141515Asadabad1957-07-02Summer22.818.932.20.0NaNNaNNaNNaNNaNNaN
241515Asadabad1957-07-03Summer24.316.735.61.0NaNNaNNaNNaNNaNNaN
341515Asadabad1957-07-04Summer26.616.137.84.1NaNNaNNaNNaNNaNNaN
441515Asadabad1957-07-05Summer30.820.041.70.0NaNNaNNaNNaNNaNNaN
\n
" }, - "execution_count": 56, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -329,122 +202,29 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 40, "id": "4c083721", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:30.407165Z", + "start_time": "2024-04-08T15:42:30.182080Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 107.04 ms\n", - "[########################################] | 100% Completed | 211.77 ms\n", - "[########################################] | 100% Completed | 316.85 ms\n", - "[########################################] | 100% Completed | 421.17 ms\n" + "[########################################] | 100% Completed | 101.33 ms\n", + "[########################################] | 100% Completed | 201.26 ms\n" ] }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
station_idcity_namecountrystateiso2iso3loc
041515AsadabadAfghanistanKunarAFAFG[71.1500045859, 34.8660000397]
138954FayzabadAfghanistanBadakhshanAFAFG[70.5792471913, 37.1297607616]
241560JalalabadAfghanistanNangarharAFAFG[70.4361034738, 34.4415269155]
338947KunduzAfghanistanKunduzAFAFG[68.8725296619, 36.7279506623]
438987Qala i NawAfghanistanBadghisAFAFG[63.1332996367, 34.983000131]
\n", - "
" - ], - "text/plain": [ - " station_id city_name country state iso2 iso3 \n", - "0 41515 Asadabad Afghanistan Kunar AF AFG \\\n", - "1 38954 Fayzabad Afghanistan Badakhshan AF AFG \n", - "2 41560 Jalalabad Afghanistan Nangarhar AF AFG \n", - "3 38947 Kunduz Afghanistan Kunduz AF AFG \n", - "4 38987 Qala i Naw Afghanistan Badghis AF AFG \n", - "\n", - " loc \n", - "0 [71.1500045859, 34.8660000397] \n", - "1 [70.5792471913, 37.1297607616] \n", - "2 [70.4361034738, 34.4415269155] \n", - "3 [68.8725296619, 36.7279506623] \n", - "4 [63.1332996367, 34.983000131] " - ] + "text/plain": " station_id city_name country state iso2 iso3 \\\n0 41515 Asadabad Afghanistan Kunar AF AFG \n1 38954 Fayzabad Afghanistan Badakhshan AF AFG \n2 41560 Jalalabad Afghanistan Nangarhar AF AFG \n3 38947 Kunduz Afghanistan Kunduz AF AFG \n4 38987 Qala i Naw Afghanistan Badghis AF AFG \n\n loc \n0 [71.1500045859, 34.8660000397] \n1 [70.5792471913, 37.1297607616] \n2 [70.4361034738, 34.4415269155] \n3 [68.8725296619, 36.7279506623] \n4 [63.1332996367, 34.983000131] ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
station_idcity_namecountrystateiso2iso3loc
041515AsadabadAfghanistanKunarAFAFG[71.1500045859, 34.8660000397]
138954FayzabadAfghanistanBadakhshanAFAFG[70.5792471913, 37.1297607616]
241560JalalabadAfghanistanNangarharAFAFG[70.4361034738, 34.4415269155]
338947KunduzAfghanistanKunduzAFAFG[68.8725296619, 36.7279506623]
438987Qala i NawAfghanistanBadghisAFAFG[63.1332996367, 34.983000131]
\n
" }, - "execution_count": 68, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -466,9 +246,14 @@ }, { "cell_type": "code", - "execution_count": 119, + "execution_count": 41, "id": "903e0fed", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:31.545551Z", + "start_time": "2024-04-08T15:42:31.542164Z" + } + }, "outputs": [], "source": [ "# Read countries, adapt the path to the files accordingly\n", @@ -497,13 +282,19 @@ ], "metadata": { "collapsed": false - } + }, + "id": "c3f1b9a28612a491" }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 42, "id": "9eaf4af1", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:33.584727Z", + "start_time": "2024-04-08T15:42:33.575660Z" + } + }, "outputs": [], "source": [ "import os\n", @@ -541,17 +332,20 @@ }, { "cell_type": "code", - "execution_count": 121, + "execution_count": 43, "id": "5f972876", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:34.798906Z", + "start_time": "2024-04-08T15:42:34.793221Z" + } + }, "outputs": [ { "data": { - "text/plain": [ - "" - ] + "text/plain": "" }, - "execution_count": 121, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -588,17 +382,20 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 45, "id": "9d8b46de", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:43.972291Z", + "start_time": "2024-04-08T15:42:43.880509Z" + } + }, "outputs": [ { "data": { - "text/plain": [ - "" - ] + "text/plain": "" }, - "execution_count": 105, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -656,9 +453,14 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 46, "id": "311e588c", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:45.199060Z", + "start_time": "2024-04-08T15:42:45.197376Z" + } + }, "outputs": [], "source": [ "# Uncomment the following lines to process the actual weather data.\n", @@ -680,19 +482,21 @@ }, { "cell_type": "code", - "execution_count": 120, + "execution_count": 47, "id": "53e02715", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:46.455692Z", + "start_time": "2024-04-08T15:42:46.145388Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 964.80 ms\n", - "[########################################] | 100% Completed | 1.06 s\n", - "[########################################] | 100% Completed | 1.16 s\n", - "[########################################] | 100% Completed | 1.17 s\n", - "[########################################] | 100% Completed | 1.27 s\n" + "[########################################] | 100% Completed | 203.73 ms\n", + "[########################################] | 100% Completed | 302.84 ms\n" ] } ], @@ -713,19 +517,21 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 48, "id": "c1f87112", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-08T15:42:48.171402Z", + "start_time": "2024-04-08T15:42:47.862200Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 1.17 sms\n", - "[########################################] | 100% Completed | 1.17 s\n", - "[########################################] | 100% Completed | 1.27 s\n", - "[########################################] | 100% Completed | 1.27 s\n", - "[########################################] | 100% Completed | 1.37 s\n" + "[########################################] | 100% Completed | 202.58 ms\n", + "[########################################] | 100% Completed | 302.57 ms\n" ] } ], @@ -733,6 +539,15 @@ "cities.to_sql('cities', CONNECTION_STRING, if_exists='append',\n", " index=False, chunksize=1000, parallel=True, method=insert_bulk)" ] + }, + { + "cell_type": "code", + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "6501d29fd917a04d" } ], "metadata": { diff --git a/topic/timeseries/requirements.txt b/topic/timeseries/requirements.txt index bb22c200..b192f711 100644 --- a/topic/timeseries/requirements.txt +++ b/topic/timeseries/requirements.txt @@ -1,7 +1,7 @@ crate[sqlalchemy]==0.35.2 cratedb-toolkit[datasets]==0.0.8 refinitiv-data<1.7 -pandas<2 +pandas<2.2.1 pycaret>=3.0,<3.4 pydantic<2 -sqlalchemy<2 +sqlalchemy>=1.0,<2.1 \ No newline at end of file