From 5d5b1868629bd4f590205619c257289fa46947a2 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sat, 23 Mar 2019 00:02:32 -0300 Subject: [PATCH 01/23] is JSON data preparation --- .../isJson_dataPrep.ipynb | 392 ++++++++++++++++++ 1 file changed, 392 insertions(+) create mode 100644 analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb new file mode 100644 index 0000000..29b5242 --- /dev/null +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb @@ -0,0 +1,392 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start client" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n", + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " defaults = yaml.load(f)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 4
  • \n", + "
  • Memory: 8.59 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.distributed import Client\n", + "\n", + "#Initializing client\n", + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data\n", + "Using 10% sample and self produced samples\n", + " - 10% sample has 11292867 rows\n", + " - Filtered by value_len > df.mean() has 499805 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Original sample\n", + "df = dd.read_parquet('sample_0.parquet', \n", + " engine='pyarrow', \n", + " columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url'])\n", + "\n", + "# df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str})\n", + "df_index={'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str}\n", + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filtered value_len > 1356\n", + "1356 is the value_len mean\n", + "\n", + "To filter the data into something that is more interesting to this task I decided to only work with values that are above the mean.\n", + "\n", + "All values above the mean count up to 499805 rows. That is just 4,42% of the whole sample, and a lot easier to work on. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "499805" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Save\n", + "dff = df[df['value_len'] > 1356]\n", + "dd.to_parquet(df=dff, path='filtered_above_mean.parquet', engine='pyarrow')\n", + "# len(dff)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url'], dtype='object')" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Read\n", + "df = dd.read_parquet('filtered_above_mean.parquet', engine='pyarrow')\n", + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DF overview\n", + "Some overview about the sample: \n", + "- Mean: 1356.97,\n", + "- Min: 0,\n", + "- Max: 4496861\n", + "- Std: 26310.62" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1356.9776628910975 0 4496861 26310.62140481331\n" + ] + } + ], + "source": [ + "df_mean = df['value_len'].mean()\n", + "df_min = df['value_len'].min()\n", + "df_max = df['value_len'].max()\n", + "df_std = df['value_len'].std()\n", + "(df_mean, df_min, df_max, df_std) = dd.compute(df_mean, df_min, df_max, df_std);\n", + "print(df_mean, df_min, df_max, df_std)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# is JSON analysis\n", + "\n", + "After an initial manual analysis I think that the huge values are JSON structured; to validate that, I included a new column that is a boolean value with the validation of json" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "def 
is_json(myjson):\n", + " try:\n", + " json.loads(myjson)\n", + " return True\n", + " except ValueError as e:\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str})\n", + "df['is_json'] = df['value'].apply(is_json)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### is_JSON data\n", + "Saving the new produced data with 'is_json' columns into disk" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/fastparquet/util.py:221: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly.\n", + " inferred_dtype = infer_dtype(column)\n" + ] + } + ], + "source": [ + "#save\n", + "df.to_parquet('is_json_above_mean.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000valuevalue_lensymbolscript_urlis_json
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...3713window.sessionStoragehttps://assets.adobedtm.com/caacec67651710193d...True
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jsTrue
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jsTrue
3usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...False
4usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...False
\n", + "
" + ], + "text/plain": [ + " value_1000 \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "\n", + " value value_len \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "\n", + " symbol script_url \\\n", + "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", + "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... 
\n", + "\n", + " is_json \n", + "0 True \n", + "1 True \n", + "2 True \n", + "3 False \n", + "4 False " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read\n", + "df = dd.read_parquet('is_json_above_mean.parquet')\n", + "df.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From cd0ac0c95115e559d716fb7ffe65353dc5336eac Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sat, 23 Mar 2019 00:03:53 -0300 Subject: [PATCH 02/23] Quantitative analysts for json values --- .../isJson_Quantity_Analysis.ipynb | 509 ++++++++++++++++++ 1 file changed, 509 insertions(+) create mode 100644 analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantity_Analysis.ipynb diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantity_Analysis.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantity_Analysis.ipynb new file mode 100644 index 0000000..0a209bd --- /dev/null +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantity_Analysis.ipynb @@ -0,0 +1,509 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start client" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n", + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " defaults = yaml.load(f)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 4
  • \n", + "
  • Memory: 8.59 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.distributed import Client\n", + "\n", + "#Initializing client\n", + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data\n", + "Using the filtered data, already evaluated for JSON validity, named 'is_json_above_mean.parquet'. You can get this by running the 'isJson_dataPrep.ipynb'\n", + "\n", + "This new sample has 499805 rows, meaning that it's only 4,42% of the original sample (most values are smaller than the sample's mean of 1356)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000valuevalue_lensymbolscript_urlis_json
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...3713window.sessionStoragehttps://assets.adobedtm.com/caacec67651710193d...True
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jsTrue
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jsTrue
3usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...False
4usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...False
\n", + "
" + ], + "text/plain": [ + " value_1000 \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "\n", + " value value_len \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "\n", + " symbol script_url \\\n", + "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", + "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... 
\n", + "\n", + " is_json \n", + "0 True \n", + "1 True \n", + "2 True \n", + "3 False \n", + "4 False " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet('is_json_above_mean.parquet')\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DF overview\n", + "Some overview about the sample after the data prep: \n", + "- Rows: 499805\n", + "- Mean: 27829.33,\n", + "- Min: 1357,\n", + "- Max: 4496861\n", + "- Std: 122092.41" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "27829.332847810645 1357 4496861 122092.41371885882\n" + ] + } + ], + "source": [ + "df_mean = df['value_len'].mean()\n", + "df_min = df['value_len'].min()\n", + "df_max = df['value_len'].max()\n", + "df_std = df['value_len'].std()\n", + "(df_mean, df_min, df_max, df_std) = dd.compute(df_mean, df_min, df_max, df_std);\n", + "print(df_mean, df_min, df_max, df_std)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Quantity analysis " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Whole\n", + "This whole sample has: \n", + "- False: 307577 rows\n", + " - 61,54% are not valid JSON\n", + " \n", + " \n", + "- True: 192228 rows\n", + " - 38,46% are valid JSON" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False 307577\n", + "True 192228\n", + "Name: is_json, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['is_json'].value_counts().compute()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ORIGINAL SAMPLE: One std above the mean\n", + "Original Sample Data: \n", + "- Mean: 1356.97\n", + "- Std: 26310.62\n", + "\n", + "I'll be 
using the original sample's mean and std to make the following analyses\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "ROW_COUNT = 499805\n", + "MEAN = 1356\n", + "STD = 26310" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "- 46745 rows have the value_len greater than 27666\n", + "- This represents 9.35% of the rows on this sample\n", + "\n", + "As the value_len increases the percentage of valid JSON in the 'value' column also increases, for this filtered sample the following data was verified: \n", + "- True: 46691 rows\n", + " - 99,88% are valid JSON\n", + " \n", + "- False: 54 rows\n", + " - 0,11% are not valid JSON\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "len: 46745 (9.35%)\n" + ] + } + ], + "source": [ + "dfa = df[df['value_len'] > (MEAN + STD)]\n", + "length = len(dfa)\n", + "print(\"len: {0} ({1:0.2f}%)\".format(length, length / ROW_COUNT * 100))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True 46691\n", + "False 54\n", + "Name: is_json, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfa['is_json'].value_counts().compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## All greater values are JSON\n", + "\n", + "There is absolutely no value with value_len greater than 104653 that is not a valid JSON. \n", + "\n", + "This implies that all the greater values are JSON but they represent very low percentage of the whole data. \n", + "\n", + "The top 46745 greatest value_len are valid JSONs, that is 9.35% of this sample (value_len > mean) and 0,41% of the original sample with all the smaller values. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "group = dfa.groupby('is_json')\n", + "group_result = group.agg({'value_len': ['mean', 'std', 'min', 'max', 'count']}).compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_len
meanstdminmaxcount
is_json
False82460.05555613627.1180632813510465354
True271422.740185412552.29861327669449686146691
\n", + "
" + ], + "text/plain": [ + " value_len \n", + " mean std min max count\n", + "is_json \n", + "False 82460.055556 13627.118063 28135 104653 54\n", + "True 271422.740185 412552.298613 27669 4496861 46691" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group_result" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "len: 46745 (9.35%)\n" + ] + } + ], + "source": [ + "allJson = df[df['value_len'] > 104653]\n", + "length = len(dfa)\n", + "print(\"len: {0} ({1:0.2f}%)\".format(length, length / ROW_COUNT * 100))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 970ea0eeaabbc28f5fb33785d0bc15101eb52d3b Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sat, 23 Mar 2019 00:04:50 -0300 Subject: [PATCH 03/23] Readme with overview of the findings about the quantitative analysts --- .../README.md | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 analyses/2019_03_aliamcami_greatest_values_are_json/README.md diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/README.md b/analyses/2019_03_aliamcami_greatest_values_are_json/README.md new file mode 100644 index 0000000..67e78a1 --- /dev/null +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/README.md @@ -0,0 +1,26 @@ +# Overview + +All the greatest values are JSON, but they represent very little percentage of the whole data. 
+ +### Most of the data have small value_len + (mean = 1356 for the 10% sample) +- 95,58% of the data have value_len smaller than the mean +- 4,42% are bigger than the mean +- 9.35% are valid JSON + +### Values above the mean: +- 61,54% are NOT valid JSON +- 38,46% are valid JSON + +### Values that are 1 standard deviation (std) above the mean + (std = 26310 for 10% sample): +- 0,11% are NOT valid JSON +- 99,88% are valid JSON +- The bigger the value the greater the chance of being a valid JSON + +### Values 4 std above the mean +- 100% are valid JSON +- The biggest non-JSON value have the length of 104653 + +## +The top 46745 gratest value_len are valid JSONs, that is 9.35% of the filtered sample (value_len > mean) and 0,41% of the original 10% sample. From 0272b1ccb0e4da8db75e1f8623554c05321489ec Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 13:57:10 -0300 Subject: [PATCH 04/23] Sample comparasions for quantity of valid json values --- .../isJson_Sample_Comparasion.ipynb | 917 ++++++++++++++++++ 1 file changed, 917 insertions(+) create mode 100644 analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Sample_Comparasion.ipynb diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Sample_Comparasion.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Sample_Comparasion.ipynb new file mode 100644 index 0000000..20a2660 --- /dev/null +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Sample_Comparasion.ipynb @@ -0,0 +1,917 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start dask" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n", + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " defaults = yaml.load(f)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 4
  • \n", + "
  • Memory: 8.59 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.distributed import Client\n", + "\n", + "#Initializing client\n", + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data\n", + "This notebook starts using 'is_json_above_mean.parquet', this is a filtered data that you can get by running the data preparation notebook called 'isJson_dataPrep.ipynb'. \n", + "This parquet contains the 10% sample data filtered by values above the mean of value_len. \n", + "\n", + "This new sample has 499805 rows, meaning that it's only 4,42% of the original sample (most values are smaller than the sample's mean of 1356). " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_lenis_json
03713True
1103878True
2103878True
31358False
41358False
\n", + "
" + ], + "text/plain": [ + " value_len is_json\n", + "0 3713 True\n", + "1 103878 True\n", + "2 103878 True\n", + "3 1358 False\n", + "4 1358 False" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet('is_json_above_mean.parquet', columns=['value_len', 'is_json'])\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Visualization: " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/worker.py:2791: UserWarning: Large object of size 1.89 MB detected in task graph: \n", + " (" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZsAAAD8CAYAAAChHgmuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGMJJREFUeJzt3X+wXGWd5/H3lyQQHFRCCJpKojfuRIUoQpLBWLorAwIB0TgWurGoJUbWbDlBQa0awrg7iK5bMLWrLDv4g10owNWBGIchqzBsxERdC4EEkZ/GXBHlmkguP4Q4TiCB7/7Rz41N6Htv306etOm8X1Vdfc63n3Oep0/l5nPPOc/tjsxEkqSaDuj2ACRJvc+wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqm58twfwx+Lwww/Pvr6+bg9DkvYp69evfywzp4zWzrAp+vr6WLduXbeHIUn7lIj4ZTvtvIwmSarOsJEkVVc1bCLi4Yi4NyLujoh1pXZYRKyOiI3leVKpR0RcFhH9EXFPRMxp2s/i0n5jRCxuqs8t++8v28ZIfUiSumNv3LP588x8rGl9OXBrZl4cEcvL+vnAqcCs8ngz8CXgzRFxGHAhMA9IYH1ErMrMJ0ubpcCPgJuABcDNI/QhaT+yfft2BgYG2LZtW7eHss+bOHEi06dPZ8KECR1t340JAguB48vyNcBaGkGwELg2G1+w86OIODQippa2qzPzCYCIWA0siIi1wMsy87ZSvxZ4D42wGa4PSfuRgYEBXvrSl9LX10e58KEOZCaPP/44AwMDzJw5s6N91L5nk8D/jYj1EbG01F6RmZsByvMRpT4NeKRp24FSG6k+0KI+Uh+S9iPbtm1j8uTJBs1uiggmT568W2eItc9s3pqZmyLiCGB1RPx0hLat/jVkB/W2lQBcCvCqV71qLJtK2kcYNHvG7h7Hqmc2mbmpPG8BbgCOAx4tl8coz1tK8wFgRtPm04FNo9Snt6gzQh+7ju+KzJyXmfOmTBn1b5IkSR2qdmYTEX8CHJCZW8vyycBngFXAYuDi8nxj2WQVcE5EXEd
jgsBTmbk5Im4B/kvTjLKTgQsy84mI2BoR84HbgbOA/9G0r1Z9SNqP9S3/9h7d38MXv3OP7q+X1byM9grghnLqNR74emb+U0TcCayIiLOBXwHvK+1vAk4D+oHfA0sASqh8FriztPvM0GQB4CPA1cDBNCYG3FzqFw/TRxV7+h9wu/yHLvWWQw45hN/97nd7bH8f/OAHOf300znjjDP22D47VS1sMvMh4E0t6o8DJ7aoJ7BsmH1dBVzVor4OeEO7fUiSusNPEJCkSs4//3y++MUv7lz/9Kc/zUUXXcSJJ57InDlzeOMb38iNN774Kv/atWs5/fTTd66fc845XH311QCsX7+et7/97cydO5dTTjmFzZs3tzWW4bY7/vjjOf/88znuuON47Wtfyw9+8IPdeMfDM2wkqZJFixZx/fXX71xfsWIFS5Ys4YYbbuCuu+5izZo1fPKTn6RxYWd027dv56Mf/SgrV65k/fr1fOhDH+JTn/rUbm+3Y8cO7rjjDi699FIuuuiisb/RNvipz5JUybHHHsuWLVvYtGkTg4ODTJo0ialTp/Lxj3+c73//+xxwwAH8+te/5tFHH+WVr3zlqPvbsGED9913HyeddBIAzz33HFOnTt3t7d773vcCMHfuXB5++OEO3unoDBtJquiMM85g5cqV/OY3v2HRokV87WtfY3BwkPXr1zNhwgT6+vpe9MeS48eP5/nnn9+5PvR6ZjJ79mxuu+22MY1htO0OOuggAMaNG8eOHTvGtO92GTaS9hvdmMG5aNEiPvzhD/PYY4/xve99jxUrVnDEEUcwYcIE1qxZwy9/+eKvg3n1q1/NAw88wDPPPMO2bdu49dZbedvb3sbrXvc6BgcHue2223jLW97C9u3b+dnPfsbs2bNHHEOn2+1Jho0kVTR79my2bt3KtGnTmDp1KmeeeSbvete7mDdvHscccwyvf/3rX7TNjBkzeP/738/RRx/NrFmzOPbYYwE48MADWblyJR/72Md46qmn2LFjB+edd96oodHpdntStHtjqtfNmzcvO/2mTv/ORvrj9OCDD3LkkUd2exg9o9XxjIj1mTlvtG2djSZJqs7LaJK0j1u2bBk//OEPX1A799xzWbJkSZdG9GKGjaSelpk9/8nPl19+efU+dveWi5fRJPWsiRMn8vjjj+/2f5T7u6EvT5s4cWLH+/DMRlLPmj59OgMDAwwODnZ7KPu8oa+F7pRhI6lnTZgwoeOvMdae5WU0SVJ1ho0kqTrDRpJUnWEjSarOsJEkVWfYSJKqM2wkSdUZNpKk6gwbSVJ1ho0kqTrDRpJUnWEjSarOsJEkVWfYSJKqM2wkSdUZNpKk6gwbSVJ1ho0kqTrDRpJUnWEjSaquethExLiI+HFEfKusz4yI2yNiY0RcHxEHlvpBZb2/vN7XtI8LSn1DRJzSVF9Qav0Rsbyp3rIPSVJ37I0zm3OBB5vWLwG+kJmzgCeBs0v9bODJzPxT4AulHRFxFLAImA0sAL5YAmwccDlwKnAU8IHSdqQ+JEldUDVsImI68E7gf5X1AE4AVpYm1wDvKcsLyzrl9RNL+4XAdZn5TGb+AugHjiuP/sx8KDOfBa4DFo7ShySpC2qf2VwK/BXwfFmfDPw2M3eU9QFgWlmeBjwCUF5/qrTfWd9lm+HqI/UhSeqCamETEacDWzJzfXO5RdMc5bU9VW81xqURsS4i1g0ODrZqIknaA2qe2bwVeHdEPEzjEtcJNM50Do2I8aXNdGBTWR4AZgCU118OPNFc32Wb4eqPjdDHC2TmFZk5LzPnTZkypfN3KkkaUbWwycwLMnN6ZvbRuMH/3cw8E1gDnFGaLQZuLMuryjrl9e9mZpb6ojJbbSYwC7gDuBOYVWaeHVj6WFW2Ga4PSVIXdOPvbM4HPhER/TTur1xZ6lcCk0v9E8BygMy8H1gBPAD8E7AsM58r92TOAW6hMdttRWk7Uh+SpC4YP3qT3ZeZa4G1ZfkhGjPJdm2zDXjfMNt/Dvh
ci/pNwE0t6i37kCR1h58gIEmqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdYaNJKk6w0aSVJ1hI0mqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdYaNJKk6w0aSVJ1hI0mqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdYaNJKk6w0aSVJ1hI0mqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdW2FTUS8ofZAJEm9q90zmy9HxB0R8ZcRcWjVEUmSek5bYZOZbwPOBGYA6yLi6xFxUtWRSZJ6Rtv3bDJzI/AfgfOBtwOXRcRPI+K9rdpHxMRyNvSTiLg/Ii4q9ZkRcXtEbIyI6yPiwFI/qKz3l9f7mvZ1QalviIhTmuoLSq0/IpY31Vv2IUnqjnbv2RwdEV8AHgROAN6VmUeW5S8Ms9kzwAmZ+SbgGGBBRMwHLgG+kJmzgCeBs0v7s4EnM/NPyz4vKX0fBSwCZgMLgC9GxLiIGAdcDpwKHAV8oLRlhD4kSV3Q7pnN3wF3AW/KzGWZeRdAZm6icbbzItnwu7I6oTySRkCtLPVrgPeU5YVlnfL6iRERpX5dZj6Tmb8A+oHjyqM/Mx/KzGeB64CFZZvh+pAkdUG7YXMa8PXM/BeAiDggIl4CkJlfHW6jcgZyN7AFWA38HPhtZu4oTQaAaWV5GvBI2ecO4ClgcnN9l22Gq08eoQ9JUhe0GzbfAQ5uWn9JqY0oM5/LzGOA6TTORI5s1aw8xzCv7an6i0TE0ohYFxHrBgcHWzWRJO0B7YbNxKZLYpTll7TbSWb+FlgLzAcOjYjx5aXpwKayPEBjthvl9ZcDTzTXd9lmuPpjI/Sx67iuyMx5mTlvypQp7b4dSdIYtRs2/xwRc4ZWImIu8C8jbRARU4b+JiciDgbeQWOCwRrgjNJsMXBjWV5V1imvfzczs9QXldlqM4FZwB3AncCsMvPsQBqTCFaVbYbrQ5LUBeNHbwLAecA3ImLoDGEq8G9H2WYqcE2ZNXYAsCIzvxURDwDXRcR/Bn4MXFnaXwl8NSL6aZzRLALIzPsjYgXwALADWJaZzwFExDnALcA44KrMvL/s6/xh+pAkdUFbYZOZd0bE64HX0bgn8tPM3D7KNvcAx7aoP0Tj/s2u9W3A+4bZ1+eAz7Wo3wTc1G4fkqTuaPfMBuDPgL6yzbERQWZeW2VUkqSe0lbYRMRXgX8F3A08V8oJGDaSpFG1e2YzDziq3HyXJGlM2p2Ndh/wypoDkST1rnbPbA4HHoiIO2h85hkAmfnuKqOSJPWUdsPm0zUHIUnqbe1Off5eRLwamJWZ3ymfizau7tAkSb2i3a8Y+DCNT1H+SilNA/6x1qAkSb2l3QkCy4C3Ak/Dzi9SO6LWoCRJvaXdsHmmfGcMsPODMp0GLUlqS7th872I+Gvg4Ig4CfgG8H/qDUuS1EvaDZvlwCBwL/AfaHweWctv6JQkaVftzkZ7Hvif5SFJ0pi0+9lov6DFPZrMfM0eH5EkqeeM5bPRhkyk8VUAh+354UiSelFb92wy8/Gmx68z81LghMpjkyT1iHYvo81pWj2AxpnOS6uMSJLUc9q9jPbfmpZ3AA8D79/jo5Ek9aR2Z6P9ee2BSJJ6V7uX0T4x0uuZ+fk9MxxJUi8ay2y0PwNWlfV3Ad8HHqkxKElSbxnLl6fNycytABHxaeAbmfnvaw1MktQ72v24mlcBzzatPwv07fHRSJJ6UrtnNl8F7oiIG2h8ksBfANdWG5Ukqae0OxvtcxFxM/CvS2lJZv643rAkSb2k3ctoAC8Bns7M/w4MRMTMSmOSJPWYdr8W+kLgfOCCUpoA/O9ag5Ik9ZZ2z2z+Ang38M8AmbkJP65GktSmdsPm2cxMytcMRMSf1BuSJKnXtBs2KyLiK8ChEfFh4Dv4RWqSpDa1Oxvtv0bEScDTwOuAv8nM1VVHJknqGaOGTUSMA27JzHcABowkacxGvYyWmc8Bv4+Il++F8UiSelC792y2AfdGxJU
RcdnQY6QNImJGRKyJiAcj4v6IOLfUD4uI1RGxsTxPKvUo++2PiHuav7AtIhaX9hsjYnFTfW5E3Fu2uSwiYqQ+JEnd0W7YfBv4TzQ+6Xl902MkO4BPZuaRwHxgWUQcBSwHbs3MWcCtZR3gVGBWeSwFvgSN4AAuBN4MHAdc2BQeXypth7ZbUOrD9SFJ6oIR79lExKsy81eZec1Yd5yZm4HNZXlrRDwITAMWAseXZtcAa2n8wehC4NoyxfpHEXFoREwtbVdn5hNlTKuBBRGxFnhZZt5W6tcC7wFuHqEPSVIXjHZm849DCxHxzU47iYg+4FjgduAVJYiGAumI0mwaL/x+nIFSG6k+0KLOCH1IkrpgtLCJpuXXdNJBRBwCfBM4LzOfbrOvIdlBfSxjWxoR6yJi3eDg4Fg2lSSNwWhhk8MstyUiJtAImq9l5j+U8qPl8hjleUupDwAzmjafDmwapT69RX2kPl4gM6/IzHmZOW/KlCljfXuSpDaNFjZvioinI2IrcHRZfjoitkbESGcplJlhVwIPZubnm15aBQzNKFsM3NhUP6vMSpsPPFUugd0CnBwRk8rEgJNp/N3PZmBrRMwvfZ21y75a9SFJ6oIRJwhk5rjd2PdbgX9HY8r03aX218DFND7+5mzgV8D7yms3AacB/cDvgSVlDE9ExGeBO0u7zwxNFgA+AlwNHExjYsDNpT5cH5KkLmj3mzrHLDP/H63vqwCc2KJ9AsuG2ddVwFUt6uuAN7SoP96qD0lSd4zly9MkSeqIYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklRdtbCJiKsiYktE3NdUOywiVkfExvI8qdQjIi6LiP6IuCci5jRts7i03xgRi5vqcyPi3rLNZRERI/UhSeqemmc2VwMLdqktB27NzFnArWUd4FRgVnksBb4EjeAALgTeDBwHXNgUHl8qbYe2WzBKH5KkLqkWNpn5feCJXcoLgWvK8jXAe5rq12bDj4BDI2IqcAqwOjOfyMwngdXAgvLayzLztsxM4Npd9tWqD0lSl+ztezavyMzNAOX5iFKfBjzS1G6g1EaqD7Soj9SHJKlL/lgmCESLWnZQH1unEUsjYl1ErBscHBzr5pKkNu3tsHm0XAKjPG8p9QFgRlO76cCmUerTW9RH6uNFMvOKzJyXmfOmTJnS8ZuSJI1sb4fNKmBoRtli4Mam+lllVtp84KlyCewW4OSImFQmBpwM3FJe2xoR88sstLN22VerPiRJXTK+1o4j4u+B44HDI2KAxqyyi4EVEXE28CvgfaX5TcBpQD/we2AJQGY+ERGfBe4s7T6TmUOTDj5CY8bbwcDN5cEIfUiSuqRa2GTmB4Z56cQWbRNYNsx+rgKualFfB7yhRf3xVn1Ikrrnj2WCgCSphxk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1fVs2ETEgojYEBH9EbG82+ORpP3Z+G4PoIaIGAdcDpwEDAB3RsSqzHyguyPbs/qWf7trfT988Tu71rekfU+vntkcB/Rn5kOZ+SxwHbCwy2OSpP1WT57ZANOAR5rWB4A3d2ksPalbZ1WeUUn7pl4Nm2hRyxc1ilgKLC2rv4uIDR32dzjwWIfb9qJqxyMuqbHXqvy38UIejxfqhePx6nYa9WrYDAAzmta
nA5t2bZSZVwBX7G5nEbEuM+ft7n56hcfjDzwWL+TxeKH96Xj06j2bO4FZETEzIg4EFgGrujwmSdpv9eSZTWbuiIhzgFuAccBVmXl/l4clSfutngwbgMy8CbhpL3W325fieozH4w88Fi/k8Xih/eZ4ROaL7ptLkrRH9eo9G0nSHxHDZjf0wkfiRMRVEbElIu5rqh0WEasjYmN5nlTqERGXlfd7T0TMadpmcWm/MSIWN9XnRsS9ZZvLIiI67WMvHIsZEbEmIh6MiPsj4tz9/HhMjIg7IuIn5XhcVOozI+L2MtbryyQcIuKgst5fXu9r2tcFpb4hIk5pqrf8Geqkj70hIsZFxI8j4ludjrNXjsWYZaaPDh40Jh78HHgNcCDwE+Cobo+rg/fxb4A5wH1Ntb8Flpfl5cAlZfk04GYaf8c0H7i91A8DHirPk8rypPLaHcBbyjY3A6d20sdeOhZTgTll+aXAz4Cj9uPjEcAhZXkCcHsZwwpgUal/GfhIWf5L4MtleRFwfVk+qvx8HATMLD8340b6GRprH3vxmHwC+DrwrU7G2UvHYszHrtsD2Fcf5T+MW5rWLwAu6Pa4OnwvfbwwbDYAU8vyVGBDWf4K8IFd2wEfAL7SVP9KqU0FftpU39lurH106bjcSOPz9fb74wG8BLiLxidxPAaML/WdPwc0Zn++pSyPL+1i15+NoXbD/QyVbcbUx146BtOBW4ETgG91Ms5eORadPLyM1rlWH4kzrUtj2dNekZmbAcrzEaU+3HseqT7Qot5JH3tVuSRxLI3f5vfb41EuG90NbAFW0/jt+7eZuaPFeHaOtbz+FDCZsR+nyR30sTdcCvwV8HxZ72ScvXIsxsyw6VxbH4nTY4Z7z2Otd9LHXhMRhwDfBM7LzKdHatqi1lPHIzOfy8xjaPxWfxxw5Ajj2VPHY6T33JXjERGnA1syc31zeYSx9Oyx6JRh07m2PhJnH/VoREwFKM9bSn249zxSfXqLeid97BURMYFG0HwtM/+hw7H2zPEYkpm/BdbSuGdzaEQM/Y1e83h2jrW8/nLgCcZ+nB7roI/a3gq8OyIepvEp8ifQONPZH49FRwybzvXyR+KsAoZmUC2mce9iqH5WmSE1H3iqXPK5BTg5IiaVWVQn07iuvBnYGhHzy6yrs3bZ11j6qK6M8Urgwcz8fNNL++vxmBIRh5blg4F3AA8Ca4Azhhnr0Hs4A/huNm4orAIWldlTM4FZNCZKtPwZKtuMtY+qMvOCzJyemX1lnN/NzDM7GOc+fyw61u2bRvvyg8ZMoZ/RuI79qW6Pp8P38PfAZmA7jd+UzqZx3fdWYGN5Pqy0DRpfSvdz4F5gXtN+PgT0l8eSpvo84L6yzd/xhz8kHnMfe+FYvI3GZYh7gLvL47T9+HgcDfy4HI/7gL8p9dfQ+A+yH/gGcFCpTyzr/eX11zTt61PlPWygzMAb6Weokz724nE5nj/MRtuvj8VYHn6CgCSpOi+jSZKqM2wkSdUZNpKk6gwbSVJ1ho0kqTrDRpJUnWEjSarOsJEkVff/ATyV0p7aVVUJAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "cdf['value_len'].plot(kind='hist', legend=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "jsonGroup = cdf.groupby('is_json')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we cannot identify any non_json (blue) on the right side of the histogram. This means there all frquency of non-json values are very low or inexistent for the biggest values. Since there are so many small values, the biggest ones represent such a small portion that is hard to identify by look on the histograms and graphs. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "is_json\n", + "False AxesSubplot(0.125,0.125;0.775x0.755)\n", + "True AxesSubplot(0.125,0.125;0.775x0.755)\n", + "Name: value_len, dtype: object" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZsAAAD8CAYAAAChHgmuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAG9tJREFUeJzt3X+01XWd7/HniwN4KE0Qwbgc9aAxcyUrwhPiaro3f4TodSRN70XvGlkNDWW6tDW1EptZSaWrvGsmZ1wxmiWFPwrQUrmKl4vkTHcqfxwUlR8RJ6I4YYKAaJOowPv+sT8HN7DPPntvzuds2Of1WOu79vf7/n5+7W8d33y/38/+fhURmJmZ5TSg3gMwM7PG52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZTew3gM4VBx77LHR2tpa72GYmR1Wli9f/nJEjOipnJNN0traSnt7e72HYWZ2WJH020rK+TKamZll52RjZmbZOdmYmVl2vmdjZlalt956i87OTnbu3FnvofSZ5uZmWlpaGDRoUE31nWzMzKrU2dnJUUcdRWtrK5LqPZzsIoKtW7fS2dnJmDFjamrDl9HMzKq0c+dOhg8f3i8SDYAkhg8fflBnck42ZmY16C+JpsvBfl8nGzMzy873bMzMDlLrrEd6tb0N3/hvPZZpamrife97397tBx98kO6egrJhwwYuuOACVq5c2VtDrJqTTS9onfVIRf/nMDPrLUOGDGHFihX1HkbFsl1Gk9Qs6SlJz0laJekrKT5G0pOS1klaIGlwih+RtjvS/taitq5P8bWSzi2KT0mxDkmziuIl+zAza2QbNmzgIx/5CBMmTGDChAn8/Oc/P6DMqlWrmDhxIuPHj+f9738/69atA+Cee+7ZG//0pz/N7t27e3VsOe/ZvAGcFREfAMYDUyRNAm4GbomIscB2YEYqPwPYHhHvAW5J5ZA0DpgGvBeYAvyLpCZJTcAc4DxgHHBZKkuZPszMGsLrr7/O+PHjGT9+PBdddBEAI0eOZOnSpTzzzDMsWLCAa6655oB6t99+O9deey0rVqygvb2dlpYW1qxZw4IFC/jZz37GihUraGpq4t577+3V8Wa7jBYRAfwxbQ5KSwBnAZen+DxgNnAbMDWtA9wPfEuF6Q9TgfkR8QbwG0kdwMRUriMi1gNImg9MlbSmTB9mZg2h1GW0t956i6uvvnpvwvjVr351QL0zzjiDm266ic7OTi6++GLGjh3LsmXLWL58OR/60IeAQiIbOXJkr4436z2bdPaxHHgPhbOQXwOvRMSuVKQTGJ3WRwMbASJil6QdwPAUf6Ko2eI6G/eLn57qdNeHmVnDuuWWWzjuuON47rnn2LNnD83NzQeUufzyyzn99NN55JFHOPfcc/nud79LRDB9+nS+/vWvZxtb1qnPEbE7IsYDLRTORk4pVSx9lprEHb0YP4CkmZLaJbVv2bKlVBEzs8PGjh07GDVqFAMGDODuu+8ued9l/fr1nHTSSVxzzTVceOGFPP/885x99tncf//9bN68GYBt27bx299W9OaAivXJbLSIeEXSvwKTgKGSBqYzjxZgUyrWCRwPdEoaCBwNbCuKdymuUyr+cpk+9h/XHcAdAG1tbSUTkplZTw6V2aif/exn+cQnPsF9993HmWeeyTvf+c4DyixYsIB77rmHQYMG8e53v5svf/nLHHPMMdx4441MnjyZPXv2MGjQIObMmcOJJ57Ye4OLiCwLMAIYmtaHAP8PuAC4D5iW4rcDn03rVwG3p/VpwMK0/l7gOeAIYAywHmiikCjXp9jgVOa9qU7JPsotp512WtTqxOserrmumR1+Vq9eXe8h1EWp7w20RwU5IeeZzShgXrpvMyAlj4clrQbmS7oReBa4M5W/E7g7TQDYlhIOEbFK0kJgNbALuCoidgNIuhpYkpLP3IhYldq6rps+zMysDnLORnse+GC
J+Hrenk1WHN8JXNpNWzcBN5WILwYWV9qHmZnVh5+NZmZm2TnZmJlZdk42ZmaWnZONmZll56c+m5kdrNlH93J7O8ru3rp1K2effTYAf/jDH2hqamLEiBEAPPXUUwwefOg9e9jJxszsMDN8+PC9z0WbPXs2Rx55JF/4whf2KdP1+5YBAw6NC1iHxijMzOygdXR0cOqpp/KZz3yGCRMmsHHjRoYOHbp3//z58/nUpz4FwEsvvcTFF19MW1sbEydO5Iknnuiu2V7hZGNm1kBWr17NjBkzePbZZxk9uvtnEF9zzTV88YtfpL29nYULF+5NQrn4MpqZWQM5+eST974qoJzHHnuMtWvX7t3evn07r7/+OkOGDMkyLicbM7MGUvzwzQEDBnQ9qxKAnTt37l2PiD6dTODLaGZmDWrAgAEMGzaMdevWsWfPHh544IG9+8455xzmzJmzd3v/F7H1Np/ZmJkdrB6mKtfTzTffzJQpUzjhhBMYN24cb7zxBgBz5szhyiuv5Hvf+x67du3izDPP3Cf59DYVn2L1Z21tbdHe3l5T3dZZjxwy77Mws/zWrFnDKaeUehdkYyv1vSUtj4i2nur6MpqZmWXnZGNmZtk52ZiZ1aC/3YI42O/rZGNmVqXm5ma2bt3abxJORLB161aam5trbsOz0czMqtTS0kJnZydbtmyp91D6THNzMy0tLTXXd7IxM6vSoEGDGDNmTL2HcVjxZTQzM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMssuWbCQdL+lxSWskrZJ0bYrPlvR7SSvScn5RnesldUhaK+ncoviUFOuQNKsoPkbSk5LWSVogaXCKH5G2O9L+1lzf08zMepbzzGYX8PmIOAWYBFwlaVzad0tEjE/LYoC0bxrwXmAK8C+SmiQ1AXOA84BxwGVF7dyc2hoLbAdmpPgMYHtEvAe4JZUzM7M6yZZsIuLFiHgmrb8GrAG6fyE2TAXmR8QbEfEboAOYmJaOiFgfEW8C84GpkgScBdyf6s8DPl7U1ry0fj9wdipvZmZ10Cf3bNJlrA8CT6bQ1ZKelzRX0rAUGw1sLKrWmWLdxYcDr0TErv3i+7SV9u9I5fcf10xJ7ZLa+9NjJ8zM+lr2ZCPpSOBHwOci4lXgNuBkYDzwIvCPXUVLVI8a4uXa2jcQcUdEtEVE24gRI8p+DzMzq13WZCNpEIVEc29E/BggIl6KiN0RsQf4DoXLZFA4Mzm+qHoLsKlM/GVgqKSB+8X3aSvtPxrY1rvfzszMKpVzNpqAO4E1EfHNovioomIXASvT+iJgWppJNgYYCzwFPA2MTTPPBlOYRLAoCs/2fhy4JNWfDjxU1Nb0tH4J8JPoL88CNzM7BOV86vOHgb8CXpC0IsW+RGE22XgKl7U2AJ8GiIhVkhYCqynMZLsqInYDSLoaWAI0AXMjYlVq7zpgvqQbgWcpJDfS592SOiic0UzL+D3NzKwH2ZJNRPw7pe+dLC5T5ybgphLxxaXqRcR63r4MVxzfCVxazXjNzCwfP0HAzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLLluykXS8pMclrZG0StK1KX6MpKWS1qXPYSkuSbdK6pD0vKQJRW1NT+XXSZpeFD9N0gupzq2SVK4PMzOrj5xnNruAz0fEKcAk4CpJ44BZwLKIGAssS9sA5wFj0zITuA0KiQO4ATgdmAjcUJQ8bktlu+pNSfHu+jAzszrIlmwi4sWIeCatvwasAUYDU4F5qdg84ONpfSpwVxQ8AQyVNAo4F1gaEdsiYjuwFJiS9r0rIn4REQHctV9bpfowM7M66JN7NpJagQ8CTwLHRcSLUEhIwMhUbDSwsah
aZ4qVi3eWiFOmDzMzq4PsyUbSkcCPgM9FxKvlipaIRQ3xasY2U1K7pPYtW7ZUU9XMzKqQNdlIGkQh0dwbET9O4ZfSJTDS5+YU7wSOL6reAmzqId5SIl6uj31ExB0R0RYRbSNGjKjtS5qZWY8qSjaSTq224TQz7E5gTUR8s2jXIqBrRtl04KGi+BVpVtokYEe6BLYEmCxpWJoYMBlYkva9JmlS6uuK/doq1YeZmdXBwArL3S5pMPB94AcR8UoFdT4M/BXwgqQVKfYl4BvAQkkzgN8Bl6Z9i4HzgQ7gT8AnASJim6SvAU+ncl+NiG1p/co0piHAo2mhTB9mZlYHFSWbiPgLSWOBvwbaJT0FfC8ilpap8++Uvq8CcHaJ8gFc1U1bc4G5JeLtwAFnXRGxtVQfZmZWHxXfs4mIdcDfA9cB/xW4VdIvJV2ca3BmZtYYKr1n835Jt1D4rcxZwF+mH2ueBdyScXxmZtYAKr1n8y3gO8CXIuL1rmBEbJL091lGZmZmDaPSZHM+8HpE7AaQNABojog/RcTd2UZnZmYNodJ7No9RmPHV5R0pZmZm1qNKz2yaI+KPXRsR8UdJ78g0psPOhubLYXadOp+9o04dm5lVrtIzm//Y75H/pwGvlylvZma2V6VnNp8D7pPU9TiYUcD/yDMkMzNrNJX+qPNpSf8Z+HMKP9T8ZUS8lXVkZmbWMCo9swH4ENCa6nxQEhFxV5ZRmZlZQ6ko2Ui6GzgZWAHsTuGuF5aZmZmVVemZTRswLj2/zMzMrCqVzkZbCbw750DMzKxxVXpmcyywOj3t+Y2uYERcmGVUZmbWUCpNNrNzDsLMzBpbpVOf/03SicDYiHgsPT2gKe/QzMysUVT6ioG/Ae4Hvp1Co4EHcw3KzMwaS6UTBK6i8JrnV2Hvi9RG5hqUmZk1lkqTzRsR8WbXhqSBFH5nY2Zm1qNKk82/SfoSMETSx4D7gP+db1hmZtZIKk02s4AtwAvAp4HFgN/QaWZmFal0NtoeCq+F/k7e4ZiZWSOq9Nlov6HEPZqIOKnXR2RmZg2nmmejdWkGLgWO6f3hmJlZI6ronk1EbC1afh8R/wSclXlsZmbWICr9UeeEoqVN0meAo3qoM1fSZkkri2KzJf1e0oq0nF+073pJHZLWSjq3KD4lxTokzSqKj5H0pKR1khZIGpziR6TtjrS/teKjYWZmWVR6Ge0fi9Z3ARuA/95Dne8D3+LAd97cEhH/UByQNA6YBrwX+E/AY5L+LO2eA3wM6ASelrQoIlYDN6e25ku6HZgB3JY+t0fEeyRNS+X8CmszszqqdDbamdU2HBE/reKsYiowPyLeAH4jqQOYmPZ1RMR6AEnzgamS1lC4jHd5KjOPwsNCb0ttzU7x+4FvSZLfxWNmVj+Vzkb723L7I+KbVfR5taQrgHbg8xGxncKz1p4oKtOZYgAb94ufDgwHXomIXSXKj+6qExG7JO1I5V+uYoxmZtaLKv1RZxtwJYX/kI8GPgOMo3Dfpuy9m/3cRuH10uOBF3n78pxKlI0a4uXaOoCkmZLaJbVv2bKl3LjNzOwgVPPytAkR8RoUbvQD90XEp6rpLCJe6lqX9B3g4bTZCRxfVLQF2JTWS8VfBoZKGpjOborLd7XVmZ7hdjSwrZvx3AHcAdDW1ubLbGZmmVR6ZnMC8GbR9ptAa7WdSRpVtHkRhddNAywCpqWZZGOAscBTwNPA2DTzbDCFSQSL0v2Xx4FLUv3pwENFbU1P65cAP/H9GjOz+qr0zOZu4ClJD1C4JHURB84y24ekHwIfBY6V1AncAHxU0vjUxgYKz1kjIlZJWgispjDb7aqI2J3auRpYQuFlbXMjYlXq4jpgvqQbgWeBO1P8TuDuNMlgG4UEZWZmdaRK/9EvaQLwkbT504h4Ntuo6qCtrS3a29trqzz76N4dTFV976hf32bW70laHhFtPZWr9DIawDuAVyPinyncDxlT8+jMzKxfqfQJAjdQuGx1fQoNAu7JNSg
zM2sslZ7ZXARcCPwHQERsoropz2Zm1o9VmmzeTDO6AkDSO/MNyczMGk2lyWahpG9T+G3L3wCP4RepmZlZhSp9Nto/SPoY8Crw58CXI2Jp1pGZmVnD6DHZSGoClkTEOYATjJmZVa3Hy2jpx5V/klTHH5OYmdnhrNInCOwEXpC0lDQjDSAirskyKjMzayiVJptH0mJmZla1sslG0gkR8buImNdXAzIzs8bT0z2bB7tWJP0o81jMzKxB9ZRsil9EdlLOgZiZWePqKdlEN+tmZmYV62mCwAckvUrhDGdIWidtR0S8K+vozMysIZRNNhHR1FcDMTOzxlXN+2zMzMxq4mRjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZZct2UiaK2mzpJVFsWMkLZW0Ln0OS3FJulVSh6TnJU0oqjM9lV8naXpR/DRJL6Q6t0pSuT7MzKx+cp7ZfB+Ysl9sFrAsIsYCy9I2wHnA2LTMBG6DQuIAbgBOByYCNxQlj9tS2a56U3row8zM6iRbsomInwLb9gtPBbpexDYP+HhR/K4oeAIYKmkUcC6wNCK2RcR2YCkwJe17V0T8IiICuGu/tkr1YWZmddLX92yOi4gXAdLnyBQfDWwsKteZYuXinSXi5fo4gKSZktoltW/ZsqXmL2VmZuUdKhMEVCIWNcSrEhF3RERbRLSNGDGi2upmZlahvk42L6VLYKTPzSneCRxfVK4F2NRDvKVEvFwfZmZWJ32dbBYBXTPKpgMPFcWvSLPSJgE70iWwJcBkScPSxIDJwJK07zVJk9IstCv2a6tUH2ZmVic9vamzZpJ+CHwUOFZSJ4VZZd8AFkqaAfwOuDQVXwycD3QAfwI+CRAR2yR9DXg6lftqRHRNOriSwoy3IcCjaaFMH2ZmVifZkk1EXNbNrrNLlA3gqm7amQvMLRFvB04tEd9aqg8zM6ufQ2WCgJmZNTAnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+zqkmwkbZD0gqQVktpT7BhJSyWtS5/DUlySbpXUIel5SROK2pmeyq+TNL0oflpqvyPVVd9/SzMz61LPM5szI2J8RLSl7VnAsogYCyxL2wDnAWPTMhO4DQrJCbgBOB2YCNzQlaBSmZlF9abk/zpmZtadQ+ky2lRgXlqfB3y8KH5XFDwBDJU0CjgXWBoR2yJiO7AUmJL2vSsifhERAdxV1JaZmdVBvZJNAP9X0nJJM1PsuIh4ESB9jkzx0cDGorqdKVYu3lkibmZmdTKwTv1+OCI2SRoJLJX0yzJlS91viRriBzZcSHQzAU444YTyIzYzs5rV5cwmIjalz83AAxTuubyULoGRPjen4p3A8UXVW4BNPcRbSsRLjeOOiGiLiLYRI0Yc7NcyM7Nu9HmykfROSUd1rQOTgZXAIqBrRtl04KG0vgi4Is1KmwTsSJfZlgCTJQ1LEwMmA0vSvtckTUqz0K4oasvMzOqgHpfRjgMeSLORBwI/iIj/I+lpYKGkGcDvgEtT+cXA+UAH8CfgkwARsU3S14CnU7mvRsS2tH4l8H1gCPBoWszMrE76PNlExHrgAyXiW4GzS8QDuKqbtuYCc0vE24FTD3qwZmbWKw6lqc9mZtagnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsBtZ7ALlImgL8M9AEfDcivlHnIeUx++g69bujPv2
a2WGpIc9sJDUBc4DzgHHAZZLG1XdUZmb9V0MmG2Ai0BER6yPiTWA+MLXOYzIz67ca9TLaaGBj0XYncHqdxtKYfPnOzKrQqMlGJWJxQCFpJjAzbf5R0toa+zsWeLnGuo0o3/H4Sqn/aQ9p/v/Gvnw89tUIx+PESgo1arLpBI4v2m4BNu1fKCLuAO442M4ktUdE28G20yh8PN7mY7EvH4999afj0aj3bJ4GxkoaI2kwMA1YVOcxmZn1Ww15ZhMRuyRdDSyhMPV5bkSsqvOwzMz6rYZMNgARsRhY3EfdHfSluAbj4/E2H4t9+Xjsq98cD0UccN/czMysVzXqPRszMzuEONkcJElTJK2V1CFpVr3HUy1JcyVtlrSyKHaMpKWS1qXPYSkuSbem7/q8pAlFdaan8uskTS+KnybphVTnVkmqtY8+OBbHS3pc0hpJqyRd28+PR7OkpyQ9l47HV1J8jKQn01gXpEk4SDoibXek/a1FbV2f4mslnVsUL/n3U0sffUFSk6RnJT1c6zgb5VhULSK81LhQmHzwa+AkYDDwHDCu3uOq8jv8F2ACsLIo9r+AWWl9FnBzWj8feJTC75gmAU+m+DHA+vQ5LK0PS/ueAs5IdR4Fzquljz46FqOACWn9KOBXFB531F+Ph4Aj0/og4Mk0hoXAtBS/HbgyrX8WuD2tTwMWpPVx6W/jCGBM+ptpKvf3U20ffXhM/hb4AfBwLeNspGNR9bGr9wAO5yX9R2NJ0fb1wPX1HlcN36OVfZPNWmBUWh8FrE3r3wYu278ccBnw7aL4t1NsFPDLovjectX2Uafj8hDwMR+PAHgH8AyFJ3G8DAxM8b1/AxRmf56R1gemctr/76KrXHd/P6lOVX300TFoAZYBZwEP1zLORjkWtSy+jHZwSj0WZ3SdxtKbjouIFwHS58gU7+77lot3lojX0kefSpckPkjhX/P99niky0YrgM3AUgr/+n4lInaVGM/esab9O4DhVH+chtfQR1/4J+CLwJ60Xcs4G+VYVM3J5uBU9FicBtLd9602XksffUbSkcCPgM9FxKvlipaINdTxiIjdETGewr/qJwKnlBlPbx2Pct+5LsdD0gXA5ohYXhwuM5aGPRa1crI5OBU9Fucw9JKkUQDpc3OKd/d9y8VbSsRr6aNPSBpEIdHcGxE/rnGsDXM8ukTEK8C/UrhnM1RS12/0isezd6xp/9HANqo/Ti/X0EduHwYulLSBwlPkz6JwptMfj0VNnGwOTqM+FmcR0DWDajqFexdd8SvSDKlJwI50yWcJMFnSsDSLajKF68ovAq9JmpRmXV2xX1vV9JFdGuOdwJqI+GbRrv56PEZIGprWhwDnAGuAx4FLuhlr13e4BPhJFG4oLAKmpdlTY4CxFCZKlPz7SXWq7SOriLg+IloiojWN8ycR8T9rGOdhfyxqVu+bRof7QmG20K8oXMv+u3qPp4bx/xB4EXiLwr+UZlC47rsMWJc+j0llReGldL8GXgDaitr5a6AjLZ8sircBK1Odb/H2D4mr7qMPjsVfULgM8TywIi3n9+Pj8X7g2XQ8VgJfTvGTKPwHsgO4DzgixZvTdkfaf1JRW3+XvsNa0gy8cn8/tfTRh8flo7w9G61fH4tqFj9BwMzMsvNlNDMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCy7/w9D5zzuFuBn3AAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "jsonGroup['value_len'].plot(kind='hist', legend=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sample overview\n", + "Some overview about the sample after the data prep: \n", + "- Rows: 499805\n", + "- Mean: 27829.33,\n", + "- Min: 1357,\n", + "- Max: 4496861\n", + "- Std: 122092.41" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#Hardcoded data to fast use, but your can update for the calculed value within the next few cells \n", + "MEAN = 27829.33\n", + "MIN = 1357\n", + "MAX = 4496861\n", + "STD = 122092.41\n", + "COUNT = 499805\n", + "\n", + "#Information for original sample.\n", + "ORIG_MEAN = 1356.97\n", + "ORIG_MIN = 0\n", + "ORIG_MAX = 4496861\n", + "ORIG_STD = 26310.62\n", + "ORIG_COUNT = 11292867\n", + "\n", + "#hardcoded information about described data for values one std above the mean: \n", + "A_MEAN = 271204.44\n", + "A_MIN = 27669\n", + "A_MAX = 4496861\n", + "A_STD = 306555\n", + "A_COUNT = 46745" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def describedData(df):\n", + " tmp_mean = df['value_len'].mean()\n", + " tmp_min = df['value_len'].min()\n", + " tmp_max = df['value_len'].max()\n", + " tmp_std = df['value_len'].std()\n", + " tmp_count = df['value_len'].count()\n", + " (tmp_mean, tmp_min, tmp_max, tmp_std, tmp_count) = dd.compute(tmp_mean, tmp_min, tmp_max, tmp_std, tmp_count);\n", + " return (tmp_mean, tmp_min, tmp_max, tmp_std, tmp_count)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "27829.332847810645 1357 4496861 122092.41371885882 499805\n" + ] + } + ], + "source": [ + "#Calculate the described data for mean sample\n", + "(MEAN, MIN, MAX, 
STD, COUNT) = describedData(df)\n", + "print(MEAN, MIN, MAX, STD, COUNT)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1356.9776628910975 0 4496861 26310.62140481331 11292867\n" + ] + } + ], + "source": [ + "#Calculate the described data for the original sample\n", + "(ORIG_MEAN, ORIG_MIN, ORIG_MAX, ORIG_STD, ORIG_COUNT) = describedData(dd.read_parquet('sample_0.parquet'))\n", + "print(ORIG_MEAN, ORIG_MIN, ORIG_MAX, ORIG_STD, ORIG_COUNT)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "271204.44978072523 27669 4496861 306555.0273738244 46745\n" + ] + } + ], + "source": [ + "#Calculate the described data for one std above the mean (using mean and std of the original sample)\n", + "std_above = df[df['value_len'] > ORIG_STD + ORIG_MEAN]\n", + "(A_MEAN, A_MIN, A_MAX, A_STD, A_COUNT) = describedData(std_above)\n", + "print(A_MEAN, A_MIN, A_MAX, A_STD, A_COUNT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell creates a dataframe of the described data calculated above and saves it to a CSV file for future use, in case the calculations cannot be re-run. " + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MEANMINMAXSTDCOUNT
ORIGINAL1356.9776630449686126310.62140511292867
ABOVE_MEAN27829.33284813574496861122092.413719499805
ABOVE_STD271204.449781276694496861306555.02737446745
\n", + "
" + ], + "text/plain": [ + " MEAN MIN MAX STD COUNT\n", + "ORIGINAL 1356.977663 0 4496861 26310.621405 11292867\n", + "ABOVE_MEAN 27829.332848 1357 4496861 122092.413719 499805\n", + "ABOVE_STD 271204.449781 27669 4496861 306555.027374 46745" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Comparasion of this sample and original 10% sample:\n", + "import pandas as pd\n", + "import numpy as np\n", + "%matplotlib inline\n", + "\n", + "compare = pd.DataFrame([(ORIG_MEAN, ORIG_MIN, ORIG_MAX, ORIG_STD, ORIG_COUNT),\n", + " (MEAN, MIN, MAX, STD, COUNT), \n", + " (A_MEAN, A_MIN, A_MAX, A_STD, A_COUNT)], \n", + " columns=['MEAN', 'MIN', 'MAX', 'STD', 'COUNT'],\n", + " index= ['ORIGINAL','ABOVE_MEAN', 'ABOVE_STD'])\n", + "compare.to_csv('describedData.csv')\n", + "compare" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Number of rows\n", + "The number of rows after filtering for values above the mean are about 4.42% of the original sample. \n", + "And the count for values one std above the mean is just 9.35% of this sample or 0.41% of original sample. \n", + "By this we can see that the really big values represent just a very small portion of the whole. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Above the mean / original 4.425846864219688\n", + "1 STD Above the mean / original 0.41393385754033946\n", + "1 STD Above the mean / Above mean 9.35264753253769\n" + ] + } + ], + "source": [ + "print('Above the mean / original', COUNT / ORIG_COUNT * 100)\n", + "print('1 STD Above the mean / original', A_COUNT / ORIG_COUNT * 100)\n", + "print('1 STD Above the mean / Above mean', A_COUNT / COUNT * 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUAAAADuCAYAAABI8d6AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAG4dJREFUeJzt3XmYHFW9//H3tzshJEACQbbLVuyBBCRslysgyPKAlrIJQtiCgqIsinjREhHilauNyE9wBQVkUeQni4CUsonsWyAETMKiQIHsYiQhkNnP/aNqzGQyk+mZ6anT3fV5PU8/zNT08klIPjm1nWPOOUREiqjkO4CIiC8qQBEpLBWgiBSWClBECksFKCKFpQIUkcJSAYpIYakARaSwVIAiUlgqQBEpLBWgiBSWClBECksFKCKFpQIUkcJSAYpIYakARaSwVIAiUlgqQBEpLBWgiBSWClBECksFKCKFpQIUkcJSAYpIYakARaSwVIAiUlgqQBEpLBWgiBSWClBECksFKCKFpQIUkcIa5TuANKYgiicA6wOrASsAo7PHCr3+2wksBBZk/50PvJVUwnc9xBZZijnnfGeQOhRE8RhgW2ASadFtkP23+zF+mB+xGHgL+DswD5jb/Ugq4RvDfG+RqqgAhSCKDdgS2KnHYxvSEZwP81lSiLOAO5NK+KKnLNLEVIAFlBXedkAI7A7swPBHdCPtReDO7PGnpBL+03MeaQIqwIIIongFYB/gIOBjwDp+Ew2LA2aTluFNSSV8wHMeaVAqwCYWRPFo0rI7FPg4MMFvohHzPHAVcKV2lWUwVIBNKIjidYETgM8Ca3uOkycH3A9cCfw2qYQLPeeROqcCbCJBFH8EOAk4AF3itBi4EbgwqYSP+A4j9UkF2OCCKF4FOAY4EdjKc5x6dR/wPSBOKqH+wMu/qQAbVBDFKwKnABEw0XOcRjEH+DZwrYpQQAXYcIIoLgOfBs4G1vMcp1GpCAVQATaUIIoPAc4BtvCdpUnMBE5KKuFM30HEDxVgA8hObnyP9IJlqa0u4BLg60klnO87jORLBVjHsgkH/h/wGd9ZCuCfwBnAJUkl7PIdRvKhAqxTQRSHwMXAur6zFMyjpLvFj/kOIiNPBVhngiheDbgQONp3lgLrAs4HvpFUwnbfYWTkq
ADrSBDFBwI/o1h3b9SzR4Bpur2ueakA60AQxeNIi+8Y31lkGQuAzyaV8FrfQaT2VICeBVG8GXA9sLXvLLJcPwdOTSrhYt9BpHZUgB4FUXwAcAXNO0tLs5kDHJZUwnm+g0htaFEkT4IoPhP4HSq/RjIFeCSI4n19B5Ha0AgwZ9k9vJcB03xnkSFrJz0ueIXvIDI8GgHmKIjiicDdqPwa3Wjg8iCKv+E7iAyPRoA5CaJ4DeAO4IO+s0hN/RQ4RXePNCYVYA6CKF4b+BOar69Z3QgcoTPEjUcFOMKCKF4PuAvYzHcWGVH3Ax9NKuEi30GkejoGOIKCKN4QuAeVXxHsCvw+iOKxvoNI9VSAIySI4k2Ae4GNfWeR3OwB/C5bglQagHaBR0AQxWuR3ke6oe8s4sXvgEOTStjpO4gsn0aANZbtAt2Myq/IDgJ+4juEDEwFWENBFBvpAt07+c4i3p0QRPHZvkPI8qkAa+u7wCd9h5C6MSOI4mN9h5D+6RhgjQRRfBzp2hIiPbUAOyeV8EnfQWRZKsAaCKJ4T+BW0lukRHp7Htg+qYQLfAeRpWkXeJiCKN4AuA6Vn/RvE+By3yFkWSrAYQiiuEQ6n99qvrNI3TswiOLTfYeQpakAh+e/SS9+FanGd4Io3s13CFlCxwCHKIjiqcDDgK76l8F4HZiaVMI3fQcRjQCHJLvY+deo/GTw1gF+7DuEpFSAQ/M9YEvfIaRhHRJE8cd9hxDtAg9ath7EHwHznUUa2svAVkklfM93kCLTCHAQsl3fi1H5yfBtAHzbd4iiUwEOztfRJAdSO18Monh73yGKTLvAVQqieGNgLrCi7yzSVJ4AdtTUWX5oBFi981H5Se1NBb7oO0RRaQRYhezi1Xt955CmNR8Ikkr4ru8gRaMR4ACyOf6+7zuHNLWJwJd8hygiFeDAPoUmOJWRd1oQxRN8hygaFeByZKO/s3znkEJYDTjVd4iiUQEu38fQYuaSny8HUbyq7xBFogJcvv/2HUAKZQJwmu8QRaKzwP0IongHYKbvHFI4C4GNkko433eQItAIsH8a/YkP44ETfIcoCo0A+xBE8Yak6ziUfWeRQnoJ2DiphF2+gzQ7jQD79mVUfuLPhqQn4GSEqQB7CaJ4JeA43zmk8L7gO0ARqACXtT+wsu8QUnj7BVG8ru8QzU4FuKzDfQcQIf27ebTvEM1OBdhDdhHqfr5ziGSm+w7Q7FSASzsYLXQk9WNSEMW6D30EqQCXpt1fqTef8B2gmakAM0EUrwns6TuHSC+6HGYEqQCXOBRd+yf1Z2oQxWv7DtGsVIBLaJ1WqUcGfNR3iGalAgSCKC4Du/jOIdIPFeAIUQGmtgNW8R1CpB/7BFE8yneIZqQCTH3YdwCR5VgV+JDvEM1IBZhSAUq928d3gGZU+ALM1v3YzXcOkQFs7ztAMyp8AQJbky5II1LPtvUdoBlVVYBmtp6Z3WRmfzWz583sQjNbwcz2MLMFZvaEmT1jZt/v8ZpjzezHPb4/ysyeMrO5ZvakmV1iZqtmP7vbzHbIvk7M7PoerzvEzC7vlecmM3uo17YZZjaUWZy1+yuNYJ0gitfyHaLZDFiAZmbADcCNzrnNgM1Jp4v63+wp9znnpgJTgY+b2TKXk5jZfqSTjH7UOTeZ9Kzrg0B//0N3MLPJ/eRZNXv9qma20UD5q7BDDd5DJA8aBdZYNSPAPYEW59wvAZxznaRl9hlgXPeTnHOLgdlAX3OYfYN0jY2Dut/DOXeZc+7Zfj7z+8AZ/fzsk8DvgWuozb27WvZSGsVU3wGaTTUFOBl4vOcG59xC4GVg0+5tZrYasBlwbz/vMYu0NKvxW2A7M9u0j59NA36TPaZV+X7LM6kG7yGSB40Aa6yaAjSgr5WTurfvZmZPAW8Atzjn3ljum5ltbWazs2OJh/XztE7gPODrvV67Fmnp3u+cew7oMLMpVfwa+hRE8froAmhpHBoB1lg1BTiXXsfJzGw8sD7pymn3Oee2I
T2b+gUz6+tfqbmkx+22AR4ANs5ef7mZLQR2Be7u9ZqrSE9QbNBj22GkZ2xfNLMECBjebvBmw3itSN42DaJ4jO8QzaSaAvwTMM7MjgEwszJwPnA58H73k7IR2XeBr/XxHt8lPa73jHNuvHNuPGnBfSH7+n5gj54vcM61Az8ATu2xeRqwn3MucM4FpNdGDacAa3ESRSQvJUAzw9TQgAXo0oWDDwIONbO/As8BLfR9kuIi4MO9z8465/4A/BDYxMzmmdmDpLu5tw3w8ZcCowDMLCAdDT7c431fBBaa2X9mm840s1e6HwP92lABSuNRAdZQrgujm9kZzrnv5PaBAwii+GpqcyJFJC8HJpXwJt8hmkXeM0yMNrOz+vmZc859O9c0+tdUGo/+zNZQ3gW4qI9t44DjgdWBvAtQZ4Cl0agAayjXAnTOnd/9tZmtAnyJ9NrAa0hPrORNBSiNRgVYQ7lPsmhmE4HTgCOBK4DtnHP/yjtHRgUojUYFWEO5FqCZnUe69u7Pga2dc33tEudpvOfPFxmsNX0HaCZ5nwXuAlqBDpa+u8RIT4LkVkjZPICd2WeLNIrHkkq4o+8QzSLX+QCdcyXn3Fjn3CrdF0Rnj1XyLL/Myqj8pPEMaq/NzA4yM2dmk7LvAzNbnN2O+qSZPWhmW/R4/q5m9mg2vd0zZva5bPsefUxBN8rM3jSzdczscjN7MXvf2dm1vv1lWsvMbsk+f56Z/aHHLbKzzWx+j/e6s0fmJ8zs6Szf9MH9tvUt713gib02OeAdl+cwdAkd/5NGNNi/s9NI77Q6HJiRbXveObctgJmdQHpTw3QzWxu4GjjQOTfLzD4A3GZmrwJ/BNYzs8A5l2Tvszcwxzn3ejprHqc7566rItP/AHc45y7MMmzjnPsL2WQP2fyft3S/V3YTxPPZtHuY2cbADWZW6p6laqjyPgnyOGnp9Rx5rWJms4Hje/zG5mF0jp9VOFPsb8/uVp71ekfJucVWdi1mtJRKrsVKrgWzlpLRZkabQasZ7Ya1lbB2oKPkrN0wpxH6MpwrvwlhVc81s5VJl3v9CHAzSwqwp/FA90nIk4DLnXOz0s9yb5vZV4EZzrnYzK4lvR//3Oz5h5POyjRY6wC3d3/jnHtqMC92zr1gZqeRXjnSOAXonOvz1jMzO5j0Nrr9cozz/sBPkaGa4zbd4tWONdc4rHz3058p313aiDe2si4m/PsJnQO/Ryd0tpm1tRltbWbt2aOtxayjzayz1ay9Jf1vZ6tZZ0vJutLvS10tZq7FrKulZLSauexhrZYWb6uZtRulNjNrNyu1Y6UOo9xhVu6AUqfZqC4Y1YmN6jJGOxjVBaMdrED6j+cKmPlYqvKZQTz3QOBW59xz2W7ldsB80ltSZ5PuBY0Dum8lnUx6ZUZPj2XbIS27nwPnmtkY4GOkc4N2O8/Mzsy+nuucO7KfXD8B/r+ZnQzcCfzSOffaIH5dkE6vN+yp7OpirVHn3A09fuPy8l7On1c4/2L8xIs699/los79GUVH+16lJ2YfU759wY6lZ4IVrHPDgV5fhvJY58aOdYzte0Y2v7qgq91oazVrayct59a0nDtazdpbStaRlXNHi1lXq1nn4pJ1taZfd7VYybVk5dw9Iu4u6jbD2sysLS1nazcrdxilUY5XBxFxGnBB9vU12fc/Yeld4MNIS20/+p/6zgE452aa2crZMcMtgYd7XcJW1S6wc+62bDd2P9JF358wsynOuX8M4tdWk72DuijAbKie9wJNi1l2d1xGSAejRt/WteO2t3WlJzC3sJdfnF6+/eX9yo+uuhqLJpvVx5/FwShBaYxjxTHOrVhvBW1mq5PO5j7FzBxQJg35015PvZklu5HdU9/d3OPn2wPzenzfPRP7lgxt9xcA59x80uONV5vZLaRT312//FctZSrw9FA/v1veJ0FO62PzasD+wI/7+NmISSqhC6L4fWClPD9XUs+6DTY6o+P4jc7oOJ4JLHrn0PI98w4r/9ltY
q9NLhmr+s5XxxZX+bxDgCudcyd0bzCze4D1ej1vV9J5PSEdHT5iZjc452ZnJXou6UmLbr8BbgImAMcNIT9mtifp6PH97I6wTUhnmK/29QHp9Ho/Gsrn95T3v7q9z7w60pmkj8rOAuVNBVgHFrDyqpd0hh+6pDOkRFfnHqXZT00v3z5/59K8DcZYx8a+89WZag/dTAMqvbZdT3rGt/sYoAFtpPfik53NPQr4RVZMBlzgnPt99xs45+aZ2fvA48653lnO63UoayfnXFsf2bYHfmxmHaR7fpc452YO8OvZxMyeAFYE3gV+NNwzwJDzhdBLfXC62+v6+E3MTRDFL5LOKi11amN77aXp5duTsPzw+NVZOMWs8Gfvr2LGgmN8h2gWuS+MbmZfMLOXgZeAl83sJTM7Me8cGZ0IqXMvuP/Y8OyOY3ffofWiqVu3XrL47PbpDz/Ttf79Xc7+6TubJ8tdc0cGJ+9jgGcCHwL2cM69kG3bGLjQzCY6587JMw9Lrn+SBrCIceOv6Nx35ys698Xo6tq1NGfO9PJtb+9SmrPuWGsvyvoub/oOUC0z+zTpjE89PeCcO8lHnr7kfS/ws8AHnXMtvbaPBZ50zm2eWxggiOIrgaPz/EwZGRvaG68cXb7jhf3LD41bg3e2NqNZFw86mhkLfuU7RLPI/dKD3uWXbVucTZSQt+cHfoo0gpfc2uud03H0eud0HM1KLF50QPmB2UeU72rb0l6aVDa3hu98NaRd4BrKuwBfMbO9nHN/6rnRzPYCXs85C6gAm9J7jF356s69//Pqzr0B53YuzZt7bPm2tz9c+sva46x1iwHfoL4N9o4JWY68T4J8Ebg4mzniFDM72cyuAC4GTs45C6gAC8Ds4a7Jkz/fftruW7X+cotdWi58/Wcdn7jvdTdxpnMsszdS57qo8s9snc4Cc2yWaa8+ch6SfX+3mT3b4/2u6/UeT5rZb3ptu9zMXs1uz8PMPmDpuuEDynsE2AocC2xOen+hAfeSLn/p4w+jCrBgXmWNdc7tmLbOuR3TGEvr+x8vP/ToUeU7W6fYi5uVzdX7bMt/Z8aC1iqfW4+zwAD8JcvWvRd4OPBkr+cc6Zx7rPcLzWxL0kHbh81spV6X0HWSLq/xsypzAPmPAC8AFjrnLnPOfcU5d5pz7lLSC5IvGOC1NZdUwrfoe6EmKYDFjBl3beceOx3Qds5um7T+aq1DW8965pbOne9Z5Fac51yd3duWqmoihB6zwBxHWjB9We4sMMBXgcg51wV0zwLTbaizwADcB+xkZqOznJsCs6t87RHAVaQzyezf62cXAF+2QU5QkfcIMOhr6hvn3GPZ7S0+PA980NNnS90wm+kmTZrZPmkSwNrMf/PIUXc+d1D5/jHr8vYUM8b5TgjMqfJ59ToLDKR3f90J7Et6O93NQO9Zon5tZt23/N3hnDs9+/owYB9gC9JDZj1L+GXSEe/RwO+pUt4FuOJyfjY2txRLexYVoPTyBhPXOr/jU2ud3/EpxtDW8tHSo48dPeqO9z9oz286yrr+w1OsuVU+ry5ngenhGtLzAROAr5Duive0zC6wme0I/MM595KZvQJcZmar9crxHdJCjasNkncBzjSzzzrnftFzo5kdRzpZqg+PAp/y9NnSAFpZYcUbu3bd4ca2XQHY1v723PRRt722d2nWGiuzeEuz3A4lzRroCfU+CwyAc+5RM5sCLM5GqdW8bBowqcfJjfHAJ4FLerzv37IRbtV/n/MuwFOB35nZkSwpvB1IJ5k8KOcs3R729LnSoGa7TTef3b7p5gAf4J1/HFG+69lPlu8dtYG9NdlsxJZaeJf0BMJA6nYWmF6+TpUnPs2sBBwKbOOcezXb9hHgTHoUYOZ/qdcRoHPuTeBDWfgp2ebYOXdXnjl6mQW0oynyZQjeZtU1fth58Bo/7DyY0XS07VuaOevoUXcs2s7+utFo61y/hh/1CDMWVHOzQD3PAvNvzrk/LufHPY8Bvg2cA7zaXX6Ze
4GtzGydXu8718xmAdst7/O7eZsNpp4EUfwooKUGpaYm24vPTy/f9vd9y4+vPp73tjKjPIy3+xYzFsyoVTZJNdwsvCPkHlSAUmNz3UabfLXj85t8tQNWY+H8w8t/fvrQ8j2ljeyNrcx6rI9SnX4vMJah0wgQCKI4BG7xnUOKIVsfZe4g1kfpBCYyY8HCPPINVyPMAtNNBQgEUTye9Dqp4eyiiAxJFeujPMiMBbt4CdfkVICZIIofBP7Ldw4ptn7WR/kmMxbkPVdmIegY4BLXoQIUz/paH+UT5Ydu9nWNWLPLfUr8OnYt9ba2oRRaF6XyXV3bTTjo239Y5vZRqQ0VYCaphH9HF0VL/bnRd4BmpgJc2rW+A4j0ogIcQSrApWk3WOrJG6TTR8kIUQH2kFTCV4CHBnyiSD4uTSphp+8QzUwFuCztBks96AJ+MeCzZFhUgMv6NX6m5xfp6dakEr7kO0SzUwH2klTCf5CWoIhPF/sOUAQqwL79wHcAKbRXGMScdjJ0KsA+JJVwLunCKyI+XKKTH/lQAfZPo0DxoZNlZzmWEaIC7EdSCW9l6TURRPJwVVIJXx34aVILKsDly32tYim0NuBbvkMUiQpw+a4CXvcdQgrj0qQSJr5DFIkKcDmSStgCnOU7hxRCC+niP5IjFeDALgM0HZGMtJ8mlfA13yGKRgU4gKQSdpGuXi8yUhax7FKWkgMVYBWSSngn8AffOaRp/TC7A0lypgKs3umk12iJ1NJrwPd8hygqFWCVkko4D83OIbV3YlIJF/gOUVQqwME5C3jHdwhpGtcllfAm3yGKTAU4CNlxmlN955Cm8C/gZN8hik4FOEhJJbwCuMV3Dml4pyeV8E3fIYpOBTg0J5D+Cy4yFHcllfBS3yFEBTgk2QWrJ/rOIQ1pMfA53yEkpQIcoqQSXgNc6TuHNJxTk0r4vO8QklIBDs/JwAu+Q0jD+FVSCX/uO4QsoQIchqQSvgscjhZRkoHNAz7vO4QsTQU4TEklnAl82ncOqWvvAocklfA930FkaSrAGsiOB/6P7xxSlxxwVFIJn/YdRJalAqydGcBvfYeQunN2Uglv9h1C+qYCrJGkEjrgWGCm5yhSP65Bk5zWNXPO+c7QVIIoXgd4FFjPdxbx6hbg4KQStvsOIv3TCLDGkkr4OrA/oBk+iusu4FCVX/1TAY6ApBI+AeyDZo4pooeA/bP1ZKTOaRd4BAVRvD1wB7Ca7yySi9nAR5JKqH/4GoQKcIQFUTwVuBOY6DuLjKhngA9ravvGol3gEZbtDu8JvO07i4yYOcDeKr/GowLMQVIJnyQtQf0FaT53AbsmlfBV30Fk8FSAOUkq4V+A3dHkCc3kKmA/renRuHQMMGdBFE8EriUdEUrjOiephN/0HUKGRyPAnCWVcD6wL/Aj31lkSDqAz6r8moNGgB4FUXw88BNgBd9ZpCrvANOSSnir7yBSGypAz4Io3gW4AVjTdxZZrgeAI5JK+LLvIFI72gX2LKmEDwA7AA/6ziJ96gS+Beyu8ms+GgHWiSCKy8DXSKfVGu03jWReBo5MKuH9voPIyFAB1pnszpFfAVv5zlJw1wKf021tzU0FWIeCKF4B+CbpiFCjwXzNB76SVMLLfQeRkacCrGNBFG8DXEp6jFBGlgN+CXwtqYS6bbEgVIB1LojiEjCddM0RTbI6MmYBpySVUCeiCkYF2CCCKB4LfAmIgAme4zSL14EzgCuTStjlO4zkTwXYYIIoXh04EzgRXUA9VO8AFwLnaanKYlMBNqggijciXXDncHQ9Z7VeB34AXJQtai8FpwJscEEUbwycBHwGWNVznHr1V+A80l3dVt9hpH6oAJtEEMUrAccApwBbeo5TLx4HzgWu1zE+6YsKsAkFUbw38EUgpHi7x2+Rrsf766QSPuo7jNQ3FWATC6J4PeDg7LEbzVuG7wM3kt5Bc0dSCTs855EGoQIsiCCK1wQOIC3DvWj8O0wWA3cDVwM3JpVwkd840ohUg
AUURPEE4BPAx0hHho1wgXUb8DDwZ9J1OB5OKmGb30jS6FSAQhDFAWkR/hewI7AN/q8xXAQ8BdxDWnr3J5Vwsd9I0mxUgLKMbDKGrYEpQNDjsSGwPjCqhh/3JullKn8DniNdYnIOkCSVUH84ZUSpAGVQsnkL1yUtxLWAscAYYMUej+7vS8AC0jsvej/+BfxTd2KITypAESmsZr0sQkRkQCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhhqQBFpLBUgCJSWCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhhqQBFpLBUgCJSWCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhhqQBFpLBUgCJSWCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhh/R+uFtLu4nk/AAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "compare['COUNT'].plot(kind='pie')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Max and Min values\n", + "\n", + "it is expected that the maximum will be the same for all mentioned samples since the filtering is being made by the minimum, and is also expected that the min is the value used to filter." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa0AAAD9CAYAAAAPryh0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGPVJREFUeJzt3X+QXGWd7/H3NwEMEjBIsiybwZssxlUQLuKIWHq3KNzShEWDLK6JCGi4xb0WKVCvrrCr8kNYf6CrsiveYoFNtNwEVvSCCnKzKv64ijIJyK8sS5AsDAiEJIiugAa+949+OvRMeqZ7xiQ9z/B+VU1Nn+c853m+PdNzPn1On+mOzESSpBpM6XUBkiR1y9CSJFXD0JIkVcPQkiRVw9CSJFXD0JIkVcPQkiRVw9CSJFXD0JIkVWOXXhews8ycOTPnzJnT6zIkqSqrV69+NDNn9bqOpudMaM2ZM4eBgYFelyFJVYmI/+h1Da08PShJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqsZz5v+0ePRu+Kc/73UVkqTfg0dakqRqPHeOtGbOg3d9s9dVSFJdlkSvKxjCIy1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjW6Dq2ImBoRN0fEN8ry3Ij4SUTcHRFXRMRupf15ZXldWT+nZYyzSvtdEfHGlvb5pW1dRJzZ0j7mOSRJk9dYjrTOANa2LH8C+ExmzgM2A6eU9lOAzZn5YuAzpR8RcSCwCDgImA9cXIJwKvB5YAFwILC49B3zHJKkya2r0IqIPuDPgUvLcgBHAV8pXZYDx5bbC8syZf3rS/+FwMrMfCoz7wXWAYeXr3WZ+fPM/C2wElg4zjkkSZNYt0danwX+CnimLO8DPJaZW8ryIDC73J4N3A9Q1v+y9N/aPmybkdrHM4ckaRLrGFoRcQzwSGaubm1u0zU7rNte7Z3m3yoiTo2IgYgY2LBhQ5tNJEk16eZI67XAmyNiPY1Td0fROPKaERG7lD59wIPl9iCwP0BZ/wJgU2v7sG1Gan90HHMMkZmXZGZ/ZvbPmjWri7sqSZrIOoZWZp6VmX2ZOYfGhRTfycwTgO8Cx5duJwNXl9vXlGXK+u9kZpb2ReXKv7nAPOCnwE3AvHKl4G5ljmvKNmOdQ5I0ie3SucuIPgisjIjzgZuBy0r7ZcCXImIdjaOfRQCZeUdEXAncCWwBTsvMpwEiYilwPTAVuDwz7x
jPHJKkyS2eKwco/f39OTAw0OsyJKkqEbE6M/t7XUeT74ghSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqkbH0IqIaRHx04j4WUTcERHnlva5EfGTiLg7Iq6IiN1K+/PK8rqyfk7LWGeV9rsi4o0t7fNL27qIOLOlfcxzSJImr26OtJ4CjsrM/wocCsyPiCOATwCfycx5wGbglNL/FGBzZr4Y+EzpR0QcCCwCDgLmAxdHxNSImAp8HlgAHAgsLn0Z6xySpMmtY2hlw6/L4q7lK4GjgK+U9uXAseX2wrJMWf/6iIjSvjIzn8rMe4F1wOHla11m/jwzfwusBBaWbcY6hyRpEuvqNa1yRHQL8AiwCrgHeCwzt5Qug8Dscns2cD9AWf9LYJ/W9mHbjNS+zzjmkCRNYl2FVmY+nZmHAn00joxe1q5b+d7uiCe3Y/tocwwREadGxEBEDGzYsKHNJpKkmozp6sHMfAy4ATgCmBERu5RVfcCD5fYgsD9AWf8CYFNr+7BtRmp/dBxzDK/3kszsz8z+WbNmjeWuSpImoG6uHpwVETPK7d2BPwPWAt8Fji/dTgauLrevKcuU9d/JzCzti8qVf3OBecBPgZuAeeVKwd1oXKxxTdlmrHNIkiaxXTp3YT9gebnKbwpwZWZ+IyLuBFZGxPnAzcBlpf9lwJciYh2No59FAJl5R0RcCdwJbAFOy8ynASJiKXA9MBW4PDPvKGN9cCxzSJImt3iuHKD09/fnwMBAr8uQpKpExOrM7O91HU2+I4YkqRqGliSpGoaWJKkahpYkqRqGliSpGoaWJKkahpYkqRqGliSpGoaWJKkahpYkqRqGliSpGoaWJKka3bzLuyQ9p/3ud79jcHCQJ598stel7DDTpk2jr6+PXXfdtdeljMrQkqQOBgcH2XPPPZkzZw4R7T44vW6ZycaNGxkcHGTu3Lm9LmdUnh6UpA6efPJJ9tlnn0kZWAARwT777FPFkaShJUldmKyB1VTL/TO0JGmCiwhOPPHErctbtmxh1qxZHHPMMQAsW7aMpUuXAnDOOefw/Oc/n0ceeWRr/+nTp+/cgncgQ0uSJrg99tiD22+/nSeeeAKAVatWMXv27BH7z5w5k09/+tM7q7ydytCSpAosWLCAb37zmwCsWLGCxYsXj9h3yZIlXHHFFWzatGlnlbfTePWgJI3BuV+/gzsffHy7jnngH+3F2W86aNQ+ixYt4rzzzuOYY47h1ltvZcmSJfzgBz9o23f69OksWbKEz33uc5x77rnbtdZe80hLkipwyCGHsH79elasWMHRRx/dsf/pp5/O8uXLefzx7RuwveaRliSNQacjoh3pzW9+M+9///u54YYb2Lhx46h9Z8yYwdvf/nYuvvjinVTdzmFoSVIllixZwgte8AIOPvhgbrjhho793/e+9/GqV72KLVu27PjidhJPD0pSJfr6+jjjjDO67j9z5kze8pa38NRTT+3AqnauyMxe17BT9Pf358DAQK/LkFShtWvX8rKXvazXZexw7e5nRKzOzP4elbQNj7QkSdUwtCRJ1TC0JEnVMLQkSdUwtCRJ1TC0JEnVMLQkaYLr9NEkTQsXLuQ1r3nNkLbTTz+dj370o1uXL7jgAk477bQdW/AO5DtiSNIE1/rRJLvvvnvbjyZ57LHHWLNmDdOnT+fee+9l7ty5AJx//vkceuihnHDCCUQEl156KTfffHMv7sZ24ZGWJFWg00eTXHXVVbzpTW9i0aJFrFy5cmv7XnvtxQUXXMDSpUs57bTTOO+885gxY8ZOrX178khLksbiujPhodu275
h/eDAs+PioXTp9NMmKFSs4++yz2XfffTn++OM566yztq5bvHgxF110EVOnTh1ymrFGHY+0ImL/iPhuRKyNiDsi4ozS/sKIWBURd5fve5f2iIiLImJdRNwaEYe1jHVy6X93RJzc0v7KiLitbHNRRMR455CkyWi0jyZ5+OGHWbduHa973et4yUtewi677MLtt9++df3g4CAPPfQQDz74IL/+9a93dunbVTdHWluA/5WZayJiT2B1RKwC3gl8OzM/HhFnAmcCHwQWAPPK16uBLwCvjogXAmcD/UCWca7JzM2lz6nAjcC1wHzgujJm13P8vj8MSeqowxHRjjTSR5NcccUVbN68eevrWI8//jgrV67k/PPPB+CMM87gnHPOYe3atZx77rlceOGFPal/e+h4pJWZv8jMNeX2r4C1wGxgIbC8dFsOHFtuLwS+mA03AjMiYj/gjcCqzNxUgmoVML+s2yszf5yNd+/94rCxxjKHJE1aS5Ys4SMf+QgHH3zwkPYVK1bwrW99i/Xr17N+/XpWr1699XWt6667jkceeYSTTjqJD3/4w3zta1/jzjvv7EX528WYLsSIiDnAK4CfAPtm5i+gEWzAH5Rus4H7WzYbLG2jtQ+2aWccc0jSpNXuo0nWr1/PfffdxxFHHLG1be7cuey1115873vf4z3veQ8XX3wxEcEee+zBJz/5SZYuXbqzS99uur4QIyKmA1cB78nMx8vLTm27tmnLcbSPWk4320TEqTROO/KiF72ow5CSNDG1ex3qyCOP5MgjjwTggQce2Gb9mjVrALjrrruGtB933HEcd9xx27/InaSrI62I2JVGYH05M79amh9unpIr3x8p7YPA/i2b9wEPdmjva9M+njmGyMxLMrM/M/tnzZrVzV2VJE1g3Vw9GMBlwNrM/LuWVdcAzSsATwaubmk/qVzhdwTwy3Jq73rgDRGxd7kK8A3A9WXdryLiiDLXScPGGssckqRJrJvTg68FTgRui4hbSttfAx8HroyIU4D7gLeWddcCRwPrgN8A7wLIzE0R8VHgptLvvMzcVG6/G1gG7E7jqsHrSvuY5pAkTW4dQyszf0j715AAXt+mfwJt39gqMy8HLm/TPgC8vE37xrHOIUk7QmYyymv51WvsVic+38ZJkjqYNm0aGzdurGbHPlaZycaNG5k2bVqvS+nIt3GSpA76+voYHBxkw4YNvS5lh5k2bRp9fX2dO/aYoSVJHey6665b321CveXpQUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1OoZWRFweEY9ExO0tbS+MiFURcXf5vndpj4i4KCLWRcStEXFYyzYnl/53R8TJLe2vjIjbyjYXRUSMdw5J0uTWzZHWMmD+sLYzgW9n5jzg22UZYAEwr3ydCnwBGgEEnA28GjgcOLsZQqXPqS3bzR/PHJKkya9jaGXm94FNw5oXAsvL7eXAsS3tX8yGG4EZEbEf8EZgVWZuyszNwCpgflm3V2b+ODMT+OKwscYyhyRpkhvva1r7ZuYvAMr3Pyjts4H7W/oNlrbR2gfbtI9nDknSJLe9L8SINm05jvbxzLFtx4hTI2IgIgY2bNjQYVhJ0kQ33tB6uHlKrnx/pLQPAvu39OsDHuzQ3temfTxzbCMzL8nM/szsnzVr1pjuoCRp4hlvaF0DNK8APBm4uqX9pHKF3xHAL8upveuBN0TE3uUCjDcA15d1v4qII8pVgycNG2ssc0iSJrldOnWIiBXAkcDMiBikcRXgx4ErI+IU4D7graX7tcDRwDrgN8C7ADJzU0R8FLip9DsvM5sXd7ybxhWKuwPXlS/GOockaf
KLxkV7k19/f38ODAz0ugxJqkpErM7M/l7X0eQ7YkiSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqUW1oRcT8iLgrItZFxJm9rkeStONVGVoRMRX4PLAAOBBYHBEH9rYqSdKOtkuvCxinw4F1mflzgIhYCSwE7uxpVVIlMpNn8tnvz2SSze+U788MXW72afZrt9wcc+s2ZQy2jkHLds8uP5MJyZDlbNbZUkfbepOhc7cu03I/n2nW1Rynua45zgjLjFx3VzUM+Rk+O/Zo2wz5mY1w/59pNAxZHjpOc5uW388ztPwunr2/Q38eQ2uZaGoNrdnA/S3Lg8CrR9vg5xv+k8WX3Nhx4MavubMcwy+z665jGrN3deYYBu1+zK6HHMPPs9d1dv87Gr7z2xocowRE5tAdcEJjxzzSTrvluzqLgACmRDAlorEcQ5dbv08JiIiWbcpyy/pnx2ldLttMGbbcMhdRlqdMaTvGlABoWZ4CwbZz06kWmsvN+uFDPfwdtFNraEWbtm3+FCPiVOBUgD32O4Cnu/1rbTf6+LuVB0J3HWMMo3Y1ZrdzPzvqdh+z67nHMGjXP6Ux3fdux9wRdQ7dkbTdGbYuU3ZMrcux7Q6nuRNsu9zcyU1pmZMx1NC6g95mhzvyTrvtjrOllm12wCMGxrD729xJT2n3M9n2e7v72Xr/x/J7nswMre1jENi/ZbkPeHB4p8y8BLgEoL+/P6/8n6/ZOdVJknaIKi/EAG4C5kXE3IjYDVgEXNPjmiRJO1iVR1qZuSUilgLXA1OByzPzjh6XJUnawaoMLYDMvBa4ttd1SJJ2nlpPD0qSnoMMLUlSNQwtSVI1DC1JUjUMLUlSNWIsb8lTs4j4FXBXr+vowkzg0V4X0QXr3H5qqBGsc3urpc4/ycw9e11EU7WXvI/DXZnZ3+siOomIAevcfmqos4YawTq3t5rq7HUNrTw9KEmqhqElSarGcym0Lul1AV2yzu2rhjprqBGsc3uzznF4zlyIIUmq33PpSEuSVLkJF1oR0RcRV0fE3RFxT0R8LiJ2i4gjI+KXEXFzRPxbRHyqZZt3RsQ/tCy/IyJujYg7IuJnEXFpRMwo626IiP5ye31EXNWy3fERsWxYPVdHxI+HtZ0TEe/fQT8CSdIIJlRoReOjQr8K/J/MnAe8BJgOXFC6/CAzXwG8AjgmIl7bZoz5wHuBBZl5EHAY8CNg3xGm7Y+Ig0aoZ0bZfkZEzB3/PdNEFhFviYiMiJeW5TkR8URE3FKe9PwoIv6kpf/rIuKn5cnTv5VPyKY8sRr+BGeXiHg4IvaLiGURcW8Z95aI+NEoNb2z1PT6NnUeX5ZviIi7Wsb7yrAxfhYRK4a1LYuIByLieWV5ZkSsH/cPTzvdBH287hsR3yjz3xkR10bEwS3bbmoZ619bar45ItaW+k7u5v5PtP/TOgp4MjP/CSAzn46I9wL3At9tdsrMJyLiFmB2mzH+Bnh/Zj7QHAO4fJQ5PwX8NXBCm3V/AXwdeJjGB01+bMz3SDVYDPyQxu/4nNJ2T2YeChAR/4PGY+TkiPhD4J+BYzNzTUTMBK6PiAeA64C+iJiTmevLOH8G3J6Zv2g8J+MDmTkkXEZxW6nt22V5EfCzYX1OyMxt/o8mIl5G40npn0bEHpn5ny2rnwaWAF/osg5NLBPx8XoesCozP1dqOCQzbwOaNS0DvtEcKyLmlJpfUZb/GPhqRExp7v9HMqGOtICDgNWtDZn5OHAf8OJmW0TsDcwDvj/CGGvGMOeVwGER8eI26xYDK8rX4jGMqUpExHTgtcApNHYC7ewFbC
63TwOWZeYagMx8FPgr4MzMfAb4F+BtLdsuovH4GY8fAIdHxK6lzhcDt3S57duBLwH/F3jzsHWfBd4bERPtSas6mMCP1/2AweZCZt46lo0z8+fA+4DTO/WdaKEVQLvLGZvt/y0ibgUeopHaD4062LOHp/dExNtG6PY0cCFw1rBt96Wxk/hhZv47sCUiXj62u6MKHAt8q/yON0XEYaX9gOZjh8Yf09+V9m2eWAEDpR0af/CLAMopuKOBq1r6XthyyuTLHWpL4F+BNwILgWva9Plyy3gXtrS/DbiC9k+47qPxTP3EDvNr4pmoj9fPA5dFxHcj4m8i4o/Gcd/WAC/t1GmihdYdwJC3NYmIvYD9gXtovKZ1CHAw8O6IOHSEMQ4DyMzbyiHzdcDuo8z7JeBPgRe1tL0N2Bu4t5zzn8PIz2xUr8XAynJ7Jc/u4O/JzEMz8wDgPTz7vyojPbFKgMy8CZheXlNYANyYmZtb+n2gjHtoZrY7JT3cShqPu5GeAZ/QMt4HACLiVcCGzPwPGqcWDytnJ1r9LfABJt4+QKObkI/XzLwe+GPgH2kEz80RMWuM9y266TTRHrDfBp4fEScBRMRU4NPAMuA3zU7lWcbHgA+2GeNjwKcioq+lbbTAIjN/B3yGxi+7aTEwPzPnZOYc4JUYWpNKROxD43XUS8sTkw/QeLIy/I/nGhpPaqDNEysaj407W5Y7BU3XMvOnwMuBmeVx343FwEvLfbqHxumivxg27joapxr/8vepTzvPRH+8ZuamzPznzDwRuKmlhm69AljbqdOECq1s/KfzW4C3RsTdwL8DT9J4UXG4/03jReYhV/Vl5rXARcB15SqWH9E4BXh9h+kvo1yYUl4kfBFwY8u49wKPR8SrS9OHImKw+TWmO6qJ4njgi5n5X8qTk/1pXPTTN6zf62js/KFxGuSdzaP8siP5BPDJlv4rgHfQ2MG0O6U3VmfR/m9gGxExBXgrcEjLE66FtH9N9gLAf92ox4R9vEbEURHx/HJ7T+AAGqehu91+Do2L4v6+U98J90JsZt4PvKnNqhvKV7PfEzx79eC9NI7GmuuWA8tHGP/IlttzWm4/BbSeh93mysTMbJ4//gnPXrWjei0GPj6s7SoaAXFAuUI1gN8C/x2gXFX1DuAfyx9nAJ/NzK83B8jMOyPiN8DqYVftQeM1gg+1LB+emb8drcjMvG6U1V+OiCfK7UeB84EHmlfPFt8HDoyI/YaNe0dErKGcTteEN5Efr68E/iEittA4GLq0nHoczQERcTMwDfgV8PedrhwE38ZJklSRCXV6UJKk0Uy404PSc0lEvAs4Y1jz/8vM03pRjzSaifB49fSgJKkanh6UJFXD0JIkVcPQkiRVw9CSJFXD0JIkVeP/A4NAz1QnJ20oAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "compare[['MIN','MAX']].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Mean and Std\n", + "> A low standard deviation indicates that the data points tend to be close to the mean (also called the expected value) of the set, while a high standard deviation indicates that the data points are spread out over a wider range of values. (https://en.wikipedia.org/wiki/Standard_deviation)\n", + "\n", + "It is noticeable that both mean and std are increasing as the data is filtered by bigger values. \n", + "\n", + "The increase of the mean is to be expected, since we are eliminating the smaller values and leaving only the bigger ones. \n", + "\n", + "Unlike the mean, the STD does not necessarily increase after the filter; but since it does here, we can safely assume that the bigger the values get, the more spread out they are. The mean of the sample is therefore less accurate at representing the whole dataset, since the values differ hugely from one another. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAacAAAD9CAYAAAAYjbi9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd4VFX6wPHvS4CEklBDDRBKkCJNQlNRVFRAFLEBShMQ9KdrWXdXFNfOWta1rroiICBdRUEQEQs2IBB6h9ADIYEEUoD08/vj3sEhhCRAkjvl/TxPHmbOnHvvOzDknXvPe88RYwxKKaWUJynjdABKKaVUXpqclFJKeRxNTkoppTyOJiellFIeR5OTUkopj6PJSSmllMfR5KSUUsrjaHJSSinlcTQ5KaWU8jhlnQ6guNWsWdOEh4c7HYZSSnmVNWvWHDPGhDodh4vPJafw8HCio6OdDkMppbyKiOx3OgZ3ellPKaWUx9HkpJRSyuNoclJKKeVxfG7MKT9ZWVnExsaSnp7udCiOCAoKIiwsjHLlyjkdilJKFYlfJKfY2FiCg4MJDw9HRJwOp1QZY0hMTCQ2NpbGjRs7HY5SShWJX1zWS09Pp0aNGn6XmABEhBo1avjtWaNSyjv5RXIC/DIxufjze1dKeSe/uKynlFLqPIyBNZ86HcU5/ObMyWkiwpAhQ848z87OJjQ0lL59+wIwZcoUQkNDad++/ZmfrVu3nun/9ttvExQURHJy8pm2ZcuWISJ88803Z9r69u3LsmXLSv4NKaW838ljMPteWPiE05Gco9DkJCJBIrJKRDaIyBYRedFubywiUSKyS0TmiEh5uz3Qfh5jvx7utq+n7fYdInKzW3svuy1GRMa6ted7DG9UqVIlNm/ezOnTpwFYunQp9evXP6vPgAEDWL9+/ZmfVq1anXlt1qxZdOrUia+++uqsbcLCwhg/fnzJvwGllG/Z/RN8dCXE/AA3v+p0NOcoyplTBnC9MaYd0B7oJSJdgdeBt40xEcBxYKTdfyRw3BjTDHjb7oeItAIGAq2BXsCHIhIgIgHAB0BvoBUwyO5LAcfwSr1792bRokWAlWwGDRpUpO12795NWloar7zyCrNmzTrrtXbt2lGlShWWLl1a7PEqpXxQdgYsGQef9YcK1eCBn6Db/zkd1TkKHXMyxhggzX5azv4xwPXAvXb7VOAF4COgn/0Y4Avgv2KNyPcDZhtjMoC9IhIDdLb7xRhj9gCIyGygn4hsK+AYF+3Fb7aw9XDKpeziHK3qhfD8ra0L7Tdw4EBeeukl+vbty8aNGxkxYgS//fbbmdfnzJnD77//fub5ihUrqFChwplE1r17d3bs2EFCQgK1atU60+/ZZ5/l2Wef5cYbbyzW96WU8jEJ2+HLURC/CTo9ADe9DOUqOB1Vvoo05mSf4awHEoClwG7ghDEm2+4SC7iuUdUHDgLYrycDNdzb82xzvvYaBRzDK7Vt25Z9+/Yxa9Ys+vTpc87reS/rVahgfWhmz57NwIEDKVOmDHfccQeff/75Wdt1794d4KxEp5RSZxgDqz6BCddCahwMmgO3vOmxiQmKWK1njMkB2otIVeAroGV+3ew/86tbNgW055cgC+p/DhEZDYwGaNiwYX5dzijKGU5Juu222/jb3/7GsmXLSExMLLT/xo0b2bVr15mzoszMTJo0acLDDz98Vr9x48Yxfvx4ypbVAkyllJuTx2D+I7
BzMTTrCf0+hODaTkdVqAuq1jPGnACWAV2BqiLi+k0YBhy2H8cCDQDs16sASe7tebY5X/uxAo6RN64JxphIY0xkaKjHLEeSrxEjRvDcc8/Rpk2bIvWfNWsWL7zwAvv27WPfvn0cPnyYQ4cOsX//2bPb33TTTRw/fpwNGzaURNhKKW8U86NV9LD7R+j1Gtz7uVckJihatV6ofcaEiFQAegLbgJ+Bu+xuw4D59uMF9nPs13+yx60WAAPtar7GQASwClgNRNiVeeWxiiYW2Nuc7xheKywsjMceeyzf1+bMmXNWKfny5cuZPXs2/fv3P6tf//79mT179jnbjxs3jtjY2BKJWynlRbLS4bunYfodUKE6PPAzdH0IynjP3UNi5YACOoi0xSpGCMBKZnONMS+JSBNgNlAdWAcMNsZkiEgQ8BnQAeuMaaBbscM4YASQDTxujFlst/cB3rGPMdkYM95uz/cYBcUbGRlp8i42uG3bNlq2zO9KpP/QvwOl/ETCdvhyJMRvhs6j4caXijS2JCJrjDGRpRBhkRSlWm8jVqLJ276HP6vt3NvTgbvPs6/xwDk35RhjvgW+LeoxlFJK5WEMrJ4I3z8L5SvDvXOh+c2Fb+ehdPRcKaW8XdpRWPAI7PwOmt0It38IlWsVvp0H0+SklFLeLOYH+OohSE+GXq9DlzHgA5M9a3JSSilvlJUOP74IKz+E0JYw9Guo7eytMsVJk5NSSnmbhG32TA+bofMYuPFFj76h9mJoclJKKW/hmulh6T8hMNi6b6n5TU5HVSI0OZWi8ePHM3PmTAICAihTpgzVqlXj+PHjpKWlcfTo0TPLqH/44Yc888wzxMXFERgYSGZmJj179uSVV16hatWqDr8LpZQj0o7C/Idh1xKIuAn6feD1RQ8F0eRUSlasWMHChQtZu3YtgYGBHDt2jMzMTOrVq8eyZct48803Wbhw4VnbzJgxg8jISDIzM3n66afp168fv/zyi0PvQCnlmF0/wNd20UPvN6z7l3yg6KEg3nO7sJeLi4ujZs2aBAYGAlCzZk3q1atXpG3Lly/PG2+8wYEDB3R6IqX8SVY6LH4KZtwJlWrC6GU+U41XGP87c1o8Fo5sKt591mkDvV8rsMtNN93ESy+9RPPmzenZsycDBgzg2muvLfIhAgICaNeuHdu3b6ddu3aXGrFSytPFb7WKHhK2QJcHoeeLUC7I6ahKjZ45lZLKlSuzZs0aJkyYQGhoKAMGDGDKlCkXtI/CpppSSvkAYyBqAkzoAScT4L4voPfrfpWYwB/PnAo5wylJAQEB9OjRgx49etCmTRumTp3K8OHDi7RtTk4OmzZt0vnxlPJlaQl20cP3dtHDh1DZs1daKCn+l5wcsmPHDsqUKUNERAQA69evp1GjRkXaNisri3HjxtGgQQPatm1bkmEqpZyy83uY/3+QkQp93oROo/xibOl8NDmVkrS0NP7yl79w4sQJypYtS7NmzZgwYUKB29x3330EBgaSkZFBz549mT/f61cMUUrllZUOS5+DVR9DrdYw7BuoVXpXSHJzDf9ZuqPUjldUmpxKSceOHVm+fHm+r7ku9blbtmxZyQellHJW/Ba76GErdHkIer5QqmNL2Tm5jJ23iS/WeN46cFoQoZRSpc0YWPk/mHCdtYz6fV9a4+GlmJgysnP4y6x1fLEmlsd7RpTacYtKz5yUUqo0pSXA1/8HMUuheS+47b+lXvRwOjOHMdPX8OvOo/yzbytGXt2YJ0o1gsL5TXIyxiB+OrioJehKeYidS6zElJnmWNFDSnoWI6esZs3+47xxZ1vu6dSgVI9fVH6RnIKCgkhMTKRGjRp+l6CMMSQmJhIU5F/3SCjlUbJO20UPE6D25XDnxFItenBJTMtg2Ker2HEklfcHXcEtbeuWegxF5RfJKSwsjNjYWI4ePep0KI4ICgoiLCzM6TCU8k/xW+CLkXB0G3R9GG54zpEbao8kp3PfxJXEHj/NhKGRXHeZZ08a6xfJqV
y5cmdm/FZKqVJhDET9D5Y+D0FVYPCX0KynI6HsTzzJfROjOHEqi6kjOtO1SQ1H4rgQfpGclFKqVKXGWzfUxvxgFT30+8CauNUBO46kMmRSFJk5ucx8oAttw7xj2R1NTkopVZzcix5u+Q9EjnRspocNB08w7NNVlA8ow9wx3WheO9iROC6GJiellCoOWafh+3/C6k+gdhu76KGFY+Gs3JPIqKnRVKtUjhkju9KwRkXHYrkYhd6EKyINRORnEdkmIltE5DG7/QUROSQi6+2fPm7bPC0iMSKyQ0RudmvvZbfFiMhYt/bGIhIlIrtEZI6IlLfbA+3nMfbr4cX55pVSqlgc2WzNIr76E6vo4YEfHU1MP29PYNjkVdSpEsTnY670usQERZshIht40hjTEugKPCwirezX3jbGtLd/vgWwXxsItAZ6AR+KSICIBAAfAL2BVsAgt/28bu8rAjgOjLTbRwLHjTHNgLftfkop5Rlyc2HFh/DJdXD6OAyeB73+BWUDHQvpmw2HeWBaNM1rBzN3TDfqVPHO20gKTU7GmDhjzFr7cSqwDahfwCb9gNnGmAxjzF4gBuhs/8QYY/YYYzKB2UA/sW48uh74wt5+KnC7276m2o+/AG4Qf7tRSSnlmVLjYcZdsORpqwrvoeXQ7AZHQ5q96gCPzl5Hh4ZVmfFAF6pXKu9oPJfigubWsy+rdQCi7KZHRGSjiEwWkWp2W33goNtmsXbb+dprACeMMdl52s/al/16st1fKaWcs+M7+Kgb7F8Ot7wFA2c6Vo3nMvG3PYydt4lrIkKZNqILIUHlHI3nUhU5OYlIZeBL4HFjTArwEdAUaA/EAf9xdc1nc3MR7QXtK29so0UkWkSi/fVGW6VUKcg6DYuehFkDILgejPkFOjlXjQfWLDBvLd3JK4u20adNHT4ZGkmF8gGOxVNcilStJyLlsBLTDGPMPABjTLzb658AC+2nsYD7ZE1hwGH7cX7tx4CqIlLWPjty7+/aV6yIlAWqAEl54zPGTAAmAERGRupEckqp4ndkk7W8xdHt0O0Ra6YHB8eWwEpMLy/cxuQ/9nJ3xzBevaMNZQN8Y7GJolTrCTAJ2GaMecut3X1Spv7AZvvxAmCgXWnXGIgAVgGrgQi7Mq88VtHEAmPNSvozcJe9/TBgvtu+htmP7wJ+MjqLqVKqNOXmwooP4JPr4fQJGPIV3Dze8cSUk2t46suNTP5jL/dfFc7rd7b1mcQERTtzugoYAmwSkfV22zNY1XbtsS6z7QPGABhjtojIXGArVqXfw8aYHAAReQRYAgQAk40xW+z9PQXMFpFXgHVYyRD7z89EJAbrjGngJbxXpZS6MKlH4OuHYPdPcFkfa3mLSs4Pe2dm5/LEnPUs2hTHozdE8ETPCJ+b1Fp87UQkMjLSREdHOx2GUsrb7VgM8x+GzFNWeXjH+x0dW3I5nZnDQzPWsGzHUZ69pSWjujcplv2KyBpjTGSx7KwY6AwRSinlLvMUfP8sRE+COm3gzkkQepnTUQGQmp7FyCnRrN6fxGt3tGFg54ZOh1RiNDkppZRL3Ear6OHYDo8penBJOpnJsMmr2BaXwnsDO3Bru3pOh1SiNDkppVRuLqz8EH58ESpUhyFfQ9PrnI7qjCPJ6QyZFMWBpFNMGNqR61vUdjqkEqfJSSnl384qergFbnvfI4oeXA4knuK+SStJSstkyv2d6dbUc2IrSZqclFL+a/u3VtFD1mno+w50HO4RRQ8uu+JTGTwpiozsXGY80JX2DbxjLabioMlJKeV/Mk/B9+MgejLUaWsXPTR3OqqzbIpNZujkKMoGlGHO6G5cVsd71mIqDpqclFL+xb3o4cpH4fpnPabowSVqTyIjp0ZTtWI5ZozqQqMalZwOqdRpclJK+YfcXFj5AfzwojVJ69D50KSH01Gd4+cdCTz42RrCqlVg+qgu1K1SwemQHKHJSSnl+1LirKKHPT9Di75W0UPF6k5HdY5FG+N4fM46mtcOZtqIzt
So7FlndKVJk5NSyrdtXwTzH4HsdLj1XbhimEcVPbjMXX2QsfM2ckXDakwa3okqFbx7yYtLpclJKeWbMk/BkmdgzadQt51V9FAzwumo8jXp9728vHAr3SNq8vGQjlQsr7+a9W9AKeV74jbAFyMhMQauegyuexbKet6qsMYY3vsxhrd/2Emv1nV4d1B7Ast6/1pMxUGTk1LKd+Tmwor/wo8vuRU9XOt0VPkyxjB+0TYm/r6XO68I4/U7fWctpuKgyUkp5RtS4uDrB2HPMo8uegBrLaZxX21i9uqDDL8ynOf6tqJMGc8bB3OSJiellPfbthAWPALZGXDre3DFUI8segBrLaa/zl3Pwo1x/OX6Zvz1xuY+txZTcdDkpJTyXpkn7aKHKVC3Pdw50WOLHgDSs3J4aPoaft5xlGf6tGD0NU2dDsljaXJSSnmnw+utmR4SY+Cqx+G6cR5Z9OCSmp7FqKnRrNqXxL/6t+HeLr67FlNx0OSklPIuubmw4n348WWoFArDFkDja5yOqkDHT2Yy7NNVbDmcwjsD2tOvfX2nQ/J4mpyUUt4j5TB89SDs/QVa3mqNL3lo0YNLQko6gydFsS/xFB8P7kjPVr6/FlNx0OSklPIO276BBX+xih5uex86DPHYogeXg0mnuG9iFIlpGUy5vxNXNq3pdEheQ5OTUsqzZZ6E756GtVPtoodJULOZ01EVKiYhlcETV3E6K4fpo7rQoWE1p0PyKpqclFKe6/A6u+hhN1z9BPR4xqOLHlw2H0pm6ORVlBFhzpiutKgT4nRIXkeTk1LK8+TmwvL34KdXvKbowWX1viRGfLqakArWWkzhNf1vLabioMlJKeVZkg9ZMz3s/RVa3mbNJO7hRQ8uv+w8ypjPoqlXxVqLqV5V/1yLqTgUOpGTiDQQkZ9FZJuIbBGRx+z26iKyVER22X9Ws9tFRN4TkRgR2SgiV7jta5jdf5eIDHNr7ygim+xt3hP7dunzHUMp5aO2LoCProTYNXDbf+GeaV6TmBZvimPU1NU0qVmZuQ9208R0iYoyy2A28KQxpiXQFXhYRFoBY4EfjTERwI/2c4DeQIT9Mxr4CKxEAzwPdAE6A8+7JZuP7L6u7XrZ7ec7hlLKl2SetCrx5g6B6o3hwd/gCs+vxnP5PPogD89cS9uwqswa3ZWafrxIYHEpNDkZY+KMMWvtx6nANqA+0A+YanebCtxuP+4HTDOWlUBVEakL3AwsNcYkGWOOA0uBXvZrIcaYFcYYA0zLs6/8jqGU8hWH1sLH18Daz+Dqv8LIpVDDe6b1mfLHXv7+xUaualaTz0Z29vtFAovLBY05iUg40AGIAmobY+LASmAiUsvuVh846LZZrN1WUHtsPu0UcIy8cY3GOvOiYUOdEkQpr5Cb82fRQ+XaMOwbaNzd6aiKzBjDf3+K4T9Ld3Jz69q8N6iDrsVUjIqcnESkMvAl8LgxJqWAWXTze8FcRHuRGWMmABMAIiMjL2hbpZQDkg/BV2Ng32/Qqh/0fcdrxpbASkyvLt7OhF/3cEeH+rxxV1tdi6mYFSk5iUg5rMQ0wxgzz26OF5G69hlNXSDBbo8FGrhtHgYcttt75GlfZreH5dO/oGMopbzV1vmw4FHIyYJ+H0D7+7xmbAmstZie/Xozs1YdYGi3Rrxwa2tdi6kEFKVaT4BJwDZjzFtuLy0AXBV3w4D5bu1D7aq9rkCyfWluCXCTiFSzCyFuApbYr6WKSFf7WEPz7Cu/YyilvE1GGsx/BOYOhepNrKKHDoO9KjFl5eTy+Jz1zFp1gIeva8qLt2liKilFOXO6ChgCbBKR9XbbM8BrwFwRGQkcAO62X/sW6APEAKeA+wGMMUki8jKw2u73kjEmyX78EDAFqAAstn8o4BhKKW9yaK0100PSHuj+JPR4GgK8q3AgPSuH/5uxlp+2J/BUrxY81MN7ija8kVgFcr4jMjLSREdHOx2GUgqsooc/3oWfx0PlOnDHxxB+tdNRXbC0jGxGTV
1N1N4kXup3OUO6NnI6pGInImuMMZFOx+GiM0QopUpGcqy1vMW+36B1f+j7NlTwvvvoT5zKZNinq9l8KJm372nP7R10LabSoMlJKVX8tnwN3zxmFz18CO3v9aqxJZeElHSGTFrF3sST/G9wR27UtZhKjSYnpVTxyUiDxU/B+ulQvyPc8YlX3VDr7mDSKQZPiuJoagafDu/EVc10LabSpMlJKVU8Dq2xix72Qve/QY+xXlf04BKTkMaQSVGczMhm+qguXKFrMZU6TU5KqUuTmwN/vAM//8sqehi+CMKvcjqqi7b5UDLDJq9CRJgzphst6+paTE7Q5KSUunjJsTBvDOz/3auLHlyi9yVx/5TVBAeWZfqoLjQJrex0SH5Lk5NS6uJs+coqesjNgds/gnaDvLLoweXXnUcZ89ka6lQJYvqoLtTXJS8cpclJKXVhMlJh8Vi76CES7vzEmvHBi323+QiPzlpHk9BKfDayC6HBuuSF0zQ5KaWKLnYNfDkSTuyHa/4O1z7ltUUPLl+uieUfX26kbVgVpgzvTJWK3v1+fIUmJ6VU4XJz4Pe3YdmrEFzXKnpodKXTUV2yaSv28dz8LVzVrAYThkRSKVB/JXoK/ZdQShXsxEFreYv9f8Dld8Itb0GFqk5HdUmMMXy4bDf/XrKDG1vV5v1BHQgqp2sxeRJNTkqp89s8DxY+bhc9/A/aDfTqogewEtNr323n41/20N9ei6mcrsXkcTQ5KaXOlZFqz/QwA8I6wR0TvL7oAay1mP45fzMzow4wuGtDXrrtcl3ywkNpclJKnS022prp4cR+uOYfcO0/vL7oAay1mP72+Qbmrz/Mg9c25alel1HAit7KYZqclFKW3Bz4/S34+VUIqQfDv4VG3ZyOqlikZ+XwyMy1/LAtgX/0uoz/69HM6ZBUITQ5KaWsood5o+HAcrj8LrjlP15f9OByMiObB6ZFs3x3Ii/3a82QbuFOh6SKQJOTUv5u85fwzRNgcqH/BGh7j9cXPbicOJXJ8E9Xs+lQMm8PaEf/DmFOh6SKSJOTUv4qIxW+/TtsmGUXPXwC1Rs7HVWxSUhNZ+ikVew5epIP77uCm1vXcTokdQE0OSnlj2Kj7ZkeDlizPFzzDwjwnV8HscdPMXhiFPEpGUwe3omrI3QtJm/jO59GpVThcnPgt7esmR5C6sP9i6FhV6ejKla7j6YxZGIUafZaTB0bee8s6f5Mk5NS/uLEAbvoYQW0udsqegiq4nRUxWrL4WSGTloFwKzRXWldz7fenz/R5KSUP9j0BSz8659FD+0GOB1RsVuzP4nhn66msr0WU1Ndi8mraXJSypelp8Dif9hFD52t5S2qhTsdVbH7fdcxHpgWTe2QQKaP6kJYtYpOh6QuUaETSonIZBFJEJHNbm0viMghEVlv//Rxe+1pEYkRkR0icrNbey+7LUZExrq1NxaRKBHZJSJzRKS83R5oP4+xXw8vrjetlF84uBo+7g4b50CPp63xJR9MTN9vOcKIKatpVKMicx/sponJRxRltsMpQK982t82xrS3f74FEJFWwECgtb3NhyISICIBwAdAb6AVMMjuC/C6va8I4Dgw0m4fCRw3xjQD3rb7KaUKk5sDv7wBk2+2LuPd/x30GOtT1XguX62L5aEZa2lVL4TZo7tSKzjI6ZBUMSk0ORljfgWSiri/fsBsY0yGMWYvEAN0tn9ijDF7jDGZwGygn1gTW10PfGFvPxW43W1fU+3HXwA3iE6EpVTBju+HKbfAz+Ot5S0e/B0adnE6qhLx2Yp9PDFnA10aV2f6qC5UrVje6ZBUMbqUeeIfEZGN9mU/V61mfeCgW59Yu+187TWAE8aY7DztZ+3Lfj3Z7q+Uys+mL+B/V0P8FuuG2js/8blqPJcPl8Xwz/lb6NmyFpOHd6KyLhLocy42OX0ENAXaA3HAf+z2/M5szEW0F7Svc4jIaBGJFpHoo0ePFhS3Ur4nPcUqEf9yJNRqCQ/+Zk1B5IOMMby2eD
tvfLeDfu3r8dHgjrpIoI+6qK8bxph412MR+QRYaD+NBRq4dQ0DDtuP82s/BlQVkbL22ZF7f9e+YkWkLFCF81xeNMZMACYAREZG5pvAlPJJB1dZy1skH7SKHrr/zSfHlgBycw3PLdjM9JUHuLdLQ17udzkBuhaTz7qoMycRqev2tD/gquRbAAy0K+0aAxHAKmA1EGFX5pXHKppYYIwxwM/AXfb2w4D5bvsaZj++C/jJ7q+UysmGZa/DZLtWyYeLHgCyc3J58vMNTF95gDHXNmH87ZqYfF2hn2QRmQX0AGqKSCzwPNBDRNpjXWbbB4wBMMZsEZG5wFYgG3jYGJNj7+cRYAkQAEw2xmyxD/EUMFtEXgHWAZPs9knAZyISg3XGNPCS361SvuD4fusy3sGV0HYA9HkTgkKcjqrEpGfl8JdZ61i6NZ6/33wZ/9ejqS4S6AfE105GIiMjTXR0tNNhKFUyNn4Oi/5qPb7lLWh7t7PxlLCTGdmM/iyaP2ISefG21gy7MtzpkHyWiKwxxkQ6HYeLb14DUMrXpCdby1tsnAMNusIdE6BaI6ejKlHJp7K4f8oq1h88wX/ubsedHXUtJn+iyUkpT3cgCuaNguRD0OMZ6P6kz44tuRxNzWDo5FXsTkjjw/s60utyXYvJ3/j2J1wpb5aTDb/+G359A6o0gBHfQYPOTkdV4g6dOM2QiVHEJaczaXgk3SNCnQ5JOUCTk1Ke6Pg+u+ghCtoOhD7/9umiB5c9R9MYPDGK1PRsPhvZmcjw6k6HpByiyUkpT7NxLix6EhC4cxK0uavQTXzBtrgUhkyKItdYazFdXt83Z7dQRaPJSSlPkZ5sJaVNn0PDblbRQ9WGTkdVKtYeOM7wyauoWN5ai6lZLV2Lyd9pclLKExxYCfMesIoerhsHV//V54seXJbHHGPUtGhCgwOZPrILDarrkhdKk5NSznIveqjaEEYsgQadnI6q1CzdGs/DM9fSuEYlPhvZmVohuuSFsmhyUsopSXutoofYVdBuEPR+wy+KHlzmrz/EX+du4PL6VZh6fydd8kKdRZOTUk7YMMcaX5IyflX04DJ95X7+OX8zXRpXZ+IwXfJCnUs/EUqVprOKHq6EOz72m6IHl4+W7eb177ZzfYtafHjfFbrkhcqXJielSsv+FdZlvJRDcN2z0P2vUMZ/fjEbY/j3kh18uGw3t7arx1v3tKNcwKWsd6p8mSYnpUpSThbsWgobZsL2RdZZ0sjvIcxj5tcsFbm5hhe+2cK0FfsZ1LkBr9zeRpe8UAXS5KRUcTMGjmyE9bOsy3enjkGlUOj2CFz7DwgMdjrCUpWdk8s/vtzIvLWHGH1NE57u3UKXvFCF0uSkVHFJPWLN7rBhFiRshYDQm5DFAAAaz0lEQVTycFlvaHcvNLsBAso5HWGpy8jO4dFZ61iyJZ4nb2zOI9c308SkikSTk1KXIisddiyyzpJ2/wgmF8I6WWstte4PFf13brhTmdmM+WwNv+06xvO3tuL+qxo7HZLyIpqclLpQxlgTsq6fCVu+hoxkCAmDq5+w7leqGeF0hI5LPp3FiCmrWXfgOP++qy13RzZwOiTlZTQ5KVVUJw7AhtnWZbukPVCuIrS8DdoPgvBroIxWngEcS8tg6KRV7EpI5YN7r6B3m7pOh6S8kCYnpQqSkQpbF1gJad9vVlt4d+j+N2h1m98VNxTm8InTDJ4UxeETp5k4rBPXNte1mNTF0eSkVF65ObD3V+ssadsCyDoF1ZtY9ya1G+B3N80W1b5jJ7lvYhQpp7OYNqILnRv773ibunSanJRyObbLGkfaOMe6UTawCrS9x6q2a9AZtMrsvLYfSWHIpFXk5Bpdi0kVC01Oyr+dSoIt86xqu0PRIAFW2fdNr8BlfaCczpJdmPUHTzBs8iqCypVh1piuNKullzrVpdPkpPxPThbE/GCdJe38DnIyoVZrKyG1uQeCazsdoddYvvsYD0yNpkblQGaM0rWYVPEpNDmJyGSgL5
BgjLncbqsOzAHCgX3APcaY42LdXfcu0Ac4BQw3xqy1txkGPGvv9hVjzFS7vSMwBagAfAs8Zowx5zvGJb9j5b/iNlqFDZs+h5NHoWJNiBxpVdvVaauX7S7QD1vj+b+ZawmvUZHPRnahtq7FpIpRUWpfpwC98rSNBX40xkQAP9rPAXoDEfbPaOAjOJPMnge6AJ2B50Wkmr3NR3Zf13a9CjmGUkWXlgDL/wsfXQUfd4fVE60l0AfNhie3Q+/XoG47TUwXaP76Qzw4fQ0t6gQzZ3Q3TUyq2BV65mSM+VVEwvM09wN62I+nAsuAp+z2acYYA6wUkaoiUtfuu9QYkwQgIkuBXiKyDAgxxqyw26cBtwOLCziGUgXLSocd31pnSTE/gsmB+h2hz5tw+Z1+PWtDcZgZdYBxX2+iU3h1Jg2LJDjI/6ZlUiXvYsecahtj4gCMMXEiUsturw8cdOsXa7cV1B6bT3tBx1DqXMZA7Gp71oZ51rpJIfXhqsesWRtCmzsdoU+Y8Otu/vXtdq67LJSPBnfUtZhUiSnugoj8ro2Yi2i/sIOKjMa6NEjDhnoPil85cRA2zraq7ZJ2Q9kK1s2x7QZB42v8ar2kkmSM4a2lO3n/pxhuaVuXt+9pT/myOiOGKjkXm5ziRaSufUZTF0iw22MB90m0woDDdnuPPO3L7PawfPoXdIxzGGMmABMAIiMjLzi5KS+TkWbdHLthFuz9DTDQ6Gpr8b5W/XTWhmKWm2t4aeFWpizfx8BODRjfX9diUiXvYpPTAmAY8Jr953y39kdEZDZW8UOynVyWAP9yK4K4CXjaGJMkIqki0hWIAoYC7xdyDOWPcnOt6YM2zLKmE8o6CdUaQ4+nrVkbqoU7HaFPys7JZey8TXyxJpZRVzdm3C0tdckLVSqKUko+C+usp6aIxGJV3b0GzBWRkcAB4G67+7dYZeQxWKXk9wPYSehlYLXd7yVXcQTwEH+Wki+2fyjgGMqfHIuxEtLGOZB8EAJDoM1d0P5eaNBFq+xKUEZ2Do/PXs/izUd4omdzHr1B12JSpUeswjrfERkZaaKjo50OQ12K08dh8zwrKcWuBikDTa+3xpFa3ALlKjgdoc9zX4vpub6tGHG1rsXk60RkjTEm0uk4XHSGCOUZcrKtxfrWz4QdiyEnA2q1ghtfhjZ3Q4guu1BaUtKzGPHpatYeOM4bd7blnk66FpMqfZqclLOObLYv282FkwlQsQZE3m+dJenNsaUuMS2DoZNXsTM+lfcHXcEtbfVLgXKGJidV+tISrCmE1s+C+E1Qphw0v9kaR2p2I5Qt73SEfiku+TSDJ0YRe/w0E4ZGct1lemuhco4mJ1U6sjOsy3UbZsGupdasDfWu0FkbPMT+RGstphOnspg2ojNdmtRwOiTl5zQ5qZJjDMRGw4aZVoFD+gkIrgtX/sW6bFerhdMRKmDHkVSGTIoiKyeXmQ90oW1YVadDUkqTkyoBybHWKrIbZkPiLmvWhpZ9rYTUpIfO2uBBNhw8wbBPV1E+oAxzx3QjorbewKw8gyYnVTwyT8K2b6xqu72/Ys3acJU1t12rfhAU4nSEKo8VuxMZNXU11SuXZ8bIrjSsoWsxKc+hyUldvNxc2P+7dYa0dT5kplkzNfQYC20HQHW9N8ZT/bQ9noemr6VhdWstpjpVdMkL5Vk0OakLl7jbKmzYMAeSD0D5YGjd36q2a9hNy7893DcbDvPEnPW0rBvC1BGdqV5JqyOV59HkpIrm9AnY8pWVlA5GWbM2NLkOej4Pl/WB8npJyBvMXnWAp7/aRKdG1Zk4PJIQXYtJeShNTur8crJh909WQtq+yJq1IbQF9HzRumynszZ4lYm/7eGVRdu4tnko/xvckQrltTBFeS5NTupc8VuswoZNn0NaPFSoDh2HWdV29TroZTsvY4zh7R928d6Pu7ilTV3eHqBrMSnPp8lJWU4es2dtmAlHNkKZstC8l5WQIm7SWRu8VG6u4eVFW/n0j33cExnGq3
e01bWYlFfQ5OTPsjNg53fWNEIxSyE3G+q2h95vwOV3QSWdJcCb5eQaxn65kc/XxDLiqsY8e0tLymhiUl5Ck5O/MQYOrbVnbfjSWp4iuC50e9ietaGl0xGqYpCZncvjc9bx7aYjPHZDBI/3jNC1mJRX0eTkL5IPWQv2bZgFx3ZC2SBo0RfaD7Kq7nTWBp9xOjOHB6ev4ZedR3n2lpaM6t7E6ZCUumCanHxZ5knYttA6S9rzC2Cs+5BufQ9a3w5BVZyOUBWzlPQsRk2JZvX+JF67ow0DOzd0OiSlLoomJ1+TmwsHllvjSFu/tmZtqNoIrn0K2g2A6vot2lclncxk2ORVbItL4b2BHbi1XT2nQ1Lqomly8hWJu/+8bHfCNWvD7dDOnrWhjJYO+7IjyekMmRTFgaRTTBjaketb1HY6JKUuiSYnb5aebM3asH4WHFwJiDXr9/X/tMaTdNYGv3Ag8RT3TVpJUlomU0d0pquuxaR8gCYnb5ObA7t/tsaRti+C7HSoeRn0fMGetUEv5fiTnfGpDJ4YRWZOLjMf6Eq7BroWk/INmpy8RfxW65LdxrmQdgQqVIMOQ6xqu3pX6KwNfmhj7AmGTV5FuYAyzBndjcvq6FpMyndocvJkJ4/Bpi+ss6S4DdasDRE3WfcjNb8ZygY6HaFySNSeREZOjaZqxXLMGNWFRjUqOR2SUsVKk5Onyc6EXUuscaRdS+xZG9pBr9ehzV1QqabTESqH/bw9gQenryGsWgVmjOqqazEpn3RJyUlE9gGpQA6QbYyJFJHqwBwgHNgH3GOMOS7W7envAn2AU8BwY8xaez/DgGft3b5ijJlqt3cEpgAVgG+Bx4wx5lJi9kjGwOG1VkLa/IU1a0Pl2tD1IavarnYrpyNUHmLRxjgem72Oy+oEM21EZ2pU1rNn5ZuK48zpOmPMMbfnY4EfjTGvichY+/lTQG8gwv7pAnwEdLGT2fNAJGCANSKywBhz3O4zGliJlZx6AYuLIWbPkHLYLv+eDUe3Q0AgtLjFWrSvyXUQoCe26k9zVx9k7LyNdGxUjUnDO+laTMqnlcRvv35AD/vxVGAZVnLqB0yzz3xWikhVEalr911qjEkCEJGlQC8RWQaEGGNW2O3TgNvx9uSUecqqstswE/YsA5MLDbrCre9Cq9uhglZbqXNN+n0vLy/cyjXNQ/lY12JSfuBSk5MBvhcRA3xsjJkA1DbGxAEYY+JEpJbdtz5w0G3bWLutoPbYfNq9jzGwf7mVkLbMh8xUqNIQuv8N2g2EGk2djlB5KGMM7/64i3d+2EXvy+vwzsD2BJbVxKR836Ump6uMMYftBLRURLYX0De/WmdzEe3n7lhkNNblPxo29KC5xJL2WpfsNsyCE/uhfGXr7KjdQGh0lc7aoApkjOGVRduY9Pte7uoYxmt3tKFsgH5mlH+4pORkjDls/5kgIl8BnYF4EalrnzXVBRLs7rFAA7fNw4DDdnuPPO3L7PawfPrnF8cEYAJAZGSkswUT6SnWnHbrZ1lz3CHQ5Fq4bhy07AvlteRXFS4n1/DMvE3MiT7I8CvDea5vK12LSfmVi05OIlIJKGOMSbUf3wS8BCwAhgGv2X/OtzdZADwiIrOxCiKS7QS2BPiXiFSz+90EPG2MSRKRVBHpCkQBQ4H3LzbeEpWbA3t+ts6Sti2E7NNQIwJueM6ataFKWOH7UMqWmZ3LE3PXs2hjHI9e34wnbmyuazEpv3MpZ061ga/s/zRlgZnGmO9EZDUwV0RGAgeAu+3+32KVkcdglZLfD2AnoZeB1Xa/l1zFEcBD/FlKvhhPK4ZI2G6NI22cC6lxEFTVqrRrfy/U76izNqgLlp5lrcW0bMdRnunTgtHX6Hik8k/ia7cNRUZGmujo6JI7wMlEawXZDTPh8DqQAGvWhvaDoHkvnbVBXbTU9CxGTY1m1b4kxt/ehnu7eND4qfJ5IrLGGBPpdBwueiNNUWRnwq7vrcKGnUsgNwvqtIGbX4U2d0PlUK
cjVF7u+MlMhn26iq2HU3hnQHv6tffOwlSliosmp/MxBuLW/zlrw6lEqFQLuoyx5rarc7nTESofEZ+SzuCJUexPOsXHQzpyQ0tdi0kpTU55pcTBprlWUjq6zZ61oY81jVDT63XWBlWsDiad4r6JUSSmZTDl/k5c2VTnTlQKNDlZsk5bszasn2lV3ZlcCOsMfd+G1v2t5SmUKma74lMZPCmK9KxcZjzQlfa6FpNSZ/hvcjIGDqy0Z234GjJSoEoD6P4ktB0INZs5HaHyYZtikxk6OYqyAWWYO0bXYlIqL/9LTsf3/Tlrw/F9UK4StOpnVds1ulpnbVAlbtXeJEZOWU1IBWstpvCaemO2Unn5R3LKSLXOjjbMgv1/AAKNr4Frx0LLWyGwstMRKj/xy86jjPksmnpVKzB9ZBfqVa3gdEhKeSTfTU65ObD3F6uwYds39qwNzeD6f1qzNlRtUPg+lCpGizfF8ejsdUTUCmbayM7U1LWYlDov30tO2enwwwuwYQ6kHoagKtYlu3b3QlikztqgSkRuriHxZCbxKekkpKYTn5JBfEq6/WM93haXQoeG1Zg8vBNVKuhaTEoVxPeSU8I2+OM9aNYTev0LmveGcrqMtbo4xhhS0rPPSTQJKekcsZ8npKSTkJpBdu7Zs62IQI1KgdQOCaR2SBBXN6vJYz0jqFje9/7bKVXcfO9/SUh9eDIaKtcqvK/ya6czc4g/k2TSSXCd7aRmEJ+cTnyq1Z6elXvOtiFBZalTJYjaIUE0Da1J7ZBA6lQJolZw0JlkFBocSDld4kKpi+J7yalyLU1Mfi4zO5ejaXaiSU7/M+HYCciVjFLTs8/ZtkK5ADvJBNIurOqZRPPnTyC1goN0JVqlSpjvJSfls3JyDYknM86c4bhfVrOeW48TT2aes225ADlzVhNRqzJXN6tJrZBAagcH2WdAgdQKCSI4sKwuT6GUB9DkpBxnjCH5dFaeIgK3ogL7MtvRtAxy8hnXqVnZGtepXzWIDg2rUtvt0prrbKdaxfK6WJ9SXkSTkypRJzOyzySaBHsM50hyBvGp6fYZj5WAMrLPHdepWrEctYODqBUSSPNaNf+8rGYnnTohQdSsXF6XLlfKB2lyUhclIzuHhDMJJ+PsMx23AoPUjHPHdSqWD6BOiJV0OjSsaj92O9uxE1JQOR3XUcpfaXJSZ8nJNSSmZZwZz8lbNh1vl00n5TOuUz6gjDWOExLEZXWC6R4RemY8x0o41uPgIL3HRylVME1OfsIYw4lTWcSnpnMk2b1s2rrM5rrkdjQ1gzzDOpQRCA22kk5YtYp0bFTtzGW1Wm5jO9UqltNiAqVUsdDk5APSXOM6Z+7NObdsOiElg8ycc8d1qlUsdya5tKgTTG37Elsdt8tsNSrpuI5SqnRpcvJg6Vk5HE3Nv2zafXznZGbOOdtWDixLrZBA6oQE0Sm8+pmy6dohQdSpYt2rExqs4zpKKc+kyckB2Tm5HEvLLLBsOj41nROnss7ZtnzZMmfGcFrWC6HHZbXOnOG4klGtkCAqB+o/rVLKe+lvsGKUm2s4firTSjR2qXR+ZdPH0s4d1wkoI4Ta9+s0rFGRzo2rn1U2XdtOPFUq6LiOUsr3aXIqAmMMqRnZZyWY/G4YTUhNJyvHnLN9jUrlz1SqtaobQm23CjZX4qlROZAAvUlUKaUAL0hOItILeBcIACYaY14rzv2nZ+Wck2wSUjM4kvzn4/iUdE7lM64THFT2THLp0ri6XUgQeKaooHZIIKHBgQSW1XEdpZS6EB6dnEQkAPgAuBGIBVaLyAJjzNbCts3KyeVYmivJnD07getxfEoGyafPHdcJLFvGuj8nOIjW9UK4vsWf4zqun1rBgVTScR2llCoRnv7btTMQY4zZAyAis4F+wHmT066ENCJf+YHEkxmYfMZ1atn36zSuWYmuTWqcSTSu5Q9qBwcRUkEn/1RKKSd5enKqDxx0ex4LdClog3JlhBtb1bJnoD57AtAalXTyT6
WU8gaenpzyyyTnVByIyGhgNEDDhg159Y62JR2XUkqpEuTpt/3HAg3cnocBh/N2MsZMMMZEGmMiQ0NDSy04pZRSJcPTk9NqIEJEGotIeWAgsMDhmJRSSpUwj76sZ4zJFpFHgCVYpeSTjTFbHA5LKaVUCfPo5ARgjPkW+NbpOJRSSpUeT7+sp5RSyg9pclJKKeVxNDkppZTyOJqclFJKeRwxeef48XIikgrscDqOIqgJHHM6iCLQOIuPN8QIGmdx85Y4LzPGBDsdhIvHV+tdhB3GmEingyiMiERrnMXHG+L0hhhB4yxu3hSn0zG408t6SimlPI4mJ6WUUh7HF5PTBKcDKCKNs3h5Q5zeECNonMVN47wIPlcQoZRSyvv54pmTUkopL+dIchKRMBGZLyK7RGS3iLwrIuVFpIeIJIvIOhHZLiJvum0zXET+6/Z8sIhsFJEtIrJBRCaKSFX7tWUiEmk/3iciX7ptd5eITMkTz3wRWZGn7QUR+VsJ/RUopZQqQKknJ7HWP58HfG2MiQCaA5WB8XaX34wxHYAOQF8RuSqfffQCngB6G2NaA1cAy4Ha5zlspIi0Pk88Ve3tq4pI44t/Z8rTiUh/ETEi0sJ+Hi4ip0Vkvf0FZ7mIXObW/2oRWWV/UdpuL2qJ/SUq75eZsiISLyJ1RWSKiOy197teRJYXENNwO6Yb8onzLvv5MhHZ4ba/L/LsY4OIzMrTNkVEDolIoP28pojsu+i/PFXqPPTzWltEFtrH3yoi34pIG7dtk9z29YNbzOtEZJsd37CivH8n7nO6Hkg3xnwKYIzJEZEngL3Az65OxpjTIrIea6n2vMYBfzPGHHLtA5hcwDHfBJ4B7svntTuBb4B4rPWiXr3gd6S8xSDgd6x/5xfstt3GmPYAIjIG63MyTETqADOB240xa0WkJrBERA4Bi4EwEQk3xuyz99MT2GyMibO+f/F3Y8xZSaQAm+zYfrSfDwQ25OlznzHmnPtQRKQl1pfMa0SkkjHmpNvLOcAI4KMixqE8iyd+Xl8Clhpj3rVjaGuM2QS4YpoCLHTtS0TC7Zg72M+bAPNEpIwrB5yPE5f1WgNr3BuMMSnAAaCZq01EqgERwK/n2cfaCzjmXOAKEWmWz2uDgFn2z6AL2KfyIiJSGbgKGIn1nz0/IcBx+/HDwBRjzFoAY8wx4B/AWGNMLvA5MMBt24FYn6GL8RvQWUTK2XE2A9YXcdt7gc+A74Hb8rz2DvCEiPjizfY+zYM/r3WxVijHPs7GC9nYGLMH+CvwaGF9nUhOAuRXIuhq7y4iG4EjWBn4SIE7+/OUcreIDDhPtxzg38DTebatjfWL4HdjzE4gW0Quv7C3o7zE7cB39r9zkohcYbc3dX1+sP7TvGW3n/MlCoi228H6jz0QwL501gf40q3vv90udcwoJDYD/ADcDPQj/9WeZ7jt799u7QOAOeT/5eoA1jfvIYUcX3keT/28fgBMEpGfRWSciNS7iPe2FmhRWCcnktMW4KypPEQkBGgA7MYac2oLtAEeEpH259nHFQDGmE32ae5ioEIBx/0MuAZo6NY2AKgG7LWvx4dz/m8pyrsNAmbbj2fz5y/y3caY9saYpsDj/Hmvx/m+RBkAY8xqoLJ9zb83sNIYc9yt39/t/bY3xuR3OTmv2VifvfN9o73PbX9/BxCRTsBRY8x+rEuCV9hXHNz9C/g7WpnrbTzy82qMWQI0AT7BSjDrRCT0At+bFKWTEx/YH4GKIjIUQEQCgP8AU4BTrk72N4ZXgafy2cerwJsiEubWVlBiwhiTBbyN9Q/qMgjoZYwJN8aEAx3R5ORzRKQG1ljnRPtLyN+xvpjk/U+yAOsLDOTzJQrr87HV7XlhCaXIjDGrgMuBmvZnvygGAS3s97Qb6zLPnXn2G4N1ifCeS4lPlR5P/7waY5KMMTONMUOA1W4xFFUHYFthnUo9ORnrrt/+wN0isgvYCaRjDezl9T+sgd6zqujspdvfAxbbFSPLsS7dLS
nk8JOwi0DsgbqGwEq3/e4FUkSki930rIjEun4u6I0qT3IXMM0Y08j+ItIAqwAnLE+/q7F+yYN1+WK468zd/oXxOvCGW/9ZwGCsXyT5XYq7UE+T//+Dc4hIGeBuoK3bl6t+5D9uOh7Q2yK8h8d+XkXkehGpaD8OBppiXT4u6vbhWAVq7xfW15GBUmPMQeDWfF5aZv+4+p3mz2q9vVhnV67XpgJTz7P/Hm6Pw90eZwDu10jPqQQ0xriu7UbxZ4WM8m6DgNfytH2JlQia2lWhAmQCowDsKqbBwCf2f0IB3jHGfOPagTFmq4icAtbkqZID6xr+s27POxtjMgsK0hizuICXZ4jIafvxMeAV4JCrYtX2K9BKROrm2e8WEVmLfSlceTxP/rx2BP4rItlYJzcT7UuGBWkqIuuAICAVeL+wSj3Q6YuUUkp5IB0kVUop5XH0/gelSoGI3A88lqf5D2PMw07Eo1RBPOHzqpf1lFJKeRy9rKeUUsrjaHJSSinlcTQ5KaWU8jianJRSSnkcTU5KKaU8zv8DOlODoGpQIvEAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "compare[['MEAN','STD']].plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# IS JSON\n", + "\n", + "This whole sample has: \n", + "- False: 307577 rows\n", + " - 61,54% are not valid JSON\n", + " \n", + "- True: 192228 rows\n", + " - 38,46% are valid JSON" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAADuCAYAAAAQqxqwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFdpJREFUeJzt3XmUXGWZx/HvU72xN5sssuQmkLDLzghhVVTGQoHjAC5wcEfCJg7IZVEvI8MpNhVGDFuGHZkTFZFcCMNiQhIIiWwBRIFgQcIuhCKYhaTzzh+3mmlip7uqu6qee+/7fM6p091Fdd9fh/r13d9XnHMYY/KtoB3AGNN8VnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFH4SI9IjIE30ewQCvDUTk6dalM6Y27doBMmCxc24X7RDGDIet0YeguuaeJiKPVR/79POaHURkVnUrYI6IjK4+f0yf568SkbbW/wbGN1b0wa3eZ7P99upzbwKfcc7tBhwNXN7P930PuKy6NbAHMF9Etqu+fmz1+R7ga83/FYzvbNN9cP1tuncAvxSR3rKO6ef7HgbOEZHNgd85554XkU8DuwOzRQRgdZI/GsY0lRV9aE4D3gB2JtkqWrLyC5xzt4rII0ARuEdEvg0IcINz7qxWhjXGij403cB859wKETkO+Kf9bBEZBbzonLu8+vkngP8F7hCRnzvn3hSR9YG1nXMvtSJ0EMZrACOAzYHOGr/tfeA14JVyqfiPZmUzzSU2P/rAROR959xaKz03GvgtsAj4I3Cyc26t6qm3Sc65HUXkLOAYYBnwOvBV59w7InI0cBbJlsAy4ETn3MxGZA3CeF0gICnziJU+HwFsOMxFLAReBeYBc4EXgOerH+eWS8V/2rIx6WBFz7AgjAPggD6PUYpxlgOPAlOBKcD0cqm4UDGP6cOKniFBGG/NR4u9pW6iAfUAj5MUfyowrVwqvqsbyV9W9BSr7lMfDhxKUuyP6yYalhXAkyRr+9vKpeIs3Th+saKnTBDG7cBnSM6vHw6sqZuoaZ4BrgduKpeKbyhnyT0rekoEYTwG+C5wLLCRcpxWWg7cDVwHTCqXisuU8+SSFV1REMadwBHA8cBBynHS4C3gFuC6cqk4RztMnljRFVT3vU8GfoBfa+96zAJ+Ui4VJ2sHyQMregsFYdwBfAc4F9hUOU5WPAicXS4VZ2gHyTIregsEYVw
guXgmAkbqpsmsu4BzyqXiE9pBssiK3mRBGB8B/BTYQTtLDjhgIvCjcqn4nHaYLLGiN0kQxgcDFwB7amfJoeXADcB55VJxnnaYLLCiN1gQxhsBVwOHaWfxwBLgPODicqnYox0mzazoDRSE8eEkJf+YdhbPzAa+US4Vn9EOklZW9AYIwngd4DLg68pRfPYBybGQUrlUXK4dJm2s6MMUhPEBJPuLI7SzGCBZu3+lXCrO1Q6SJlb0IQrCuIvkYNtpJCPHmPRYCJxULhVv1A6SFlb0IQjCeFfgJuyUWdrdCpxQLhXf0w6izYpepyCMv0pyA0atQzEZXc8B/1ouFV/UDqLJhnuuQxDGZwI3YyXPkjHAQ0EY764dRJOt0WtQvYT1cuBE7SxmyN4HjvT1Jhlbow8iCOPVgd9gJc+6tYA7gzD+pnYQDVb0AQRhvAFwH8k94yb72oEJQRj/WDtIq9mm+yoEYTySZOSTbbSzmKa4huSIvBeXzlrR+xGE8W4kt0VurJ3FNFUMHO3DxBRW9JVUz5FPAdZRjmJa44/AIeVS8QPtIM1k++h9BGE8GpiMldwnBwHXB2Gc66sbrehVQRhvBtyLjeHmo68AF2qHaCbbdAeCMF4fmAZsr53FqDq5XCr+UjtEM3hf9OqQy/cB+2lnMepWkFxU8zvtII1mm+5wLVZykygAtwRhPFY7SKN5XfQgjM8lmRnFmF6rAX8Iwnhb7SCN5O2mexDGRwG3YfeSm/6Vgb3LpeLr2kEawcuiB2G8FcnMnnmdwNA0xn3AZ8ulYuZL4t2mexDGbcCNWMnN4A4GTtIO0QjeFR34IbCPdgiTGRfmYX/dq033IIx3IZm8r0M7i8mUP5Hsr2d2dFlv1ujVwRxvwkpu6rcHycSYmeVN0YHzgR21Q5jMOicI48xOr+XFpnsQxvuT3KXk0x8203h/BXYtl4qLtYPUK/dv/CCM1yaZYCH3v6tpum3I6M0vPrz5LwYC7RAmN04Kwvgg7RD1yvWmexDG2wFPAW3aWUyuPAnsVi4VV2gHqVXe1+jnYyU3jbczGbtHIrdr9CCM9yCZcM+YZpgPjMnKgbk8r9H/UzuAybXNge9rh6hVLtfo1amMp2jnMLn3HhCUS8UF2kEGk9c1+gXaAYwX1iGZNjv1crdGD8L4UOBO7RzGGxWStfq72kEGkqs1enXI3vO1cxivdJOBtXquig4cTXLqw5hWOjUI43W1Qwwkb0U/VTuA8VI3KT+vnpuiVwcH+KR2DuOtb2gHGEhuik7K/6FN7u0ahHFqdxtzUfTqOHCp3nQyXkjtyiYXRQcOATbVDmG897UgjFM5glFeip7av6TGKxsCX9QO0Z/MFz0I4w2BL2jnMKYqlSudzBcd+BrQqR3CmKpDgjBO3W5kHoqeyr+gxlupPDCc6aJXT2ek9pSG8dZx2gFWlumiA4dqBzCmH9sHYbyFdoi+sl70g7UDGLMKB2gH6CuzRQ/CeE1sDjWTXlb0BtkfO9pu0suK3iC22W7SbHSaTrNluej7aQcwZhCpWatnsuhBGK8O7KKdw5hBWNGHaQ9s+mOTflb0YbKj7SYLtgvCeCPtEJDdou+tHcCYGu2vHQCyW/TttQMYU6MdtANABoteHdJ5S+0cxtQo0A4A0F7rC0VkDHAGMKLv9znnPtWEXAPZGOhq8TKNGaoR2gGgjqIDE4ErgWuAnubEqUkq/uGMqVEq3q/1bLovd86Nd87Ncs492vtoWrJVS8U/nDE12iIIY/Vd5HoC3Cki40RkUxFZv/fRtGSrZkU3WdJBCgYurWfTvfdm+jP6POeAUY2LUxMrusmaEcArmgFqLrpzbmQzg9TBim6yJgAe0gxQz1H3DuAE/v8CgCnAVc65ZU3INRAruska9fdsPZvu40n2N35V/frY6nPfbnSoQaj/oxlTJ/X3bD1F39M513cgxgdE5MlGBxpI9a61dVq5TGMaQP1gXD1H3XtEZKveL0RkFK0/n65+msKYIVAfCameNfo
ZwB9F5EVASDZHbEx1Ywanfkt1PUfd7xeR0cA2JEX/i3NuadOSGZMf6kWveVNYRI4EOp1zc0jmOvu1iOzWtGTG5EemNt1/5JybKCL7Ap8DLiE56v4vTUlmWmonefH5w9pmvKadI4+W0vEyFFUz1FP03gNvRWC8c+4OEYkaH8m02ucLjzx2Rcdlo0UYrZ0lp2ZpB6jnKPYrInIVcBRwl4h01fn9JoXGtd0x44qOy3YSYW3tLDmmebcnUN8a/SjgEOAS59y7IrIpH73u3WTMpR3jp3ypbdqB2jk8kP6ii8g6zrn3gNVILnuletfaUmC2iLQ559R/EVO7Ait6JnaeN2P3wvMHamfxxELtALWs0W8lmbX0UZK71aT6fO/na4nINc65s5sT8SNcC5aRa6uxdPH9Xac/tZm8nYpBCz3xtnaAQfexnXOHVj+OdM6Nqn788HNgE+AIEWn6gI3lUnERsKjZy8mr9am8Patr3NzN5O29tLN45u/aAYZ9MM051+Oc2w64uQF5avFyi5aTKyPl1Zdndp28cB1ZvKN2Fg+lf41eBxn8JQ3xUouWkxt7ybN/vr/zjNU7ZXmgncVT2V+j99Gq/Wcreh2OKEyb/T+dPx1REPcx7SweU1+j13N6LS2s6DU6rX3itFPabt9bJJP/n/PkDe0AjXwDfNDAnzUQK3oNftXxi6mfb5uVmkn+PPe8doB6bmoZKyJrVj8/RkR+JiIfjpzhnPtkMwL2w4o+gDZ6lk/qPHualTw1FhJV1O8hqGcffTywSER2Bn5IUrgbm5JqYFb0VViDJf+Y0XXKEzsWyvtpZzEf+qt2AKh/AgcHHAZc5py7DFSuj34VWK6w3FTbiAVvze4a9/ImsmAP7SzmIzJX9IUichZwDBCLSBsKN9SXS8UeYH6rl5tmY2Te32Z0nbJkTVmynXYW80/+oh0A6iv60STXt3/LOfc6sBlwcVNSDS4VfyXTYGzh6acnd4bdHdKzhXYW069UvFdrLrpz7nXn3M+cc9OqX7/snNPYRweYqbTcVPly2wOP3NxxwVYFcRpTY5naPKYdAGq7e226c25fEVnIRy+KEcA55zSGX35YYZmpclb7LQ9+ty0eK0KbdhazSm8QVeZqh4Aaiu6c27f6MU0DE8wEVuDlwBfOTei4ZOqn2x4/UDuJGdQM7QC9MlmUcqlYAf6snaPV2lm+7J7OMx+ykmeGFb0BpmsHaKW1WPTezK6TntqmMH+sdhZTMyt6A9ynHaBVNuXt12d1jXttQ3nPhtfOjsWk5EAcZLvoD5Dsp+fa9lKeO63r1J415INttLOYujxEVGn1TMOrlNmil0vFBSTDW+XWpwqPPTmp8+wN2mXFZtpZTN3u0A7QV2aLXnWvdoBmOa5t8sMTOi7ZtiCsq53FDIkVvYHu1g7QDP/Rft3UqP3GT4rQpZ3FDMnjRJVUDXmW9QEJZgB/A0ZqB2kM527puODBsW3P2C2m2fZ77QAry/QavVwqOuB67RyN0MmypQ90/vtMK3kuWNGb4AYyPt77OrxfeaTrxGdHFV7fWzuLGba5RJU52iFWlvmil0vFl0hOtWXS5vLWq7O6Tvz7evL+LtpZTEP8t3aA/mS+6FXXaQcYip3lheemdn6/sJos20o7i2mI5aT0vZiXov8OqGiHqMfnCrMe/33njzdpE7eJdhbTMHEaxofrTy6KXi4VFwO3aeeo1XfbJs24suMXO4igcYuvaZ6rtQOsSi6KXpXKTaaVXdR+5ZSz2m/dR4RO7SymoeYBk7VDrEpuil4uFR8BntXOsSrCihUTO6MHj2p/8ECRlk1fZVrnWqJKau+9yE3Rq36lHaA/q7F0ydTO02btWXjOpirOp/eBK7RDDCRvRb+alM22uh7vvfNI14nPb1l4q1UTXJjWG09UUZ9fbSC5Knq5VPwAOE87R69AXps3s+ukSrcs2kk7i2maxcCl2iEGk6uiV91ACsbS3kP++uz9naev1iXLc3IdvlmFa4gq6pMoDiZ
3Ra9O8PBjzQyHFWb8aWLneVu02VTFefcBcJF2iFrkruhVv0FpGJ9T2347/RcdV+wiwloayzctNYGo8op2iFrksujVu9rOafVy/6vj8imndfx2X5uP3AvvorzlWI9cFh2gXCpOBh5sxbIKrOi5o/PcaV9om3lgK5ZnUuEnRJW/a4eoVW6LXtX0tXp1quLHdi68aFMV++MZUnrNxqrkuujlUnE68Idm/fwNefetWV3jXtpU3tmzWcswqfR9okqmpu7OddGrTgAWNPqHjpb55Ye7Tl60lizZvtE/26Ta7USVzM0pkPuil0vFV4FxjfyZ+xSefuaezjPX7pCeEY38uSb1FgKnaYcYitwXHaBcKt5Gg25jPbJtyqxbOi4YWRC3QSN+nsmUM4gqL2mHGAovil41Dnh1OD/gzPZfP3hR+9W7i7BGgzKZ7LiXqHKVdoih8qbo1ZldvjnU77+649IpJ7Tfub/NR+6lYb130sCbogOUS8V7gPH1fE87y5fd3RlO/2zbowc2J5XJgOOJKvO1QwyHV0WvOh14vpYXrsnihQ91nTxnu8LL+zY5k0mv64gqE7VDDJd3RS+XiouAY4GegV63Ce+8Mbtr3CsbSWX31iQzKfQoDT5jo8W7osOHw06t8qq5beXlF6d3nbJsDVm6bQtjmXR5EziCqLJEO0gjeFl0gHKpeCH9DCi5f+HJOXd1huu1y4rNFWKZdFgGfImoMk87SKN4W/Sq44EpvV8c03bvzBs6LhxTENbTi2RS4BSiynTtEI0kzmV62rJhC8J4feDhn7Tf8NrX2+7ZT8T7P36+u5qocrx2iEbzvugAE849euS32ifPBDbSzmJU3Q4cSVQZ8EBtFlnRe0Xdu5Fsxq+tnMTouBc4lKjygXaQZrDN1F5R5THgCGCpdhTTcjOAw/NacrCif1RUuR/4AskQvsYPjwNFosoi7SDNZJvu/Ym6DwAmgQ3wmHPPAAcRVd7SDtJstkbvT1SZCnyWjE3FbOryELCfDyUHK/qqRZWHgYNpwug0Rt1dwGeIKt78v7WiDySq/Ak4gJTN52aG5SbgsLzvk6/Mij6YqPIUsBfwsHYUM2w/A47L2sCOjWAH42oVdXcB15Dc+WayZSlwMlHlGu0gWqzo9Yq6Q+ACQLSjmJrMI7lBZbZ2EE1W9KGIur8IXA9280vK3Q98OUszqjSL7aMPRVT5A/AJ+tz5ZlLnQuBzVvKErdGHI+ouAGcAPwU6lNOYxDzgO0SVe7SDpIkVvRGi7t2BW4Ex2lE8NwH4AVHlPe0gaWNFb5Soew2SNfspYNMmt9h8krX4ZO0gaWVFb7SoeyfgCsBmV20+R7IWP52oYpcrD8CK3ixR97HAxcDG2lFyairJZvpj2kGywIreTFF3N8nm/PFAp3KavHgB+CFR5XbtIFliRW+FqHsEcC7wdWz/fagWAOcDv8zzABHNYkVvpah7JPAjkstorfC1eQ34OXAlUWWhdpissqJriLq3IplA4ivAaspp0uoF4CLgRqKKDe81TFZ0TVH3BiSzdH4PGKWcJi1mAZcCvyGqrNAOkxdW9DSIugU4hGSer8/j36XJC4CbgWuJKnO0w+SRFT1tou4tgSOBfwP+hfzeJbcMuBu4EZhkm+fNZUVPs6h7C+BLJKXfh+yX/m1gMhADk30aykmbFT0rou6PkwxY+Sng08DHdQPVxAFPkxR7EjAzj7OgZIEVPaui7q2BfauPvYBt0L8o502Sg2mPVB+ziSrv6kYyYEXPj6i7Hdga2KHPY3tgc2DdBi5pOfASMHelxxNElXIjFiAiG5AMGgGwCdAD9A7LvJdzzi6YqZMV3QfJeHcbkVx33/tYn+Qe+vY+j7bqx8XAu9VHpc/nbwPzWjm4oohEwPvOuUtWel5I3r92Cq4GdnWWD5Ij2vOqj8wSka2B3wPTSc5IHC4iTzrn1q3+9y8DBzvnvi0iGwPjgS2BFcApzrmZStHV+Xa+1mTf9sAE59yuwCsDvO5y4CLn3B7
AUcC1rQiXVrZGN1kz1zlXy4iuBwPbJFv4AKwnIqs757ycQNOKbrLmH30+X8FHry3oe9+AYAfuPmSb7iazqgfiFojIaBEpkMxv3+s+4MTeL0Rkl1bnSxMrusm6M0mutrufZOy4XicCY0Vkjoj8GfiORri0sNNrxnjA1ujGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeOD/AM0W+8UGUHW2AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "is_json_counts = df['is_json'].value_counts().compute()\n", + "is_json_counts.plot(kind='pie')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As the value_len increases the frequence of valid JSON on the columns 'value' also increases,\n", + "for the rows that have the value_len one std above the mean, we have the following:\n", + "- isJson True: 46691 rows\n", + " - 99,88% are valid JSON\n", + " \n", + "- isJson False: 54 rows\n", + " - 0,11% are not valid JSON\n", + "\n", + "\n", + "The valid json also represent 9.35% of the data because the number of non Json are too small to make a percentual difference. " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "one std above the mean = len: 46745 (9.35%)\n" + ] + } + ], + "source": [ + "print(\"one std above the mean = len: {0} ({1:0.2f}%)\".format(A_COUNT, A_COUNT / COUNT * 100))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_len
meanstdminmaxcount
is_json
False82460.05555613627.1180632813510465354
True271422.740185412552.29861327669449686146691
\n", + "
" + ], + "text/plain": [ + " value_len \n", + " mean std min max count\n", + "is_json \n", + "False 82460.055556 13627.118063 28135 104653 54\n", + "True 271422.740185 412552.298613 27669 4496861 46691" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group = std_above.groupby('is_json')\n", + "group_result = group.agg({'value_len': ['mean', 'std', 'min', 'max', 'count']}).compute()\n", + "group_result" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "NOT json count: 54 (0.01%)\n", + "IS json count: 46691 (9.34%)\n" + ] + } + ], + "source": [ + "a = group_result['value_len']['count']\n", + "print(\"NOT json count: {0} ({1:0.2f}%)\".format(a[0], a[0] / COUNT * 100))\n", + "print(\"IS json count: {0} ({1:0.2f}%)\".format(a[1], a[1] / COUNT * 100))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQIAAADuCAYAAADSvgkdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEh1JREFUeJzt3XmQnVWdxvHv6bCDvAhhSSAYdmWRyD6Cg0HQQbawODAFOkOGJYIwTlGKoIynQEUccAw4YA2jcWBAKAUxhkFBtlKjISyhhmUgxRAQIQsk8wYyMEnnnvnjvE132l7u7bv87nnf51N1qzu3b/d9oO99+rzLeY8LISAi1dZjHUBE7KkIRERFICIqAhFBRSAiqAhEBBWBiKAiEBFUBCKCikBEUBGICCoCEUFFICKoCEQEFYGIoCIQEVQEIoKKQERQEYgIKgIRQUUgIqgIRAQVgYigIhARVAQigopARFARiAgqAhFBRSAiqAhEBFjPOoB0iM82BCYWt62ADYH1gQ2Ir4MasLb42AssBxYXt2X4vGaQWjrEhRCsM0gr+GwcsCewH7Az8Q2/fXGbCIxv4qevBV6nvxgWA4uAJ4An8PnLTfxs6QIqghT5zAF7AAcUtwOBKcAmRoneoK8U4PHi40KNItKhIkiFz3YDTgCOJr75N7cNNKqVwH3AHOBufL7MOI+MQEXQreJf/YOJb/4TgA/YBmpKDZhPLIU5+HyBcR4ZREXQTXy2HvBxYBpwHLCdbaC2eYVYCjfj87nWYURF0B18Nhk4GzgTmGAbpuOeBb4P3KTNBzsqAitx6H80cAFxFFD1czpWAz8GrsPn86zDVI2KoNN8tgnwGeDvgPcbp+lW84GZwG34fK11mCpQEXRKPKHnfOASmjumXyXPApfh8zusg5SdiqDdfNYDnA5cAbzPOE2qHgUuxef3WQcpKxVBO/nsL4BvAvtaRymJB4FLtA+h9VQE7eCzA4CrgCOso5TUz4Av4vPnrYOUhYqglXy2OfBtYDrgjNOU3TvAV4FrtEOxeSqCVvHZUcTj4ZOso1TMI8B0fP60dZCUqQia5bPNgH8EZlhHqbDVwOXAVfi81zpMilQEzfDZ4cAsYCfrKALEWY9n4vMnrYOkRkUwFj7bGLgSuBDtC+g2a4B/wOfftA6SEhVBo3w2ibjX+kPWUWREPyaODlZZB0mBiqARPjsE+CnlnRVYNk8C0/D5Iusg3a7qE13q57NPAw+hEkjJvsB8fDbVOki304hgNPEU4SuBL1pHkTHrBS7C59daB+lWKoKR+Ow9wK3AsdZRpCVmAefi8zXWQbqNimA4PtuOeM29va2jSEvdDZyCz9+xDtJNVARD8dn2wAPA7tZRpC0eAI7XEYV+KoLBfLYj8YWyi3UUaau5wNH4fKV1kG6gIhgolsDDwGTjJNIZvwM+gc/ftA5iTYcP+/hsAnA/KoEq+TPgbny2qXUQayoCAJ+NB34F7GodRTruI8AcfLaRdRBLKoL41+Be4rqBUk0fBX5gHcJStYsgXlL8JjRvQOCv8NmXrUNYqXYRwGXASdYhpGtcgc8q+Xqo7lEDn00D7kTTiGVdq4DDqrY+YzWLwGd7Ew8dbWYdRbrSH4AD8fkS6yCdUr1NA59tSbyegEpAhjMJuKtYlKYSqlUEPhsH3A7sbB1Fut4hwHXWITqlWkUAlwJHWoeQZJyNz462DtEJ1dlH4LN9gMeA9a2jSFJeBfbG5yusg7RTNUYEPluPOBddJSCNmkgFNhGqUQTx6kL7W4eQZJ2Oz060DtFO5d808NlewOPABtZRJGlLiZsIy6yDtEO5RwTxKMEsVALSvG2AG6xDtEu5iwAuAg60DiGlcXJZT0Eu76aBz3YAFgKVnl4qLbcQ2LNsayyWeUTgUQlI6+0GnGMdotXKOSLw2R7A08A46yhSSkuBXfD5W9ZBWqWsI4KvoRKQ9tkG+IJ1iFYq34jAZ/sD89H0YmmvVcCu+Hy
xdZBWKOOI4EpUAtJ+mwJftQ7RKuUaEcTFLh+wjiGV0Qvshc+ftw7SrLKNCL5hHUAqZT1KsjhueUYEPjsM+LV1DKmcd4AdUz/1uEwjggusA0glbQR81jpEs8oxIoiLli4iDtVEOm0JcVSw2jrIWJVlRDADlYDY2ZbEL4uffhHEGYZ/ax1DKm+GdYBmpF8EcAwwwTqEVN7h+Oz91iHGqgxFcJZ1AJHC2dYBxirtIvDZtsAnrWOIFD5lHWCs0i4COB5NLpLuMQmf7WcdYixSL4JjrQOIDDLNOsBYpFsEPtsILVYi3UdF0GFHAJtYhxAZZB98tpN1iEalXATHWQcQGUZyo4KUi+AY6wAiwzjBOkCj0pxr4LMpwBPWMUSGsRbYFp+/YR2kXqmOCDQakG42DjjUOkQjUi2CpP4nSyUltbBOqkWQ5EkbUikqgrby2QTitE+RbnaAdYBGpFcEGg1IGrZK6XwCFYFI+ySzeaAiEGkfFUEbqQgkFcnsJ0irCHy2FbCjdQyROu1jHaBeaRUBJHspKKmkrYpZsl2vriJwzt1fz30doGsTSmq2tw5QjxEvAe6c24g41Xe8c+699C8uujkwsc3ZhrKdwXOKNGN74AXrEKMZbS2Ac4HPE9/0j9FfBCuBf25jruGoCCQ16Y8IQggzgZnOuQtCCNd1KNNIVASSmvSLoE8I4Trn3IeByQO/J4RwU5tyDUf7CCQ15SkC59zNwC7AAuJca4AAdLoINCKQ1JSnCIgnRuwZ7K9ioiKQ1FjsVG9YvecRPEV3vAm3sQ4g0qBN632gc26tc27BgNvkER472Tn3VCsCQv0jgvHAM865R4Aa8OHi/reJmwrLin8fFEJoz9LQcbFTrXgsqdmwgce+HUKY0rYkI6j3jeWHuX8q8FYI4eqBdzrnHPF6iLUmsg2mFY0kRRs0883FqOBm+kcWnwshzB30mL2AWcVz9QAnhxAWOufOAC4s7p8HnBdCWMsQ6j1q8PAwIacO+HxX4C7gN8DBwDTn3JMhhC2Kr58GHBlCOMs5ty1wA3HeQA24MITw+1FiaDQgXSsEAvG13PexBtQCbk0D5/Fv7JxbUHz+YgjhRGApcFQI4R3n3G7Aj/jTyUwzgJkhhFuccxsA45xzHwBOBQ4NIaxxzl0PnM4wO/jrPWrwZvEfCLFd1gdWAd8e9NA9gTNDCDOccyP97GuBb4UQfl803hxg71FiaETQRsULeZ0X8aAbAWrgihd432NdrXhh1AIu9H/N9X0MAULo/1gb4r5BN+j7vDbg/uJzQuj/vLbuY1yNnlDDER/fw4DH0fe1Gj3Uwrtf59376GFt/+ch4NzagV8LPcXz9bgajrX0uP7Hvnuy3Tp6GZfPqP/XMNSmwfrAd51zU4ib4bsP8X2/A77snNsBuLMYDXwM2B+YHwfobEwslSHVOyJ4z8B/O+emAQcN8dAXQgjz6/iRRwJ7FAEB3uuc2ziE8PYI39PKzQwZxDkc8cU87B+w+NsafODI4EDSkG+5rvVyk9//98ASYF/i7+adwQ8IIdzqnJtHvLr3L51zZxH/L/1bCOGSep5kTLMPQwh3EZccG2zVgM9rrPsrGzgLyxF3LE4pbtuPUgIAa8aSVcRYs6/bDHit2N/2aYYYGTvndgb+O4RwLTAb+CBwP3CKc26b4jFbOufeN9yT1LtpcNKAf/YQt1FG/FMQQqg551YU2zUvACfSf3ThV8D5wD8VP39KCGHB0D/pXSoCSVFvk99/PXCHc+5TwIOs+8e2z6nAGc65NcBi4PIQwnLn3FeAe51zPcT3z/nAS0M9SV0rHTnnZg34Zy+wCLgROI/iqEGxs/AnA7dxnHOnAt8gDo+eATYsdhZuTdxZuDuxjB4MIZw/ahCf9aJ9BZKWp/B511+gJK0lz3y2AtjCOoZIA36Nz//cOsRo6r0wyQ7OuZ8655Y655Y45+4o9lB22msGzynSjCRes/XuLJxF3AkxkTiJ4ufFfZ32R4PnFGnGq9YB6lF
vEWwdQpgVQugtbj8Etm5jruEk8T9VZIBSjQhed86d4ZwbV9zOACyWfFYRSGpKVQTTgb8kHpp4DTgFOLNdoUagIpDUJFEE9Z6/fwXw1yGEFRBPTgCuJhZEJ6kIJDVJFEG9I4IP9pUAQAhhOfCh9kQakYpAUpPEa7beIugpLmcOvDsisJgN+AeD5xQZqzfx+YrRH2av3iK4BpjrnLvCOXc5MBf4VvtiDcPnrwDLO/68ImPzhHWAetVVBMXVik8mzoJaBpwUQri5ncFG8LjR84o06jHrAPWqe3gfQniGOF/A2qPEacwi3S6ZP1qpLYIKsQhEUpDMiEBFINIebwHPWYeoV3pF4POXgNetY4iMYgE+T+aqWukVQZTMkEsqK5n9A5BuEdRzXUQRS0ltwqZaBA9ZBxAZQQDutQ7RiFSL4GHgf6xDiAzjEXy+xDpEI9IsAp/3AvdYxxAZxmzrAI1Kswiin1kHEBnGz60DNCrlIrgHXeJcus8ifP6f1iEalW4R+Hwl2mko3Se50QCkXARRcttiUnpJviZVBCKts5J4RCs5aReBz18mXhtBpBvchs+T3G+VdhFEN1oHECkk+1osQxHcjk4uEnsL8HlSpxUPlH4R+Pxt4BbrGFJ5yY4GoAxFEF1vHUAqbSVgdem+lihHEfj8GeA+6xhSWbPw+ZvWIZpRjiKIvmMdQCqpBlxnHaJZZSqCe0jo0lBSGnPw+QvWIZpVniLweQC+bh1DKiUA3jpEK5SnCKJbgOQmfEiybsfnySxiMpJyFUG8WOQl1jGkEtYAX7EO0SrlKgIAn99Noud7S1L+tQz7BvqUrwiii60DSKmtAi63DtFK5SwCn88D7rSOIaU1E58vtg7RSuUsguhSoNc6hJTOcixWAm+z8haBz58D/sU6hpTOZfg8tw7RauUtguhLwMvWIaQ0HgJusA7RDuUugnj+93TiiR8izVgFTC9OXCudchcBgM/vB75nHUOSdzE+f9E6RLuUvwiiLwCl/SVK2z1Eyae6V6MIfL4KOBNtIkjjSr1J0KcaRQDg84cpwXRR6bhSbxL0qU4RRJcA/2UdQpLxC0q+SdDHhVDqEc+f8tnuwDxgC+so0tWeBw7G55W4MG7VRgTg8+eBU4G11lGka+XA8VUpAahiEQD4/F7gIusY0pVqwGnFmamVUc0iAPD5TOD71jGk61yMz39hHaLTqlsE0XnAb6xDSNe4GZ9fbR3CQrWLwOergZOAl6yjiLl5wNnWIaxUuwgAfL4M+CSwzDqKmHkGOBaf/591ECsqAuhbIOUoYIV1FOm4hcDH8Pnr1kEsqQj6+PxJ4OPEQ0dSDS8CR5TtakNjoSIYKK5m+wm0unIVvABMxeevWAfpBiqCweL1Do8AKj1ULLnngMPxuXYSF1QEQ4mLVnwUqPyQsYSeJpbAH62DdBMVwXB8/jRwGPCsdRRpmfuAj+DzJdZBuo2KYCRxAYtDgDnWUaRpM4Gj8bmODA2herMPx8JnPcDX0HJqKVoNnIfPdTr5CFQEjfDZacAPgI2to0hdlgIn4fPfWgfpdiqCRvlsP+AuYJJ1FBnRAuAEfK7L2ddB+wga5fPHgQOAB62jyLB+CByqEqifRgRj5TNHnL14FbCpcRqJXgPOwefaudsgFUGzfLYT8boGU62jVNy/AxfqqMDYqAhaIY4OPkscHWxmnKZqFgPn4vPZ1kFSpiJoJZ9NJo4OjjBOUhW3Ahfg8+XWQVKnImi1ODr4G+ByYAfbMKX1JPClKl5SrF1UBO3is42A84FLgS2N05TFi8BlwK1lX3mo01QE7eazjLj24ufR0YWxWkY8s/N7xeXlpMVUBJ3is22Jf83OAdY3TpOKt4BrgGuKJe6lTVQEneaznYlrKnwGHWEYzhLiUvbX4/Ol1mGqQEVgJW4yTAc+B+xsnKZbPEGcJfgjbQJ0lorAWpzZeBTxUtrHU73NhreA24Ab8fkj1mGqSkXQTXy2DXGT4RTgIMDZBmqb1cS5Gj8Bbtf
2vz0VQbeKOxePAY4jjhhSP+LwBvAfwGzgl3rzdxcVQQriOQlTiaVwLOlMgV5IfOPPBn6Lz7UCdZdSEaQoTnTaH9hvwMfxppngFeCx4vYo8Jj2+KdDRVAWPptEfynsDUwEJgDbARu06Fn+F3iVON33VeIVgeMbX2/6pKkIyi7OfdiSWAgT6C+HTYH1gHHFR4BeYE1xe5s4s+9V+t78PtcqUCWlIhARXapMRFQEIoKKQERQEYgIKgIRQUUgIqgIRAQVgYigIhARVAQigopARFARiAgqAhFBRSAiqAhEBBWBiKAiEBFUBCKCikBEUBGICCoCEUFFICKoCEQEFYGIoCIQEVQEIoKKQERQEYgIKgIRQUUgIqgIRAT4fxoxDoxWW+6XAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "group_result['value_len']['count'].plot(kind='pie')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### All greater values are JSON\n", + "\n", + "There is absolute no value greater than 104653 (max value for non-json) that represents a valid JSON. \n", + "\n", + "This implies that all the greater values are JSON but they represent very low percentage of the whole data (6.76%). " + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "104653\n", + "len: 33788 (6.76%)\n" + ] + } + ], + "source": [ + "max_non_json_value = group_result['value_len']['max'][0]\n", + "allJson = df[df['value_len'] > max_non_json_value ]\n", + "length = len(allJson)\n", + "print(\"len: {0} ({1:0.2f}%)\".format(length, length / COUNT * 100))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c5ec9b967a2c62cd4a94d52b2dbffd5799398b3e Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 14:11:53 -0300 Subject: [PATCH 05/23] Data prep saving other samples --- .../isJson_dataPrep.ipynb | 204 +++++++++++++++++- 1 file changed, 199 insertions(+), 5 deletions(-) diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb index 29b5242..547739c 100644 --- 
a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb @@ -30,7 +30,7 @@ "\n", "

Client

\n", "\n", "\n", @@ -46,7 +46,7 @@ "" ], "text/plain": [ - "" + "" ] }, "execution_count": 1, @@ -184,7 +184,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# is JSON analysis\n", + "# is JSON Column\n", "\n", "After manual initial analysis I have think that the huge values are json structured, to validate that I included an new column that is a boolean value with the validation of json" ] @@ -244,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -356,7 +356,7 @@ "4 False " ] }, - "execution_count": 10, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -366,6 +366,200 @@ "df = dd.read_parquet('is_json_above_mean.parquet')\n", "df.head()" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Saving other possible usefull samples to future analyses" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/fastparquet/util.py:221: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly.\n", + " inferred_dtype = infer_dtype(column)\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return 
fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=20976)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=20976)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=20976)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "distributed.nanny - WARNING - Worker process 20976 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n" + ] + } + ], + "source": [ + "df[df['is_json'] == True].to_parquet('all_json_above_mean.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df[df['is_json'] == False].to_parquet('all_NON_json_above_mean.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From c3fb738d358db68418f068525bfa141bdd9ddcc4 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 14:18:07 -0300 Subject: [PATCH 06/23] Update readme with future questions --- .../README.md | 45 
+++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/README.md b/analyses/2019_03_aliamcami_greatest_values_are_json/README.md index 67e78a1..6c7d919 100644 --- a/analyses/2019_03_aliamcami_greatest_values_are_json/README.md +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/README.md @@ -24,3 +24,48 @@ All the greatest values are JSON, but they represent very little percentage of t ## The top 46745 gratest value_len are valid JSONs, that is 9.35% of the filtered sample (value_len > mean) and 0,41% of the original 10% sample. + + +--- + +# Future questions + +## About JSONs: +- **Are the JSON values always from the same location or related domains?** +- **Is there a set of location domains that always produces JSON?** +- Do the JSON values follow a structure pattern? What pattern? +- What data does the JSON hold? Is there any pattern in the content? +- Do they have nested JSON? CSS? HTML? JavaScript? Recursive study on JSON properties. + +- Is a JSON's structure for a single script_url domain always the same? +- Is every JSON with the same structure produced by the same script_url domain? + +## General +I think some of the things here may only need a crawler investigation or just wiki reading, since someone may have already described and explained them. I just need to find, read and understand it. + +- Are there other valid data types like HTML, CSS... in the value column or just JSON? +- Where does the value come from? What is it used for? + +## Small: value_len < mean +- What are the small values? +- Do the smaller values have any pattern? +- What is the majority data type? + +## Medium: mean < value_len < (mean + std) +- How many rows are there in the intersection of *“no JSON”* and *“everything is JSON”* ? +- What are they? Are they from a specific script_url domain? Or related domains? + +## Big: value_len > (mean + std) +- What are the big non-JSON values?
+ +## Security and data sharing: +- Do the value columns have any JavaScript? Nested JavaScript? +- Do the JavaScript snippets in the dataset contain known malicious behaviors? +- Can they collect data that threatens users' privacy? + +## Statistical knowledge / coincidence: +The **mean** of the original 10% sample is pretty similar to the **std** of the sample taken after filtering for values above the mean. +- Why? +- Is it a coincidence? +- Is it always like this? +- Is it a statistical pattern? \ No newline at end of file From 1a5bcdb740460048a9f149b0466958088d1e895e Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 14:57:17 -0300 Subject: [PATCH 07/23] Add of 'domain' column to data prep --- .../isJson_dataPrep.ipynb | 993 ++++++++++++++++-- 1 file changed, 934 insertions(+), 59 deletions(-) diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb index 547739c..5175e39 100644 --- a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -30,7 +30,7 @@ "\n", "

Client

\n", "\n", "\n", @@ -46,10 +46,10 @@ "" ], "text/plain": [ - "" + "" ] }, - "execution_count": 1, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -75,17 +75,27 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "#Original sample\n", "df = dd.read_parquet('sample_0.parquet', \n", " engine='pyarrow', \n", - " columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url'])\n", + " columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'])\n", "\n", "# df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str})\n", - "df_index={'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str}\n", "df.columns" ] }, @@ -103,39 +113,28 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 4, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "499805" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "#Save\n", - "dff = df[df['value_len'] > 1356]\n", - "dd.to_parquet(df=dff, path='filtered_above_mean.parquet', engine='pyarrow')\n", + "df = df[df['value_len'] > 1356]\n", + "dd.to_parquet(df=df, path='filtered_above_mean.parquet', engine='pyarrow')\n", "# len(dff)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url'], dtype='object')" + "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'], dtype='object')" ] }, - "execution_count": 2, + "execution_count": 7, "metadata": {}, 
"output_type": "execute_result" } @@ -180,6 +179,196 @@ "print(df_mean, df_min, df_max, df_std)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Domains\n", + "The following code is from this same project: ~/analyses/hello_world.ipynb\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import tldextract\n", + "\n", + "def extract_domain(url):\n", + " \"\"\"Use tldextract to return the base domain from a url\"\"\"\n", + " try:\n", + " extracted = tldextract.extract(url)\n", + " return '{}.{}'.format(extracted.domain, extracted.suffix)\n", + " except Exception as e:\n", + " return 'ERROR'" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str, 'location': str})\n", + "df['location_domain'] = df.location.apply(extract_domain)\n", + "df['script_domain'] = df.script_url.apply(extract_domain)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "#save\n", + "df.to_parquet('0_sample_domains.parquet', engine='pyarrow')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000valuevalue_lensymbolscript_urllocationlocation_domainscript_domain
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...3713window.sessionStoragehttps://assets.adobedtm.com/caacec67651710193d...https://www.canada.ca/en/services.htmlcanada.caadobedtm.com
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jshttps://maniform.world.tmall.com/category-1282...tmall.comalicdn.com
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jshttps://maniform.world.tmall.com/category-1282...tmall.comalicdn.com
3usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...https://www.coches.net/fiat/segunda-mano/coches.netcoches.net
4usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...https://www.coches.net/fiat/segunda-mano/coches.netcoches.net
\n", + "
" + ], + "text/plain": [ + " value_1000 \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "\n", + " value value_len \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "\n", + " symbol script_url \\\n", + "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", + "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "\n", + " location location_domain \\\n", + "0 https://www.canada.ca/en/services.html canada.ca \n", + "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", + "2 https://maniform.world.tmall.com/category-1282... 
tmall.com \n", + "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "\n", + " script_domain \n", + "0 adobedtm.com \n", + "1 alicdn.com \n", + "2 alicdn.com \n", + "3 coches.net \n", + "4 coches.net " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read\n", + "df = dd.read_parquet('0_sample_domains.parquet', engine='pyarrow')\n", + "df.head()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -191,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -207,11 +396,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str})\n", + "\n", "df['is_json'] = df['value'].apply(is_json)" ] }, @@ -225,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -244,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -273,6 +462,9 @@ " value_len\n", " symbol\n", " script_url\n", + " location\n", + " location_domain\n", + " script_domain\n", " is_json\n", " \n", " \n", @@ -284,6 +476,9 @@ " 3713\n", " window.sessionStorage\n", " https://assets.adobedtm.com/caacec67651710193d...\n", + " https://www.canada.ca/en/services.html\n", + " canada.ca\n", + " adobedtm.com\n", " True\n", " \n", " \n", @@ -293,6 +488,9 @@ " 103878\n", " window.localStorage\n", " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", + " https://maniform.world.tmall.com/category-1282...\n", + " tmall.com\n", + " alicdn.com\n", " True\n", " \n", " \n", @@ -302,6 +500,9 @@ " 103878\n", " window.localStorage\n", " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", + " 
https://maniform.world.tmall.com/category-1282...\n", + " tmall.com\n", + " alicdn.com\n", " True\n", " \n", " \n", @@ -311,6 +512,9 @@ " 1358\n", " window.document.cookie\n", " https://www.coches.net/scripts/common.min.js?2...\n", + " https://www.coches.net/fiat/segunda-mano/\n", + " coches.net\n", + " coches.net\n", " False\n", " \n", " \n", @@ -320,6 +524,9 @@ " 1358\n", " window.document.cookie\n", " https://www.coches.net/scripts/common.min.js?2...\n", + " https://www.coches.net/fiat/segunda-mano/\n", + " coches.net\n", + " coches.net\n", " False\n", " \n", " \n", @@ -348,15 +555,22 @@ "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", "\n", - " is_json \n", - "0 True \n", - "1 True \n", - "2 True \n", - "3 False \n", - "4 False " + " location location_domain \\\n", + "0 https://www.canada.ca/en/services.html canada.ca \n", + "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", + "2 https://maniform.world.tmall.com/category-1282... tmall.com \n", + "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "\n", + " script_domain is_json \n", + "0 adobedtm.com True \n", + "1 alicdn.com True \n", + "2 alicdn.com True \n", + "3 coches.net False \n", + "4 coches.net False " ] }, - "execution_count": 2, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -376,17 +590,15 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/fastparquet/util.py:221: FutureWarning: A future version of pandas will default to `skipna=True`. 
To silence this warning, pass `skipna=True|False` explicitly.\n", - " inferred_dtype = infer_dtype(column)\n", "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", + "tornado.application - ERROR - Exception in callback >\n", "Traceback (most recent call last):\n", " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", " ret = self._cache[fun]\n", @@ -433,8 +645,8 @@ " self.gen.throw(type, value, traceback)\n", " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=20976)\n", - "tornado.application - ERROR - Exception in callback >\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21460)\n", + "tornado.application - ERROR - Exception in callback >\n", "Traceback (most recent call last):\n", " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", " ret = self._cache[fun]\n", @@ -481,8 +693,11 @@ " self.gen.throw(type, value, traceback)\n", " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=20976)\n", - "tornado.application - ERROR - Exception in callback >\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21460)\n", + "distributed.nanny - WARNING - Worker process 21460 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", "Traceback (most recent call last):\n", " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", " ret = self._cache[fun]\n", @@ -529,25 +744,685 @@ " self.gen.throw(type, value, traceback)\n", " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=20976)\n" + "psutil.AccessDenied: psutil.AccessDenied (pid=21468)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "distributed.nanny - WARNING - Worker process 20976 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n" - ] - } - ], - "source": [ - "df[df['is_json'] == True].to_parquet('all_json_above_mean.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = 
cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21468)\n", + 
"distributed.nanny - WARNING - Worker process 21468 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception 
occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in 
wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", + "distributed.nanny - WARNING - Worker process 21484 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call 
last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21476)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in 
memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21476)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No 
such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21476)\n", + "distributed.nanny - WARNING - Worker process 21476 was killed by unknown signal\n", + 
"distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21497)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, 
value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21497)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21497)\n", + "distributed.nanny - WARNING - Worker process 21497 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return 
fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21489)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=21489)\n", + "distributed.nanny - WARNING - Worker process 21489 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n" + ] + } + ], + "source": [ + "df[df['is_json'] == True].to_parquet('all_json_above_mean.parquet', engine='pyarrow')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ From efc051ecd1ec58daf6191954aad35786bda6163b Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 17:36:32 -0300 Subject: [PATCH 08/23] Update jsJson_dataPrep to include an extra column with the md5 of value columns --- .../isJson_dataPrep.ipynb | 767 +++++++++++++++++- 1 file changed, 762 insertions(+), 5 deletions(-) diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb index 5175e39..f1cb759 100644 --- a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb +++ b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -30,7 +30,7 @@ "\n", "

Client

\n", "\n", "\n", @@ -46,10 +46,10 @@ "" ], "text/plain": [ - "" + "" ] }, - "execution_count": 2, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -431,6 +431,763 @@ "df.to_parquet('is_json_above_mean.parquet')" ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000valuevalue_lensymbolscript_urllocationlocation_domainscript_domainis_json
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...3713window.sessionStoragehttps://assets.adobedtm.com/caacec67651710193d...https://www.canada.ca/en/services.htmlcanada.caadobedtm.comTrue
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jshttps://maniform.world.tmall.com/category-1282...tmall.comalicdn.comTrue
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jshttps://maniform.world.tmall.com/category-1282...tmall.comalicdn.comTrue
3usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...https://www.coches.net/fiat/segunda-mano/coches.netcoches.netFalse
4usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...https://www.coches.net/fiat/segunda-mano/coches.netcoches.netFalse
\n", + "
" + ], + "text/plain": [ + " value_1000 \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "\n", + " value value_len \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "\n", + " symbol script_url \\\n", + "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", + "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "\n", + " location location_domain \\\n", + "0 https://www.canada.ca/en/services.html canada.ca \n", + "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", + "2 https://maniform.world.tmall.com/category-1282... 
tmall.com \n", + "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "\n", + " script_domain is_json \n", + "0 adobedtm.com True \n", + "1 alicdn.com True \n", + "2 alicdn.com True \n", + "3 coches.net False \n", + "4 coches.net False " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read\n", + "df = dd.read_parquet('is_json_above_mean.parquet')\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Value md5\n", + "Add a new column called \"value_md5\" that holds the MD5 hash of the value column" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import hashlib\n", + "\n", + "def md5(value):\n", + " return hashlib.md5(value.encode('utf-8')).hexdigest()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "df['value_md5'] = df['value'].apply(md5)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000valuevalue_lensymbolscript_urllocationlocation_domainscript_domainis_jsonvalue_md5
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...3713window.sessionStoragehttps://assets.adobedtm.com/caacec67651710193d...https://www.canada.ca/en/services.htmlcanada.caadobedtm.comTruecff77029e3ae45dd439a62987b1d8340
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jshttps://maniform.world.tmall.com/category-1282...tmall.comalicdn.comTrue9ac0a0a0afb677c8fd985a7c2f4ddbc5
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jshttps://maniform.world.tmall.com/category-1282...tmall.comalicdn.comTrue9ac0a0a0afb677c8fd985a7c2f4ddbc5
3usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...https://www.coches.net/fiat/segunda-mano/coches.netcoches.netFalsedb64465b639e01993d9212390f057628
4usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...https://www.coches.net/fiat/segunda-mano/coches.netcoches.netFalsedb64465b639e01993d9212390f057628
\n", + "
" + ], + "text/plain": [ + " value_1000 \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "\n", + " value value_len \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "\n", + " symbol script_url \\\n", + "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", + "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "\n", + " location location_domain \\\n", + "0 https://www.canada.ca/en/services.html canada.ca \n", + "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", + "2 https://maniform.world.tmall.com/category-1282... 
tmall.com \n", + "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + "\n", + " script_domain is_json value_md5 \n", + "0 adobedtm.com True cff77029e3ae45dd439a62987b1d8340 \n", + "1 alicdn.com True 9ac0a0a0afb677c8fd985a7c2f4ddbc5 \n", + "2 alicdn.com True 9ac0a0a0afb677c8fd985a7c2f4ddbc5 \n", + "3 coches.net False db64465b639e01993d9212390f057628 \n", + "4 coches.net False db64465b639e01993d9212390f057628 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/fastparquet/util.py:221: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly.\n", + " inferred_dtype = infer_dtype(column)\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return 
fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1373)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1373)\n", + "distributed.nanny - WARNING - Worker process 1373 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 
344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1375)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret 
= self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in 
_get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1375)\n", + "distributed.nanny - WARNING - Worker process 1375 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n", + "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + 
"AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No 
such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n", + "tornado.application - ERROR - Exception in callback >\n", + "Traceback (most recent 
call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: 'Process' object has no attribute '_cache'\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", + " ret = self._cache[fun]\n", + "AttributeError: _cache\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", + " yield\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + "ProcessLookupError: [Errno 3] No such process\n", + "\n", + "During handling of the above exception, another exception occurred:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", + " return self.callback()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", + " memory = proc.memory_info().rss\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", + " return self._proc.memory_info()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", + " rawtuple = self._get_pidtaskinfo()\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", + " return fun(self, *args, **kwargs)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", + " return fun(self)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", + " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", + " self.gen.throw(type, value, traceback)\n", + " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", + " raise AccessDenied(proc.pid, proc._name)\n", + "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n", + "distributed.nanny - WARNING - Worker process 1421 was killed by unknown signal\n", + "distributed.nanny - WARNING - Restarting worker\n" + ] + } + ], + "source": [ + "#save\n", + "df.to_parquet('is_json_above_mean_md5.parquet')" + ] + }, { "cell_type": "code", "execution_count": 16, @@ -577,7 +1334,7 @@ ], "source": [ "#read\n", - "df = dd.read_parquet('is_json_above_mean.parquet')\n", + "df = dd.read_parquet('is_json_above_mean_md5.parquet')\n", "df.head()" ] }, From 2b617decabb37103ca60012742af1ae2511b821e Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 19:42:13 -0300 Subject: [PATCH 09/23] Rename 'isJson_Sample_Comparasion' to 'isJson_Quantitative_Comparasion' --- ...le_Comparasion.ipynb => isJson_Quantitative_Comparasion.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename analyses/2019_03_aliamcami_greatest_values_are_json/{isJson_Sample_Comparasion.ipynb => isJson_Quantitative_Comparasion.ipynb} (100%) diff --git 
a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Sample_Comparasion.ipynb b/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantitative_Comparasion.ipynb similarity index 100% rename from analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Sample_Comparasion.ipynb rename to analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantitative_Comparasion.ipynb From 68700ecf597d3ae859295a68dd33f67f68288232 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 19:44:46 -0300 Subject: [PATCH 10/23] Rename folder from ''2019_03_aliamcami_greatest_values_are_json' to '2019_03_aliamcami_value_analyses' --- .../README.md | 0 .../isJson_Quantitative_Comparasion.ipynb | 0 .../isJson_Quantity_Analysis.ipynb | 0 .../isJson_dataPrep.ipynb | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename analyses/{2019_03_aliamcami_greatest_values_are_json => 2019_03_aliamcami_value_analyses}/README.md (100%) rename analyses/{2019_03_aliamcami_greatest_values_are_json => 2019_03_aliamcami_value_analyses}/isJson_Quantitative_Comparasion.ipynb (100%) rename analyses/{2019_03_aliamcami_greatest_values_are_json => 2019_03_aliamcami_value_analyses}/isJson_Quantity_Analysis.ipynb (100%) rename analyses/{2019_03_aliamcami_greatest_values_are_json => 2019_03_aliamcami_value_analyses}/isJson_dataPrep.ipynb (100%) diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/README.md b/analyses/2019_03_aliamcami_value_analyses/README.md similarity index 100% rename from analyses/2019_03_aliamcami_greatest_values_are_json/README.md rename to analyses/2019_03_aliamcami_value_analyses/README.md diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantitative_Comparasion.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Quantitative_Comparasion.ipynb similarity index 100% rename from analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantitative_Comparasion.ipynb rename to 
analyses/2019_03_aliamcami_value_analyses/isJson_Quantitative_Comparasion.ipynb diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantity_Analysis.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Quantity_Analysis.ipynb similarity index 100% rename from analyses/2019_03_aliamcami_greatest_values_are_json/isJson_Quantity_Analysis.ipynb rename to analyses/2019_03_aliamcami_value_analyses/isJson_Quantity_Analysis.ipynb diff --git a/analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb similarity index 100% rename from analyses/2019_03_aliamcami_greatest_values_are_json/isJson_dataPrep.ipynb rename to analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb From 0820ceadb29e0f957c9c7c5034e184187c5fdfc1 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 19:45:55 -0300 Subject: [PATCH 11/23] Removal of outdated notebook --- .../isJson_Quantity_Analysis.ipynb | 509 ------------------ 1 file changed, 509 deletions(-) delete mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_Quantity_Analysis.ipynb diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Quantity_Analysis.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Quantity_Analysis.ipynb deleted file mode 100644 index 0a209bd..0000000 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_Quantity_Analysis.ipynb +++ /dev/null @@ -1,509 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Start client" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", - " data = yaml.load(f.read()) or {}\n", - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", - " defaults = yaml.load(f)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

Client

\n", - "\n", - "
\n", - "

Cluster

\n", - "
    \n", - "
  • Workers: 4
  • \n", - "
  • Cores: 4
  • \n", - "
  • Memory: 8.59 GB
  • \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import dask.dataframe as dd\n", - "from dask.distributed import Client\n", - "\n", - "#Initializing client\n", - "client = Client()\n", - "client" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data\n", - "Using filtered and evaluated for json data named 'is_json_above_mean.parquet'. You can get this by running the 'isJson_dataPrep.ipynb'\n", - "\n", - "This new sample has 499805, meaning that its only 4,42% of the original sample (most values are smaller than the sample's mean of 1356)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_1000valuevalue_lensymbolscript_urlis_json
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...3713window.sessionStoragehttps://assets.adobedtm.com/caacec67651710193d...True
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jsTrue
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...103878window.localStoragehttps://g.alicdn.com/alilog/mlog/aplus_v2.jsTrue
3usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...False
4usunico=17/12/2017:0-00155123:830; SessionASM=...usunico=17/12/2017:0-00155123:830; SessionASM=...1358window.document.cookiehttps://www.coches.net/scripts/common.min.js?2...False
\n", - "
" - ], - "text/plain": [ - " value_1000 \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "\n", - " value value_len \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "\n", - " symbol script_url \\\n", - "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", - "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... 
\n", - "\n", - " is_json \n", - "0 True \n", - "1 True \n", - "2 True \n", - "3 False \n", - "4 False " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = dd.read_parquet('is_json_above_mean.parquet')\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## DF overview\n", - "Some overview about the sample after the data prep: \n", - "- Rows: 499805\n", - "- Mean: 27829.33,\n", - "- Min: 1357,\n", - "- Max: 4496861\n", - "- Std: 122092.41" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "27829.332847810645 1357 4496861 122092.41371885882\n" - ] - } - ], - "source": [ - "df_mean = df['value_len'].mean()\n", - "df_min = df['value_len'].min()\n", - "df_max = df['value_len'].max()\n", - "df_std = df['value_len'].std()\n", - "(df_mean, df_min, df_max, df_std) = dd.compute(df_mean, df_min, df_max, df_std);\n", - "print(df_mean, df_min, df_max, df_std)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Quantity analysis " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Whole\n", - "This whole sample has: \n", - "- False: 307577 rows\n", - " - 61,54% are not valid JSON\n", - " \n", - " \n", - "- True: 192228 rows\n", - " - 38,46% are valid JSON" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False 307577\n", - "True 192228\n", - "Name: is_json, dtype: int64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['is_json'].value_counts().compute()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ORIGINAL SAMPLE: One std above the mean\n", - "Original Sample Data: \n", - "- Mean: 1356.97\n", - "- Std: 26310.62\n", - "\n", - "I'll be 
using the original sample's mean and std to make the following analyses\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "ROW_COUNT = 499805\n", - "MEAN = 1356\n", - "STD = 26310" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "- 46745 rows have the value_len greater than 27666\n", - "- This represent 9.35% of the rows on this sample\n", - "\n", - "As the value_len increases the percentage of valid JSON on the columns 'value' also increases, for this filtered sample the following data was verified: \n", - "- True: 46691 rows\n", - " - 99,88% are valid JSON\n", - " \n", - "- False: 54 rows\n", - " - 0,11% are not valid JSON\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "len: 46745 (9.35%)\n" - ] - } - ], - "source": [ - "dfa = df[df['value_len'] > (MEAN + STD)]\n", - "length = len(dfa)\n", - "print(\"len: {0} ({1:0.2f}%)\".format(length, length / ROW_COUNT * 100))" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True 46691\n", - "False 54\n", - "Name: is_json, dtype: int64" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dfa['is_json'].value_counts().compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## All greater values are JSON\n", - "\n", - "There is absolute no value greater than 104653 that represents a valid JSON. \n", - "\n", - "This implies that all the greater values are JSON but they represent very low percentage of the whole data. \n", - "\n", - "The top 46745 gratest value_len are valid JSONs, that is 9.35% of this sample (value_len > mean) and 0,41% of the original sample with all the smaller values. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "group = dfa.groupby('is_json')\n", - "group_result = group.agg({'value_len': ['mean', 'std', 'min', 'max', 'count']}).compute()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_len
meanstdminmaxcount
is_json
False82460.05555613627.1180632813510465354
True271422.740185412552.29861327669449686146691
\n", - "
" - ], - "text/plain": [ - " value_len \n", - " mean std min max count\n", - "is_json \n", - "False 82460.055556 13627.118063 28135 104653 54\n", - "True 271422.740185 412552.298613 27669 4496861 46691" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "group_result" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "len: 46745 (9.35%)\n" - ] - } - ], - "source": [ - "allJson = df[df['value_len'] > 104653]\n", - "length = len(dfa)\n", - "print(\"len: {0} ({1:0.2f}%)\".format(length, length / ROW_COUNT * 100))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 327429a3f6ae02bfb32ad84fbbd5d854aba50c6e Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 19:46:53 -0300 Subject: [PATCH 12/23] Add analyse for the correlation the domain and the value have with each other --- .../isJson_correlation_domain_and_value.ipynb | 922 ++++++++++++++++++ 1 file changed, 922 insertions(+) create mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb new file mode 100644 index 0000000..f171d76 --- /dev/null +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb @@ -0,0 +1,922 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start 
Dask" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n", + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " defaults = yaml.load(f)\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 4
  • \n", + "
  • Memory: 8.59 GB
  • \n", + "
\n", + "\n", + "\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.distributed import Client\n", + "\n", + "#Initializing client\n", + "client = Client()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Objective\n", + "\n", + "The objective of this notebook is to answer two main questions: \n", + " - \"Are the JSON values always from the same location or related domains?\" \n", + " - \"Is there a set of location domains that always produces JSON?\"\n", + "\n", + "To answer this we will use the sample data sets produced by the notebook \"isJson_dataPrep.ipynb\": 'all_json_above_mean.parquet' for the first question and 'is_json_above_mean_md5.parquet' for the second; this contains two extra calculated columns that will be important: 'is_json' and 'location_domain'.\n", + "\n", + "\n", + "OBS.: For \"value\" comparison I will use value_md5 instead, because it's reliable and faster. Value_md5 is the calculated md5 of the value column \n", + "OBS2.: To see the validation that all the biggest values are JSON, please refer to 'isJson_Sample_Comparasion.ipynb'\n", + "\n", + "### Findings: \n", + "\n", + "In this notebook I was able to validate a couple of facts about the two proposed questions, which are: \n", + "- One domain produces multiple JSONs\n", + "- One JSON is usually (99.9%) produced by a single domain. \n", + "\n", + "\n", + "- One domain can produce both JSON and non-JSON values, but most produce only one type\n", + "- Most of the domains that produce a single type produce the JSON type. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Is there a set of location domains that always produces JSON?\n", + "The dataset used for this analysis contains non-JSON values as well, in order to show that a domain may or may not produce only JSON values."
+ ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['value_md5', 'is_json', 'location_domain'], dtype='object')" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet('is_json_above_mean_md5.parquet', engine='pyarrow', columns=['value_md5', 'is_json', 'location_domain'])\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_md5is_jsonlocation_domain
0cff77029e3ae45dd439a62987b1d8340Truecanada.ca
19ac0a0a0afb677c8fd985a7c2f4ddbc5Truetmall.com
29ac0a0a0afb677c8fd985a7c2f4ddbc5Truetmall.com
3db64465b639e01993d9212390f057628Falsecoches.net
4db64465b639e01993d9212390f057628Falsecoches.net
\n", + "
" + ], + "text/plain": [ + " value_md5 is_json location_domain\n", + "0 cff77029e3ae45dd439a62987b1d8340 True canada.ca\n", + "1 9ac0a0a0afb677c8fd985a7c2f4ddbc5 True tmall.com\n", + "2 9ac0a0a0afb677c8fd985a7c2f4ddbc5 True tmall.com\n", + "3 db64465b639e01993d9212390f057628 False coches.net\n", + "4 db64465b639e01993d9212390f057628 False coches.net" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "location_domain_group = df.compute().groupby('location_domain')" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [], + "source": [ + "agg = location_domain_group.agg({'value_md5': ['nunique', 'count'], 'is_json': ['sum', 'nunique']})" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_md5is_json
nuniquecountsumnunique
location_domain
0123movies.com222.01
10010.com288.01
1001freefonts.com20155155.01
10fastfingers.com42828.01
10jqka.com.cn73030.01
\n", + "
" + ], + "text/plain": [ + " value_md5 is_json \n", + " nunique count sum nunique\n", + "location_domain \n", + "0123movies.com 2 2 2.0 1\n", + "10010.com 2 8 8.0 1\n", + "1001freefonts.com 20 155 155.0 1\n", + "10fastfingers.com 4 28 28.0 1\n", + "10jqka.com.cn 7 30 30.0 1" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agg.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1563" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Count the number of domains that only produce one type of value (json or non-json)\n", + "f1 = agg['is_json']['nunique'] == 1\n", + "agg_1 = agg[f1]\n", + "oneType = len(agg_1['is_json'])\n", + "oneType" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1226" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Out of the ones there have only one type of output, these are the ones that have as JSON\n", + "f2 = agg['is_json']['sum'] > 0\n", + "agg_1a = agg[f1 & f2]\n", + "oneType_json = len(agg_1a['is_json'])\n", + "oneType_json" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWYAAAFbCAYAAADmwiRlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XmcXFWd9/HP6SV7uCRhSwJSkLBGIGRhG0EWHxkJMgqKgIoLMoo74zgUDy5XZJwgbvggIjjsKDgoMFJICDGCEHYIEFCBhIAEgRDIzdprneePW0k6kKSru6vqd5fv+/WqVzqVSve3afqb0+eee47z3iMiIsnRZB1AREQ2pmIWEUkYFbOISMKomEVEEkbFLCKSMCpmEZGEUTGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgmjYhYRSRgVs4hIwqiYRUQSRsUsIpIwKmYRkYRRMYuIJIyKWUQkYVTMIiIJo2IWEUkYFbOISMKomEVEEkbFLCKSMCpmEZGEUTGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgnTYh1AZEDCYBAwCmgl/v+5ufJrz8e65zywAlgORITRKovIIr1x3nvrDCJvFwbDgN2AHYHxPR5jgW2BbSqPkQP4KF1ARFzUPR+vA4uA54CFwHOE0eoBfByRPlExi614xLsn8E5gUuXXdwIFkjXV9gpxUW8oa/grsIAw6rIMJtmjYpbGCYMhwMHAu4B9iQt4IumeUmsD5gMPAQ+f2XHGvT/+3syFxpkk5VTMUj9hMJi4iA+vPA4CBhsmqrsD2n629DVGAdwL3FN5PLp45oxO02CSKipmqZ14WuIg4Ag2FPEQy0iN1OmbX9qt/ZodN/FHq4E7gFuAWxfPnLGssckkbVTMMjDxqPifgROB9zOwi3Gp9lx57Lz3dPzwkF5e1k08ir4ZuGXxzBnP1z+ZpI2KWfouLuOj2VDGW9kGSoYru957d9j1ycP6+NeeIB5J37x45oxH6xBLUkjFLNWJpynWlfFxqIzf5uSOc566rzxp0gDexd+B3wCXLp4545kaxZIUUjHLloXBPsAZwClAYJwmsbynfff2q10nLYNq8e6AucAlxCNpXTjMGRWzvF28rO3DxIV8sHGaVFjhhz25b/sv96nDu34FuJx4FP1CHd6/JJCKWTYIg3HAF4B/Jb6rTqp0b/ekuz7aec676/ghysDtxKPo0uKZM8p1/FhiLM0L+6VWwmAacCbxKLnVOE0qzS5PrcUUxpY0AcdUHi8WiqVLgIsWz5yxss4fVwxoxJxnYXAQcB5wlHWUtDu0/Scv/91vN67BH3YZ8APg/y2eOUN7eWSIijmPwmB/4kI+xjpKFnR798qE9ut2MIywFDgfuHjxzBlrDXNIjaiY8yQM9gbOBY4HnHGazHixvN39h3X85CDrHMQXCmcClyyeOaPdOoz0n4o5D8JgAvAd4GSStWNbJlzfdcRdxa7T63nhr6+WAP8FXLZ45owO6zDSd/omzbIwGEsYXEa8PeVH0de7LmaVp42yzvAW44GLgGcLxdInrMNI32nEnEVh0ES87O08dIdeXXlP117tV3S2MXiodZYtuAv43OKZM/5qHUSqo2LOmjCYAvwCmGYdJQ9W+8F/ndR+xZ7WOarQAVwAnLd45ow26zCyZfrRNivCYCvC4KfAg6iUG+Zpv/Nr1hmqNAg4B1hQKJbeax1GtkzFnAVh8CHgL8CXiA8elQaZ0z0lbTdpTQBmFYql6wvFkuUSP9kCTWWkWRjsQnyRR+uRjRzVfsGLC/34d1jn6KeIeBT9c93inSwaMadVGHyCeC9flbKRsnfLUlzKEO8WeBFwb6FY2sU6jGygYk6bMBhJGFwLXAmMME6Ta68yapF1hho5CHisUCydYB1EYir
mNIk3G3qUeE2yGHugvOca6ww1FAA3FoqliwvFUqYPzE0DFXMahIEjDL4GzAMmWseR2O3d07N4vuEZwP2FYmk36yB5pmJOujDYFigR7yKmLTkTwnvKfy7vm9Xymgw8WiiWTrEOkldalZFkYXAUcA0w1jqKbGytb312r/arslrMPV0OfFG71jWWRsxJFQb/BtyBSjmRnvU7vmKdoUE+DTxUKJb2tg6SJyrmpAmDVsLgl8AP0dcnseaWJ+dp29RJwH2FYulI6yB5oW/8JAmDMcBs4DTrKLJls7qn5+0nma2APxSKJa0IagAVc1KEwW7A/UCS9vWVTfCe6Gm/867WOQwMAq4pFEtnWwfJOl38S4IwOBj4X3QydSos9cGj09t/PsU6h7GfA19aPHNGt3WQLNKI2VoYfBCYg0o5NR4u766TqeP1zr8rFEvDrINkkYrZUhh8DrgRSPIm6/IWd3RPG26dISGOA/5YKJa2tQ6SNSpmK2HweeIfB/U1SBHv8XPLkydY50iQA4F5hWJJ/01qSKVgIS7ln1nHkL7rpGXxckYm7Yw/axOJd6jbwzpIXzjnznXOvcc6x6aomBtNpZxqC/3Yl60zJNT2wJw0bR/qvf+W9/5O6xybomJuJJVy6t1d3k8bym/eeOJyHl/Ld+qcKzjn/uKcu8w595Rz7g7n3FDn3GTn3P3OuSecczc550ZVXv8n59z5zrkHnXPPOOcO3cz7vdI596HK2zOdc09X3tcPKs/t7JybU3lujnPuHT3+3k+dc/Occ4vWvY9aUjE3iko5E2Z1T9vOOkPC7UJczrX+77Qb8DPv/SRgOXACcDVwlvd+X+BJ4Ns9Xt/ivT8A+Opbnn8b59xo4IPApMr7Oq/yRxcBV1eeuw74aY+/NhZ4F3AsMHOAn9vbqJgbQaWcCd6zZr6fqG1Xe7cHMLtQLNVyLv557/38ytuPEJ9duLX3/q7Kc1cBh/V4/e96vLbQy/teAbQBv3TOHQ+s22f7YOBXlbevIS7idW723pe9908TT+PUlIq53sLgs6iUM2E5I54p06TDbquzL3B7oViq1Z7V7T3e7ga2rvL13UALgHPuCufcfOfcbT1f6L3vAg4Afgt8ALh9M++z5914PfPUfN8UFXM9hcExqJQz47HyxMg6Q8ocAJTqdBNKBLzZY/7448BdW3g93vtPee8ne+83OifTOTcCCLz3txFPfUyu/NE84KTK2x8F7qlV+N6omOslDPYDbgA0wsqI2eWpuhGo7w4FbqrTcVWfAC5wzj1BXKbn9uN9eGAkcGvl/dwFnFn5sy8Dn6o8/3HgKwOPXB3tlVEPYTAOeJD4KrVkxIFtF732KqN18a9/fgOctHjmjMQUjnPu98CPvPdzrbO8lUbMtRYGw4FbUSlnSpdvWqJSHpATgW9Zh1jHOXc5MIwGTk/0hYq5lsKgCfg1sL91FKmtF/z2L1hnyIBvF4qlmq/57Q/v/ae990d57zuts2yKirm2fgy83zqE1N6fy/t0WWfIAAdcVSiWNHDphYq5VsLgi8QXCySDZpWnj7HOkBHDgFvqcANKpqiYayEM3g38xDqG1If3tD9c3iMPJ2I3yk7A9YViSSuWNkPFPFDxOX3XoWVxmbWSoc920jLIOkfGHAH8l3WIpFIxD9wVaAVGpj1Z3nWZdYaM+nqhWDreOkQSqZgHIgy+gi72Zd7s8lSNluvnyrTt49wIKub+CoP9ge9bx5D6m1OesrN1hgwbCVxbKJZarIMkiYq5P8JgBHA98XHukmHd3r3yd7/dOOscGTcNKFqHSBIVc//8DNjdOoTU3xK/jW4saYxvFoqlfa1DJIWKua/C4GPAqdYxpDHuK09qs86QE4OI55tbrYMkgYq5L8LgHcDF1jGkcW4vT+9t31+pnf2Bc6xDJIGKuW9+TnyxQnLAe7ruK++tKavGOke3bKuYqxcGJwHH9Po6yYw1DH6ujcHag7mxWoj308j1hXUVczXCYDRwoXUMaay/+J1fs86
QU/vQywGqWadirs4FgDZdyZk53fvrNns7ZxWKpWnWIayomHsTBocAn7KOIY13R3najtYZcqwZuLRQLNX8oNM0UDFvSRg0E69ZzuX/HHlW9m7ZQj9ed/zZ2p/4rL3cUTFv2efZcGKu5MirjFpknUEAOK9QLOXuAqyKeXPCYDvgu9YxxMYD5T3XWGcQIN67+cxeX5UxKubN+wYQWIcQG7O6p29lnUHWK+btxBMV86bEd/h91jqG2PCe8t3lfSda55D1RgLfsQ7RSCrmTfsm2jkut9poXbiaobrDM1k+UyiW9rIO0Sgq5rcKg4nAJ61jiJ1n/Y6vWGeQt2khR/ufq5jf7jvE/xNITs0tT9byyGQ6tlAsHWkdohFUzD2FwSTgJOsYYuuO7mljrTPIZl1gHaARVMwb+y76b5Jr3hM95Qu7WueQzZpSKJb+2TpEvamE1gmDqcAHrWOIrdcJFoLTVEayfc06QL2pmDc41zqA2Hu4vPtK6wzSq/cUiqX9rEPUk4oZIAz2QnstC3BH97Th1hmkKpkeNauYY1+0DiDJMLc8eYJ1BqnKSYViabx1iHrRsrAw2AodripAh29ZvJyRBescm9O1Yimvl35E96o3ca6JEZOPZqtp/8Kbcy9nzXMP4ppbaNl6B7Y55qs0DRlB20tP88YdF+OaW9nmuK/TOmoc5bZVLL3lfLY78VxcuqfSW4EvAUXrIPWgEXN8M8kI6xBib5Ef+5J1hi1qambUEacx/vRL2OHjP2DloyU6Xn+RIYXJjDvtZ4z79EW0jh5PdP//ALDioZvY9gNns/Vhp7LysdsAWD7veoKDT0x7Ka/z2UKxlMnv3XwXcxg44AvWMSQZ7irv560zbEnLiNEM3iHewqNp8DBax+xE98plDN1lCq4pPmxl8Lg96Fr5OgCuqQXf1YHvasc1tdD55j/oXrmMIe/Yx+xzqLGtgdOsQ9RDvosZ3gvoFGQBYFb3tNTsYNYVvUrHq4sYPG6PjZ5f9cRshu4an8gUHPRhlt1+ESsevoWRU45l+d1Xs/WhH7OIW09fLRRLmTsCLO9zzLroJwB4z5r5fmIqdpQrd6xl6U3fY/RRp9M0eNj656N5N0BTM8P3PhyAQdvvythTfwhA298X0DxiNABLbzkf19TMqCNPo3n4qIbnr7EC8f0HNxrnqKn8jpjDYFe0RE4qljPi2TJNiR95+e4ult70PYbvfTjD9jhk/fOrnpzDmoUPss37//1t88fee6J5NxD808ksv/dXbP2uUxg+6QhWPPL7Rsevl8ydyZnfYoYzyPfnLz08Vp74pnWG3njvWfaHC2kdsxNbHbDhJtW1ix5hxQM3st0J36Kpdcjb/t7qBXMYOmEazUNG4DvbwTWBc/Hb2fDeQrG0jXWIWsrnVEZ8yKqWyMl6s8tTh/X+KlvtS55m9VNzad22wMtXfAmAUYedyht3Xorv7uTVG74BxBcAxxwdz9KVO9tYtWAO258Yn5K21fQPsPSm7+GaW9jmuP+w+URqrwU4EbjYOkitOO8TfSG6PsLgKOBO6xiSHAe2XfTaq4xOzcU/eZt7F8+c8S7rELWS1x/lT7QOIMnR5ZuWqJRT75BCsbSzdYhayV8xx9MY2kVO1nvBb/+idQYZMAecbB2iVvJXzHAEsK11CEmOe8r7dFhnkJo4xTpAreSxmDWNIRu5vTw9U1f0c2yfQrH0TusQtZCvYg6DFjSNIT14T/vD5T12s84hNZOJUXO+ihmOBDQ6kvVWMvTZTloGWeeQmjm5UCylfoemvBWzpjFkIwvKuyyzziA1VQAmW4cYqPwUcxg0AR+wjiHJcmd5qkbL2XOkdYCByk8xwzRgjHUISZbZ5anvsM4gNXeUdYCBylMxv8c6gCRLt3ev/N1vl9njiXLs0EKx1GodYiBUzJJbS/w2L1hnkLoYARxgHWIg8lHMYTAUOKTX10mu3Fee1GadQeom1dMZ+Shm+CdgsHUISZZZ5Wmp3yVeNivVFwDzUsyHWQe
QZPGervvKk1JxYon0y8GFYmmodYj+yksxH2odQJJlDYOfW8vgxO/BLP02CEjtNqDZL+YwaAUOtI4hyfIXv/Nr1hmk7lI7z5z9YoapQGp/pJH6mNO9f+LP95MBO8I6QH/loZgPsg4gyTO7PHVH6wxSd/sViqVU/gOch2LexzqAJEvZu2XP+R0zc9qFbNZgIJU7B+ahmDOxP6vUzquMWmSdQRomld//2S7mMHDA3tYxJFkeKO+5xjqDNIyKOYEKxLdniqw3q3v6VtYZpGFUzAmUyi+K1I/3lP9c3meCdQ5pmFR2QNaLeZJ1AEmWdloXrWKYRsz5MbFQLKVuO4Zei9k5t7tzbo5zbkHl9/s6575R/2g1kcp/LaV+nvE7/sM6gzRUM7CXdYi+qmbEfBlwNtAJ4L1/AjipnqFqSMUsG5lbnpz68+Ckz1LXA9UU8zDv/YNvea6rHmFqKgyagT2tY0iy3NE9bax1Bmm4TBbz6865CYAHcM59CEjDj4M7oa0+pQfviZ7yhV2tc0jDpW7JbEsVr/kCcCmwp3NuCfA88NG6pqoNjYxkI68TLAQ3xTqHNNw46wB91Wsxe+8XAe9xzg0Hmrz3K+sfqya2tw4gyfJIefcV1hnExHbWAfqqmlUZY5xzPwX+DPzJOXehcy4Np03vYB1AkmVW9zTdbJRP21oH6Ktq5pivB5YCJwAfqrx9Qz1D1YiKWTYytzxZN5bk05BCsTTSOkRfVFPMo7333/XeP195nAdsXe9gNaBilvU6fMvi5YzUGX/5larpjGqKea5z7iTnXFPlcSJQqnewGlAxy3qL/Ngl1hnEVOaK+bPAr4D2yuN64N+ccyudc0m+mKKLf7LeXeV9u60ziKlUFXM1qzJSNTfTg0bMst6s7un6hzrfUlXM1azKuNE5d4xzLm0bHukbUQDwnjXz/cSJ1jnEVKpWZlRTtpcQ31DyrHNupnMu+bc5h8FgdACrVCxnxLNlmlJ59pvUTLZGzN77O733HwWmAIuB2c65ec65TznnWusdsJ+SmksMzC9PWG6dQcxlbsRM5YaSTwKfAR4DLiQu6tl1SzYw1dxqLjkxuzx1iHUGMZeqfXN6LTDn3O+Id2m7Bni/937dBkY3OOcerme4AVAxy3pzuqfsYp1BzKWqE6oJ+0vv/W09n3DODfbet3vvp9Up10Cl6osg9dPlm5a8yujx1jnEXKo6oZqpjPM28dx9tQ5SY6n6Ikj9vOC3f9E6gyRCqi7+brbAnHM7AOOBoc65/YF1Jz9sBQxrQLaBUDELAPeU39lpnUESIVWdsKWwRxNf8NsR+CEbinkF8H/rG2vAUvVFkPo5uXnuTsc33/OUdQ6xtZohr8IM6xhV22yBee+vAq5yzp3gvf9tAzPVgpbLCQCDXNcug1JwEprU10jWpmqvlGrWMaetlEEjZhHZWKr2SknbbdbVStUXQUTqLlWdkNViXm0dQEQSJVXzWdVsYjTMOfdN59xlld/v5pw7tv7RBiQt5xKKSGO0Wwfoi2pGzFcQf1IHV37/Epte25wkq6wDiEiiLLMO0BfVFPME7/33gU4A7/1aNiydS6Yw6gA6rGOISGK8bh2gL6op5g7n3FDAAzjnJpCOHwsi6wAikhipKuZqlpV9G7gd2Mk5dx3wT8Q3niTdG6Rsqz8RqZtsFbP3frZz7lHgIOIpjK9479PwSaZqTklE6ipVfVDtjRhDgDcrr9/bOYf3/u76xaqJVH0hRKSu0jCYXK+a/ZjPBz4CPAWUK097QMUsImmRrWIGPgDs4b1PwwW/nl6xDiAiiZGqYq5mVcYi0rkp0ELrACKSCGsJozXWIfqimhHzGmC+c24OPZbJee+/XLdUtfGcdQARSYR/9P6SZKmmmP+38kgbjZhFBOBv1gH6qppivgGYSHzBb6H3vq2+kWrmJaCNeEWJiORX6op5s3PMzrkW59z3iQvuKuBa4O/Oue8755I/5xxGnnh+XETy7a/WAfpqSxf/LgBGA7t476d
67/cHJgBbAz9oRLga0HSGiGSqmI8FTvfer99C03u/AjgDOKbewWpEFwBFJFPF7L33fhNPdlPZ0CgFVMwi+bacMHrVOkRfbamYn3bOnfrWJ51zHyM9/wJpKkMk31J34Q+2vCrjC8DvnHOfBh4hHiVPB4YCH2xAtlrQsfUi+ZaWQeRGNjti9t4v8d4fCJwLLAZeBM713h/gvU/HUeBh9BKQjqwiUg+ZGzED4L3/I/DHBmSplweA461DiIiJx60D9EdWT8nu6X7rACJiogzMsw7RHypmEcmqpwij5dYh+iMPxfww0GUdQkQa7h7rAP2V/WIOo7XAE9YxRKTh/mwdoL+yX8wxTWeI5I+KOeFUzCL58mJluWwq5aWY77MOICINldr5ZchLMYfRc8Q3yIhIPqR2GgPyUsyx31sHEJGG0Yg5JVTMIvnwMinfJydPxfwnYJV1CBGpu1srJxilVn6KOYzagTusY4hI3aXx8OiN5KeYY5rOEMm21cAc6xADlbdiLhFvbCIi2XQHYdRmHWKg8lXMYbSUeBtQEcmm31oHqIV8FXNM0xki2dRORr6/81jMt1gHEJG6mEUYrbAOUQv5K+YwehqYbx1DRGruf6wD1Er+ijl2pXUAEamptWRgmdw6vZ75l1G/Ai4AWq2D1MLfXu/mIzeuXf/7RW+WOfeIwRxeaOFzt7bR1uVpaYKLZwzlgPHN/PbpTr71p3ZGD3Xc/JGhjBnWxMI3ypzzxzau/9Aww89EpN9+k5VpDADnfapvkOm/MLgFOM46Rq11lz3jf7SKBz4znNN/v5YzDxrE+3Zr5bZnO/n+vR386ZPDOeS/VzPrY8O4fkEnbV3wpQMHcfJv13Du4YPZbUyz9acg0h+HEEaZ2UUyr1MZAJdbB6iHOc93M2F0Eztv3YRzsKI9fj5qg3EjHQBNDtq7PWs6Pa3N8OcXuhg7okmlLGn1eJZKGfI7lQHxzSYvA+Osg9TS9Qs6Ofmd8QzNT44ewtHXruHfZ7dR9jDv08MB+Pa7B3P0tWsYN7KJaz84lBNvXMP1J2gKQ1LrF9YBai2/UxkAYfBd4BvWMWqlo9sz7oereOrzw9l+RBNf/kMb7965mRP2buU3T3Vy6SMd3Hnq8I3+zlXzO1je5jlwx2Z+MK+DUUMcF75vCMNandFnIdInq4BxhNFK6yC1lOepDIDLyNAt2n94tospY5vYfkT8Zb3q8Q6O3yv+oejDe7fw4JLujV6/ptNz1eOdfH76IM6e087l/zKUqeOaue6JzoZnF+mnX2etlCHvxRxGLwK3W8eolV/3mMYAGDeyibteiMv4j893s9uYjb/c37+3na8cOIjWZsfaTnDE889rOnP8U5SkzSXWAeohz3PM6/wYOMY6xECt6fTMXtTNL44duv65y94/hK/c3kZXGYa0wKU9/uzllWUefrlMePgQAL528CAO+u/VbD0kXkInkgIPEUaPWoeoh3zPMa8TBg8AB1jHEJE+OY0wyuTqqnxPZWzwPesAItInS4DrrEPUi4o59r/AAusQIlK1mZVTiTJJxQxUzgf7L+sYIlKVJcQrqjJLxbzBDcBC6xAi0qvzszxaBhXzBmHUDZxvHUNEtuhl4FLrEPWmYt7YVcBL1iFEZLMyP1oGFfPGwqgD+KF1DBHZpH+Qg9EyqJg35RLgBesQIvI252fhBOxqqJjfKv7C/4d1DBHZSG5Gy6Bi3rQw+g1wt3UMEVnvbMJobe8vywYV8+Z9lQztPCeSYvOAq61DNJKKeXPC6DEyesqJSIp0A1+o3ASWGyrmLTsHiKxDiOTYJYTRfOsQjaZi3pIweg34rnUMkZxaSoZOGOoLFXPvfgo8Yx1CJIfOIoyWW4ewoGLuTRh1AmdaxxDJmfuBK61DWFExVyOMbgOutY4hkhNlcnjBrycVc/W+iPbREGmEi7N6ZFS1VMzVCqMI+DSQ23/FRRrgWeAs6xDWVMx9EUazgZ9bxxDJqG7gVMJojXUQayrmvvs68Jx1CJE
MOp8wut86RBKomPsq/tf8E+h2bZFaegwIrUMkhYq5P8JoHnCBdQyRjFgNnFxZmiqomAfiW8AT1iFEMuBLhNHfrEMkifNeiwz6LQz2Ah4ERlhHEUmpXxNGp1iHSBqNmAcijP5CvIRORPpuIfA56xBJpGIeqDD6H+BH1jFEUmYFcBxhtMI6SBKpmGvjLOAu6xAiKVEmvtj3tHWQpFIx10IYdQEfRoe4ilTjPyr7z8hm6OJfLYXBvsTH4Ay3jiKSUJcTRqdZh0g6jZhrKYyeAD6G9tMQ2ZR7gDOsQ6SBirnWwuhm4JvWMUQSZjFwPGHUYR0kDTJbzM65eWYfPIz+E7jY7OOLJMsq4hUYS62DpEVmi9l7f4hxhC+izfVFOoGPEEZPWgdJk8wWs3NulXNurHPubufcfOfcAufcoZU/O9k592TlufPf8nf+0zn3uHPufufc9v0OEJ++8Cng5gF/MiLp1E28LE4rMPoos8VccQowy3s/GdgPmO+cGwecDxwJTAamO+c+UHn9cOB+7/1+wN3A6QP66PEyupOAOQN6PyLpUybeW/m31kHSKOvF/BDwKedcCOzjvV8JTAf+5L1f6r3vAq4DDqu8vgO4tfL2I0BhwAnCqB34F+LDJUXywAOfIYx+ZR0krTJdzN77u4lLdwlwjXPuVMBt4a90+g0Lu7uBlpoECaPVwDFoNzrJhzMIoyusQ6RZpovZObcz8Jr3/jLgv4EpwAPAu51z2zjnmoGTacTt1GH0JvBe4jPNRLLqq4TRL6xDpF2Wi9kDhxPPKz8GnABc6L3/B3A2MBd4HHjUe39LQxKF0avEc9t/acjHE2msswijC61DZEEmb8l2zo0hLtydrbNsUhiMAUrAgdZRRGrkG5X1+1IDmRsxV1Zd3Af8wDrLZoXRMuAo4HbrKCID1A38q0q5tjI5Yk6NMGgFriRe1ieSNquJbx4pWQfJmsyNmFMlPnzyY4Dm5SRtXgMOVynXh0bMSREGZwPfs44hUoVngPcRRousg2SVijlJwuA04BdAs3UUkc24j3hDotetg2SZijlpwuBI4HpgW+soIm9xM3AKYbTWOkjWaY45acLoj2y4EUYkKS4ATlApN4ZGzEkVBoOAn6A8g1viAAAEAklEQVQTH8TWCuCThNFN1kHyRMWcdGFwKnAJMNQ6iuTOAuJR8jPWQfJGUxlJF0ZXAwcDugIujXQ1cKBK2YZGzGkRBlsD1wDHWkeRTFsFfJ4wusY6SJ5pxJwWYbQcOA44E2gzTiPZ9BgwRaVsTyPmNAqDPYl/1JxuHUUyoQv4EfBNnWKdDCrmtAqDZuLtS78FtBqnkfR6lPi0kcesg8gGKua0C4N9iA8B0OhZ+mIt8G3gR4RRt3UY2ZiKOQvi0fNXgO8Cw4zTSPLdCXxWe10kl4o5S8JgV+DnxEdYibzVG8DXCKMrrYPIlqmYsygM3kd8C+0k6yiSGL8mPo/vNesg0jsVc1bF0xunAecC2xunETtzic/ie8g6iFRPxZx1YTASOAv4N3Rbd57MB4qE0SzrINJ3Kua8CIMdgf8EPg444zRSP88D3wB+TRjpmzulVMx5Ewb7Exf0+6yjSE0tJV6V8wvdJJJ+Kua8itc/fx04Cd2gkmavARcBPyGMVlqHkdpQMeddGOxEvP/G6cAI4zRSvSeBHwO/IozarcNIbamYJRbvXncG8GVgB+M0smkeuA34MWE0xzqM1I+KWTYWBoOBU4GvAnsbp5HYGuAq4ELC6G/WYaT+VMyyeWEwHfgE8Tz0GOM0efQ34ArgMsLoDesw0jgqZuldfP7gDOKSPgZdLKynN4hPSb+aMNKBvDmlYpa+CYNtgJOJS3qqcZqsWAXcCtwA3KblbqJilv4Lg0nAB4mPuzoA3bjSF6uJL+StK+O1xnkkQVTMUhthsD3xdMcM4CggsA2UON3AQ8Rbbs4G7iOMOm0jSVKpmKX24g2UDgD+T+VxENBimsnGM2wo4rmEUWScR1JCxSz1FwbDgP2
BaT0eu5Otw4DXAguINw96AJhNGL1oG0nSSsUsNuJd76awcVlPIB3z1K8RF3DPxzM6oklqRcUsyRGPrAuVx8493l73+0btK70SWAK8VHmse/t54HHC6JUG5ZCcUjFLeoTBUOKCHguMJN7bY3jl1009BgOdQFePX9uJV0T0fETE5RsXcBitaNjnJLIJKmYRkYTJ0sUXEZFMUDGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgmjYhYRSRgVs4hIwqiYRUQSRsUsIpIwKmYRkYRRMYuIJIyKWUQkYVTMIiIJo2IWEUkYFbOISMKomEVEEkbFLCKSMCpmEZGEUTGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgmjYhYRSRgVs4hIwqiYRUQSRsUsIpIwKmYRkYRRMYuIJMz/B5iMTfFQ5PYAAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "series = pd.Series([(oneType - oneType_json), (oneType_json)], index=['non-json', 'json'], name='One type')\n", + "series.plot.pie(figsize=(6, 6), autopct='%1.0f%%')" + ] + }, + { + "cell_type": "code", + "execution_count": 179, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "294" + ] + }, + "execution_count": 179, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Count the number of domains that only produce one BOTH json and non-json values\n", + "agg_2 = agg[agg['is_json']['nunique'] == 2]\n", + "twoTypes = len(agg_2['is_json'])\n", + "twoTypes" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 187, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWYAAAFbCAYAAADmwiRlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XecVNXBxvHf2cLSERBRNHqNhVgQKxZsmDcxyaQYo1hi7xiNxHqNbX3VOKZZo1Gj0dcSTdTEmJvEqNgLMRABex2NSBAQhs7s7pz3jzvgosDO7t6Zc++d5/v5zGeX2dmdB3GfPXvm3HOMtRYREYmPOtcBRERkZSpmEZGYUTGLiMSMillEJGZUzCIiMaNiFhGJGRWziEjMqJhFRGJGxSwiEjMqZhGRmFExi4jEjIpZRCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzKmYRkZhRMYuIxIyKWUQkZlTMIiIxo2IWEYkZFbOISMyomEVEYkbFLCISMypmEZGYUTGLiMSMillEJGZUzCIiMaNiFhGJGRWziEjMqJhFRGJGxSwiEjMqZhGRmFExi4jEjIpZRCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzDa4DiHSH5wf1wFrAQKBXuw/Zzzy0/Z+LwHzg41w201LZhCKdZ6z97P+/Iu55ftAD2BzYDFivdFu39HYosDZhGfcHTDeeKg/MAj4uvf3s+zOAN3LZzPRuPIdIp6iYxalSAQ8HtgS2avd2U+L1G9084FXglfa3XDYzw2kqSSUVs1SN5wdNwK7AnsBIwgLehHgVcGfNJSzpV4HJwNO5bOZVt5Ek6VTMUjGl0fDOwBhgb8JS7ukyU5XMBp4BngaeAF7KZTNFp4kkUVTMEhnPDxqBUYQlPIawiHu7zBQTc4AJwCPAI7lsJuc2jsSdilm6xfODnsA3gINLb/u4TZQIbwH3AffkspmprsNI/KiYpdNKI+N9gYOA7wD93CZKtFeBe4Hf5bKZt1yHkXhQMUtZSuuF9
yEcGX+XcKmaROvfwO+Ae3PZzAeuw4g7KmZZI88PdgSOAb4HrOM4Tq2wwPPAPcAduWxmnuM8UmUqZvmc0rK2g4AfEL6YJ+4sBG4Drs5lM287ziJVomKWFTw/WJ+wjI8DhjiOIysrAn8FrsxlMxNch5HKUjELnh9sA5xJOH/c6DiOdGwqcBVwdy6bWeY6jERPxVzDPD/4H+As4Kuus0iXzAR+DVyfy2Y+dh1GoqNirkGeH+wGXAHs7jqLRGIpcA1wuV4oTAcVcw3x/GAL4HLCtceSPp8APwGu0xRHsqmYa0DpRb2LgaOAerdppAreBy4E7tQeHcmkYk4xzw8GAD5wGitvIi+1YSpwTi6b+bvrINI5KuYUKq1DPgX4MTDIcRxxbwJwdi6bmeQ6iJRHxZwynh/sBfyGcKN5keUscDtwRi6b+cR1GFkzFXNKeH7QH/gpcALdO2pJ0m0W8KNcNnOX6yCyeirmFPD84BvAjcAGrrNIYvwdGKe9oeNJxZxgnh8MJrwC7DDXWSSRFgHnEF6goiKIERVzQnl+cCBwHdrxTbpvAnBMLpt533UQCamYE8bzg3WB6wn3RBaJygLCFwZvdh1EVMyJ4vnBPoR79GrnN6mUPwJH5bKZ+a6D1DIVcwJ4fmAI5wIvRVfuSeW9CXwvl8287DpIrVIxx1zp6r3b0f4WUl2LgBNy2czdroPUIhVzjHl+MAJ4AF0sIu5cSzj33OI6SC1RMceU5weHE+6129t1Fql5zwFjc9nMdNdBaoWKOWY8P+hBuDZ5nOssIu3MBA7OZTNPuA5SC+pcB5BPlZbCPYVKWeJnKPCo5wdnuA5SCzRijgnPDzYHHgY8x1FEOnIVcLquFqwcFXMMeH6wC/AXYLDrLCJluoPwasFW10HSSFMZjnl+8C3gMVTKkiyHA3/0/EAHMFSAitkhzw+OJLzSSisvJIm+CTxcWmsvEVIxO+L5wanAb9GVfJJsewBPen4w1HWQNFExO+D5wXmEx81rQ3tJg5HAs54fbOw6SFroxb8q8/wgS7jvhUjazAD2zWUz01wHSToVcxV5fnAx4bHyImk1G9g9l8284TpIkqmYq8Tzgx8CV7vOIVIFHwCjc9nMh66DJJWKuQo8P/g+4bpPzSlLrXgN2COXzcxxHSSJVMwV5vlBBvgT0OA6i0iV/RP4ci6bWeg6SNJoVUYFeX6wO/AHVMpSm0YBD5Q25pJOUDFXiOcHIwkvs9aVUVLLvgLc4fmBuqYT9B+rAjw/2AT4O6ArokRgLPAr1yGSRMUcsdLWnY8A67rOIhIjJ3l+8L+uQySFXvyLkOcHjcDjwGjXWURi6vBcNnOn6xBxpxFztH6GSllkTW7y/GB71yHiTiPmiHh+cDDwO9c5RBLgA2DHXDYzy3WQuFIxR8Dzg62AiUAf11lEEuIJ4CvaaH/VNJXRTZ4f9AceQKUs0hl7A5e7DhFXKubuuw3Y3HUIkQQ6w/ODb7sOEUcq5m7w/OAc4Luuc4gklAFu8/zAcx0kbjTH3EWeH+wD/AOdQCLSXf8k3PCo4DpIXGjE3AWeH6xNuAJDpSzSfaPQfPNKVMxdcw2wjusQIiky3vODXV2HiAtNZXRS6cWKB13nEEmh14DtctnMMtdBXIv1iNkYE6t9XD0/WAu4wXUOkZTaAh29BsS8mGPoF8Aw1yFEUuxszw+2cx3CtdgXszFmPWPMU8aYl4wxLxtj9ijdf4gxZlrpvivaPX6hMeYyY8wUY8wLxpihUeTw/OArwDFRfC0RWa0G4FbPD2r6cInYFzNwKPCwtXZbYCTwkjFmGHAFsA+wLbCTMWa/0uP7AC9Ya0cCTwHHdzeA5wd9gZu7+3VEpCzbAme7DuFSEor5ReBoY0wzMMJauwDYCXjCWjvLWtsK3AXsWXp8gfDkEIBJgBdBhiuAjSL4OiJSngs9P9jCdQhXYl/M1tqnCEt3OnCHMeYI1nzadIv9dKlJG908b8/zgz2Bcd35GiLSaU3ALbV6JFXs/
9LGmI2Aj621NwO3ANsT7uS2lzFmbWNMPXAI8GTUz13a+P4m1vyDQEQqY1fgZNchXIhtMRtjGoBlhLtQvWSM+TfwPeBqa+0M4FzC00KmAJOttZVYWzwOGF6Brysi5bmotINjTYntBSbGmJHAzdbaUS6ev7Rm+W1gsIvnF5EVLstlM+e7DlFNsRwxG2NOItyLwuU/xgWolEXi4EelQ45rRiyL2Vr7a2vtltbaf7h4fs8Pvgic4uK5ReRzegPNrkNUUyyLOQYuA3q4DiEiKxzr+UHNvN6jYv4Mzw9GAge5ziEiK2kAfuI6RLWomD/vMrQ8TiSO9vf8YBfXIapBxdyO5wejgYzrHCKyWld0/JDkUzGv7DLXAURkjfb0/OCbrkNUmoq5xPODnYG9XOcQkQ6lfk2zivlTP3IdQETKsnPa55pVzIDnB18gvNxbRJJhvOsAlaRiDp1CN3ehE5Gq+l5pQJVKNV/Mnh/0AU5wnUNEOqWBFF+dW/PFDBwFrOU6hIh02vGeH/R2HaISarqYPT8wwGmuc4hIlwwEjnQdohJqupgJLybZzHUIEemy00oDrFSp9WLWEjmRZBsOfN11iKjVbDF7frA14SnbIpJsqZuOrNliBg53HUBEIvGVtC2dq8liLs1JaWtPkXQwwPddh4hSTRYz4em7G7kOISKROcx1gCjVajEf7DqAiERqK88PtnUdIio1V8yeH9QDY13nEJHIpWbUXIv7Q+wNDHUdorPmv/gnFk75BxhoHOKx9jfGYxrCYwk/eeTXLJz2KBuefl/42EkPsfClv1Hffwjr7H8+pr6RpR++wuI3nmPQl493+dcQqaQDgTNdh4hCzY2YSeA0RuuC2cyf9BDrHnklw469HopFFr32FADLZrxFcdmilR6/cMrDrHfMdfQYuglL3puMtZb8s/cwYPQhLuKLVMuGnh+Mch0iCjVVzJ4fNAL7u87RJcU2bGsBW2zDti6jvu8gbLGNuU/cylp7H/35x7e1YVuWYeoaWPTKBHptsiP1PftWP7dIdR3gOkAUaqqYgX2BQa5DdFZDv7XpP+q7TL/haD687nBMU296bbw9Cyb/hd6b7kxD35X/Sv1H7c+MO86guDhP0/pbsOjlx+i3nY4ylJqQin3Va22OOZFrl9uWLmTxWxNZ/6RbqGvqw6wHsyx8+TEWv/4sQw+9/HOP77v1PvTdOryocd4zd9Nvh2+z5N1JLHr5Mer7D2HgPsdiTK39TJYa8UXPD7bPZTOTXQfpjpr57ixdVPI11zm6YmnuJRoGDKW+9wBMfQO9N9+Vec/cTcu8j5h+4/F8eMMx2JZlTL9x5Rf2WhfMofDft+i92S7kn7+Htb9zTvhCYG6Ko7+JSFV8w3WA7upwxGyMOdBa+4eO7kuArYG1XYfoiob+Qyh89AbFlqWYhiaWvj+F/jvtR/8dvrXiMR/88gDWP/HmlT5v3tN3stYe4Qoi21IAY8AYbOuyquYXqbIxwKWuQ3RHOSPmc8u8L+7GuA7QVU3DhtN7+Ghm3DaeGbf+AKyl38g1D/4LM98BoMfQTQDou81XmXHLKRRmvkOvjXeoeGYRh3b1/KCH6xDdYay1q/6AMV8n/JVgLHBvuw/1B7a01iZqWYrnB38CvuM6h4hUxV65bOYp1yG6ak0j5o+AfwFLgUntbn8mXN2QGJ4f1AF7uc4hIlWT2N+QYQ1zzNbaKcAUY8xd1trWKmaqhG3RuX4itWQMcLHrEF1VzhzzW8aYdz97q3iyaCX6p6eIdNounh/0dB2iq8pZx7xju/d7El6PnrSLNFTMIrWliXB738ddB+mKDkfM1to57W7TrbVXkaAjmTw/aAD2dJ1DRKousQOyctYxb9/uj3WEI+h+FUsUvR1IVl4RiUZ6ixn4Rbv3W4EcydrPeLTrACLixM6eHzTlspnEXVHVYTFbaxP7U6ckNacaiEinNALDgamug3RWh3PMxpjBxphrjDGTjTGTjDFXG2MGVyNcREa6DiAizmzpO
kBXlLNc7h5gFuF2egeU3r93jZ8RE6X9l7/kOoeIOLOV6wBdUc4c8yBr7SXt/nypMWa/SgWK2BZAoq+ZF5FuSWQxlzNiftwYc7Axpq50GwsElQ4WkW1cBxARp1I7lXEicDewrHS7BzjdGLPAGDO/kuEisIXrACLi1KaeHzS5DtFZ5azKSPIa4OGuA4iIU/UkcGVGOasyHivnvpjSC38ikrjpjNWOmI0xPYHewNrGmIGAKX2oPzCsCtm6pbTV56auc4iIc4l7AXBNUxknAuMJS7j9wYbzgV9VMlREPMKNTESktqWnmK21VwNXG2NOtdZeW8VMUfmC6wAiEgsbuw7QWeWsY84bY4747J3W2v+rQJ4oreM6gIjEwhDXATqrnGLeqd37PYEvE05tqJhFJAnSV8zW2lPb/9kYMwC4o2KJopO4fwwRqYgenh8MyGUzeddBylXOBSaftRjYLOogFaARs4gsl6iBWjkb5T8E2NIf6wmvpvt9JUNFJFH/ECJSUUOAt12HKFc5c8w/b/d+K/C+tfbDCuWJkkbMIrJcogZq5Zz59yTwOuHxTAOBQqVDRUTFLCLLJaoPyrkkeyzwT8LTsccCE40xB1Q6WAQS9RNSRCoqUX1QzlTGecBO1tqPAYwxQ4BHgfsqGaw7PD+oBwa5ziEisZGoYi5nVUbd8lIumVPm57k0mE/39hARSVQxlzNi/rsx5mHgd6U/HwT8tXKRItHoOoCIxEpv1wE6o5wLTM4yxuwP7E44Cr3JWvvHiicTEYlOvesAnVHOiBlr7QPAAxXOIiJSKWV1XVzEfa64qzS/LCLtJWrEnNZiFhFpL1Ej5kSFFemsk+sffHZcw5/Xcp1D3FpEz5mQcR2jbOXslTEaaAY2Kj3eANZa+8XKRusWTWUIALe2fW378Q33z+xhWj3XWcSdfiz5yHWGzihnKuMW4JeEqzJ2AnZk5T2aRWJrKU29TmoZn7d2xUZcUpuKrgN0RjnFnLfW/s1a+7G1ds7yW8WTdY9GzLLChOL2I/9tN33GdQ5xqs11gM4op5gfN8b8zBizqzFm++W3iicTidDhhXO3bbV1ifp1ViKVlM3XgPJe/Nu59HbHdvdZYJ/o40Sm1XUAiZdF9Op3VsuJr1/Z44ZhrrOIE4k5vQTKu/JvTDWCRCzuUy3iwB+Le+w0rvjn5zavm76b6yxSdfNcB+gMY+2qXxMxxhxmrb3TGHP6qj5urf1lRZN1k+cH8wn3kBZZYSDzP5nUNK5YZ+zarrNIVV1Mc77ZdYhyrWmOuU/pbb/V3OJutusAEj9z6T/o0tbD3nSdQ6ouUSPm1U5lWGtvLL29uHpxIjUL2Nh1CImfW9u+vtsxDX+buIGZvXPHj5aUSEcxL2eM6QkcC2wF9Fx+v7X2mArmisIs1wEkvg5cdtFGzzWdmjeGAa6zSFUkqpjLWS53B7AusC/wJLABsKCSoSKiYpbVmsHgdX/V9p2prnNI1aSumDe11l4ALLLW3k54wfmIysaKhOaYZY1+3nrQHnNsv3+7ziFVkaiVWuUUc0vp7TxjzNbAAMCrWKLoaMQsHTqgcNFga1nsOodU3PuuA3RGOcV8kzFmIHAB8GfgVeCnFU0VDRWzdOg9O2zDu9q+/KLrHFJRc2nOz3cdojM6LGZr7W+stXOttU9aa79orV3HWvvraoTrJhWzlOWC1qP3WGB7veI6h1RMznWAzipnVcZawBGE0xcrHm+t/WHlYkViuusAkgyWurqDC+c3/aXHeQVj6OE6j0Qu5zpAZ5UzlfFXwlKeBkxqd4s7XUQgZXvFbrxpUNzledc5pCJyrgN01movyV7xAGMmW2sTuZuc5wcfAuu7ziHJ0EBry7Sm43K9TGEz11kkUqfRnL/GdYjOKGsdszHmeGPMesaYQctvFU8WDY2apWytNDQeVTi7xdpk7d0rHcq5DtBZ5RRzAfgZ8DyfTmP8q5KhIvSG6wCSLBPtlls+XRyhTfXT5R3XATqrnGI+nfAiE89au
3HpFufz/tp73XUASZ4TWk4f1WLrE7XuVVZrKQkcoJVTzK9AYhfgv+w6gCTPUpp6ndxy2lydE5gKr9CcT9zBGeWcYNIGvGSMeRxYtvzOBCyXA5jiOoAk0yPFHbedYjd5elvzzh6us0i3JPKS+3JGzH8CLgOeI1nL5chlM7OB/7rOIcl0WOHckW22bobrHNItL7kO0BXlHC11uzGmB7B56a43rLUta/qcmJlKuDueSKcspHf/c1qPf/3njTeu5zqLdFkii7nDEbMxZm/gLeBXwPXAm8aYPSucK0qazpAuu69tr1FvF4c95zqHdEmRhH7/lzOV8Qvgq9bavay1exLuy3xlZWNFSt9U0i1jCxcML1qTqG0jBYB3aM4vdB2iK8op5kZr7YrlJtbaN4HGykWK3JOEPzlFuuQTBgzOth6SuCVXkpjrLT6nnGL+lzHmFmPM3qXbzSTkxT+AXDYzl3CeWaTLbmr75m4f2UH/dJ1DOuVJ1wG6qpxiHke4lvmHwGmE+zGfVMlQFfCE6wCSfAcuu+gL1pKofX1r3OOuA3RVOfsxLyM89+9Ea+13rbVXlu5LksT+A0l8TGfIeje0fTuRLybVoI9ozid2r5zVFrMJNRtjZhNe2vyGMWaWMebC6sWLzFNonlki8NPWg3b/xPZL5BKsGpPowdiaRszjgdHATtbawdbaQcDOwGhjzI+qki4iuWxmHgldzyhxY8yBhQsHWssS10k+65gHl7DOzxaw9fUrL0S4dmKB4dctZKvrF3L2I0sBePaDVra5YSE73byQtz8Jxyzzllr2vXMRHW0FnBCpLeYjgEOste8tv8Na+y5wWOljSfOE6wCSDu/Y9Te6p21M7F4IPGrbRv5+WO+V7nv8vVYefKOFqSf14ZWT+3LmbuEBLb94vsD9Y3vxk316csOLBQAueXIZP969CWNM1bNXQGqLudFaO/uzd1prZ5Gs5XLLPeE6gKTHea3H7r7Q9nzVdY729tyogUG9Vi7VG/5VwN+9iaaG8P51+oTf8o31sKQVFrdYGuvhnU+KTF9QZC+vnO1zYu8DmvPvug7RHWsq5kIXPxZXT4E2QJdoFKmrP7RwXqO1xHp7gjfnFHn6/VZ2/s1C9rptES9OD78Fzt29iRMeWspVEwucMqoH501YyiVjmhynjcw/XAforjUV80hjzPxV3BYAI6oVMCq5bCYPTHSdQ9Jjqt1ks78VRz3rOseatBZh7lJ44dg+/OwrPRl732KstWy7bj0vHNeHx4/sw7tziwzrV4cFDrpvMYc9sISZCxP9WvkfXQfortUWs7W23lrbfxW3ftbaJE5lAPzedQBJl9NaTtltqW1823WO1dmgv2H/LRowxjBq/XrqDMxe/OmLe9ZaLn1qGRfs2cTFTy7j4r2bOGybRq6ZmMRfigGYDzzqOkR3lXOBSZr8Hi2bkwi10NDjmJazllkbz/+v9vtSIxPeC/eJf3NOG4U2WLv3p/PQt09pIbNZAwN7GRa3QJ0Jb4tjPUGzRn+hOZ/YnyrL1VQx57KZGcDTrnNIujxX3Hqr54tbOv//6pD7F7PrLYt4Y06RDX65gFsmFzhmu0benWvZ+vqFHHzfEm7fr9eKVReLWyy3T2nh5J3ClRqn79KD7/1+Cec+tpRxOyX1l2IecB0gCiYlaxbL5vnBScANrnNIuvRi2eKpTcfNbjRtG7rOUsOWAGvTnE/qUXgr1NSIueR+IHFngEm8LaGp9yktp2prULceTkMpQw0Wcy6bmQVMcJ1D0ufh4qjtphU3fsZ1jhp2v+sAUam5Yi6513UASadDCz8e0WaNzpmsvoWE55OmQq0W8x9J5kUyEnML6DPgvNZj33edowbdm9TTSlalJou5tHl+4q8Okni6p22fnd8trvu86xw15jeuA0SpJou55C7XASS9xhYu3LRozSeuc9SIl2nOv+A6RJRquZjvBz5yHULSaTZrDflp60Gvuc5RI25xHSBqNVvMuWymBbjedQ5Jr1+3fXv0f
+3AF13nSLnlJyylSs0Wc8mNwFLXISS9Dig0r28tC1znSLE/0ZxP3frxmi7mXDYzG801SwV9aIcMu7kt82/XOVLsRtcBKqGmi7nkKtcBJN1+0nroHnNtXx3iGr1JNOcTfVLJ6tR8MeeymZeBx1znkDQzZmzhwgHWatosYle4DlApNV/MJVe7DiDp9pbdwPtD2146qCE6b5OiS7A/S8Uc+gvhP7RIxfitx+++yPbUErpo/JzmfCz3wI6CihnIZTMWuMZ1Dkm3InX13y/8uC7u5wQmwEzgdtchKknF/KnfAp87FVwkSi/ZTYf/o7jDc65zJNw1NOdTPV+vYi7JZTMLSfGLCRIfp7b8cJdltvEd1zkSagE1cGGYinllv0KXaUuFFWhsOrblzMVxPScw5q6iOT/PdYhKUzG3k8tmlgCXus4h6fdMccSIiXYLbarfOXOAn7sOUQ0q5s/7DfCe6xCSfscUztqhxdZ/6DpHglxOc36+6xDVoGL+jNLmRue7ziHpt5iefca3/GCm6xwJ8R/CqcaaoGJetd8B2hVMKi4o7rLDK8WNNKXRsR+nfSVGeyrmVSitaz7DdQ6pDYcUzhvRZo1Gzqs3iS5sNmaM8YwxL3fi8eONMb3b/dnZUVUq5tXIZTNPE54NKFJR8+k74ILWo/W6xuqdQXPeVuF5xgO9O3xUFaiY1+wc0FVaUnl3t/3PLu8X10nV8UgRuZfm/JPd+PwGY8ztxpipxpj7jDG9jTFfNsb82xgzzRhzqzGmyRjzQ2AY8LgxZsWOdcaYy4wxU4wxLxhjhnb7b1MmFfMa5LKZt4DLXeeQ2nBg4aJNipa5rnPEyDzCUWx3DAdustZuA8wHTgduAw6y1o4AGoBx1tprCK9hGGOtHVP63D7AC9bakcBTwPHdzFI2FXPHLgNecR1C0u9jBg75ZeuBr7rOESPn0pz/bze/xn+stc+W3r8T+DLwnrX2zdJ9twN7ruZzC4QbnEE4z+11M0vZVMwdyGUzBeBY0FVaUnnXtX139Md2rX+5zhEDzxHN6STdmZtusdYu//w2wtF1VaiYy5DLZiaiPZulSg4oXLSutThbERADLcCJEb3gt6ExZtfS+4cAjwKeMWbT0n2HA8vnsBcA/SJ4zm5TMZfvfEAbz0jFfWCHbnBL29cnu87h0M9pzpe9zK0DrwFHGmOmAoOAK4GjgT8YY6YR/ib869JjbwL+1v7FP1fMpyN16YjnB2OACa5zSC2w9qWmE6atZRZt4zpJlb0DjKA5v8R1EJc0Yu6EXDbzOHCz6xxSC4w5qHBBP2tZ5jpJFbUBR9Z6KYOKuSvOAqa7DiHp94bdcOMHins87zpHFV1Kc/7Zjh+WfirmTsplM3lgnOscUhvObjlh98W26XXXOargOeAS1yHiQsXcBbls5iHgWtc5JP3aqG84rHAu1tLqOksF5YHv05xvcx0kLlTMXXcGUEu/Zoojk+3mX5pQ3C7Nv+KPozmfcx0iTrQqoxs8P1gfmAys4zqLpFsThaVTm46b0WRaN3adJWJ30Jw/wnWIuNGIuRty2cx04GDCV5NFKmYZPXoe33LGAmu7dSVb3LwF/MB1iDhSMXdTaQmdTjyRinuqOHKbf9nhT7vOEZH5wLdpzi9wHSSOVMzRuAJ40HUISb+jCmdv32rrkn5OYBE4lOZ8Law26RIVcwRKJ54cCbztOouk2yJ69f1Ry8nd3XHNtfNozgeuQ8SZijkipfXN+wOLXWeRdHuouNuOrxW/kNRVGvfQnM+6DhF3KuYI5bKZacAxdG+rQZEOHVI4f8s2a2a5ztFJkwm/P6QDKuaI5bKZe4HTXOeQdJtHv4EXtx6RpKmzmcB+2gejPCrmCshlM9cCl7rOIen2f2377vpBMs4JzANfozn/H9dBkkIXmFSQ5wc3ACe5ziHpNZRPPn6h6ZQmYxjgOstqLAH2pTmflmV+VaERc2X9ALjPdQhJr5kMWufqtv2nuc6xGq3AWJVy52nEXGGeH/QA/
kp4CKRIRbzYNG7SEJPfwXWOdixwBM35O10HSSKNmCusdJjrfoAO2JSKOaBw0VBrWeQ6Rzt+f6fyAAAGJklEQVTjVcpdp2Kuglw2sxD4BvCG6yySTu/bdTe4ve2rk1znKPlfmvPXuA6RZJrKqCLPDzYkPDNwE9dZJH0MxeJLTSe8MsAsHuEwxgU057UiqZs0Yq6iXDbzAbAH8KrrLJI+lrq6gwsX9HZ4TuDpKuVoqJirLJfNzAD2IrwKSiRSr9mNNnmwuFu11zZb4CSa81dW+XlTS1MZjnh+MAAIgNGus0i6NNDaMrXpuHd7m8LwKjxdG3A0zfk7qvBcNUMjZkdKmx59lbCcRSLTSkPjkQW/aG3FD3BoAQ5SKUdPxexQLptZTLiU7reus0i6vGi/tMUTxZHPVPAp5gPfojl/fwWfo2ZpKiMmPD+4DPix6xySHk0Ulk5rOu6/PUyrF/GXzhGW8ssRf10p0Yg5JnLZzHmEl3Cn+Zh6qaJl9Oh5Usv4fMTnBD4HjFIpV5aKOUZy2cz1hJduz3SdRdJhQnH7kZPtZlHtVXEXsA/N+aTtA504msqIIc8PhgF/AHZznUWSrw9LFkxpOn5BgykO6+KXsMBFNOcviTKXrJ5GzDGUy2Y+AvYGrnUcRVJgEb36ndVy4vQufvpCwpUXKuUq0og55jw/+D5wE9DbdRZJtod7nP3s8LoPO7Nu/iXCbTvfqlQmWTUVcwJ4fjACeADY1HUWSa6BzP9kUtO4tjpjh5Tx8OsJL7F2dXl3TdNURgKUDnndEfiz6yySXHPpP+jS1sM6OicwDxxAc/4HKmV3VMwJUbpScD/gdMLjekQ67da2r+/6oV174mo+/E9gO1004p6mMhLI84NNgd8QboYk0inrMee/zzWd2qvdOYFtwM+AC2nOtziMJiUaMSdQLpt5GxhDeEHKQsdxJGFmMHjda9v2W35O4CvArjTnz1Upx4dGzAnn+cFGwM3AV1xnkURpeaTHmRduVvfRlZpLjh8Vc0p4fnAs8AuI7TH2Eh8vACeUXlSWGNJURkrksplbgC2Bh1xnkdiaD5wCjFYpx5tGzCnk+cH+wOXA5q6zSCxY4F7gzFw209UrAKWKVMwp5flBA3A8cBEw1HEccedR4JxcNqOjzBJExZxynh/0Bc4EzgD6Oo4j1TMJ8HPZzKOug0jnqZhrhOcHQwlHz8cDDY7jSOW8A5wP3JvLZvTNnVAq5hrj+cHmhPPP+7vOIpGaCVwC3JTLZrQeOeFUzDXK84NdCI+y+iZgHMeRrptNuD3sL3PZjC42SgkVc43z/GALwjnow4AejuNI+aYBVwN35bKZpa7DSLRUzAKA5wfrAT8knIMe7DiOrFoRCICrctnMBNdhpHJUzLISzw96AocApwLbOY4joQXAb4FrctnMO67DSOWpmGW1PD8YDZxMuN2oTlCpvneA64Bbc9nMfNdhpHpUzNKh0lrobwMHA/uiuehK+gj4PeGVehO15K02qZilUzw/GAh8j7Ckx6D9VqLwMXAfYRk/k8tmio7ziGMqZukyzw/WBQ4knJPeBS2764w5hOc43gs8kctm2hznkRhRMUskSvtC7wvsA+yN9udYldcJ9674K/CoLgSR1VExS0V4frAlYUmPISzqQU4DufEh8ARhGT+qnd2kXCpmqTjPD+qAbfi0qEcDA52Gip4FXgWeAZ4mnCt+320kSSoVszhRuqBla2Crz9z6u8xVBgvkCEu4/e21XDazwGEuSREVs8SK5wdfICzo5aW9EbAeMIzqlfZSYBbhPhT/4fMFvLhKOaRGqZglMTw/6E1Y0kMJLxsfTDh3PYhwamT5dqam3VuzivtaCVdFLC/f9m9n5bKZRRX9i4h0QMUsIhIzujhARCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzKmYRkZhRMYuIxIyKWUQkZlTMIiIxo2IWEYkZFbOISMyomEVEYkbFLCISMypmEZGYUTGLiMSMillEJGZUz
CIiMaNiFhGJGRWziEjMqJhFRGJGxSwiEjMqZhGRmFExi4jEjIpZRCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzKmYRkZhRMYuIxIyKWUQkZlTMIiIxo2IWEYkZFbOISMyomEVEYkbFLCISMypmEZGYUTGLiMSMillEJGZUzCIiMfP/yJL5lUvL2ZgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "series = pd.Series([(oneType), (twoTypes)], index=['Json', 'both'], name='Domain output')\n", + "series.plot.pie(figsize=(6, 6), autopct='%1.0f%%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "Most of the domains have only one type of value output, but not all of them. 16% have outputs that can be both JSON and non-JSON\n", + "\n", + "Out of the ones that have one value type, 78% produce JSON. \n", + "\n", + ">Are there a set of location domains that always produces a JSON?\n", + "\n", + "Yes, there is a set that always produces the value as a valid JSON, but not all of them. There are also ones that never produce JSON and some that produce both. \n", + "\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "location_domain\n", + "twitter.com 5594\n", + "petsmart.com 2313\n", + "cdiscount.com 1835\n", + "debenhams.com 1229\n", + "mediamarkt.de 1094\n", + "Name: value_md5, dtype: int64" + ] + }, + "execution_count": 189, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Using the above method I could tell that the domains did not have only one \n", + "#output, but I could not find a way to tell the output type. \n", + "#That's why I decided to calculate by hand as you probably noticed, sorry. 
\n", + "\n", + "location_domain_group_unique_md5 = location_domain_group['value_md5'].nunique()\n", + "location_domain_group_unique_md5.sort_values(ascending=False).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## The JSON values are always from the same location or related domains?\n", + "For \"value\" comparison I will use value_md5 instead, because its reliable and faster\n", + "\n", + "* value_md5 is the calculated md5 for the value columns" + ] + }, + { + "cell_type": "code", + "execution_count": 191, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['value_md5', 'location_domain', 'value_len'], dtype='object')" + ] + }, + "execution_count": 191, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet('all_json_above_mean.parquet', columns=['value_md5','location_domain', 'value_len'])\n", + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_md5location_domainvalue_len
0cff77029e3ae45dd439a62987b1d8340canada.ca3713
19ac0a0a0afb677c8fd985a7c2f4ddbc5tmall.com103878
29ac0a0a0afb677c8fd985a7c2f4ddbc5tmall.com103878
3983f2d6827a86b128a02cf7442c94af1coches.net1686
4b2ad4d7452aeed3df181b1501cc20231coches.net1686
\n", + "
" + ], + "text/plain": [ + " value_md5 location_domain value_len\n", + "0 cff77029e3ae45dd439a62987b1d8340 canada.ca 3713\n", + "1 9ac0a0a0afb677c8fd985a7c2f4ddbc5 tmall.com 103878\n", + "2 9ac0a0a0afb677c8fd985a7c2f4ddbc5 tmall.com 103878\n", + "3 983f2d6827a86b128a02cf7442c94af1 coches.net 1686\n", + "4 b2ad4d7452aeed3df181b1501cc20231 coches.net 1686" + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/worker.py:2791: UserWarning: Large object of size 1.89 MB detected in task graph: \n", + " (\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
location_domain
nunique
value_md5
000599fa6f59053c67e6ccbef137a0d21
0005e12de9897336bf5c7e352a8075681
00076462ead16ac77a1d56745584fd5b1
0007a2345e42bca1d5cac86e356bb87b1
000b0b6b104a36cbc6f31b923e1b31a71
\n", + "" + ], + "text/plain": [ + " location_domain\n", + " nunique\n", + "value_md5 \n", + "000599fa6f59053c67e6ccbef137a0d2 1\n", + "0005e12de9897336bf5c7e352a807568 1\n", + "00076462ead16ac77a1d56745584fd5b 1\n", + "0007a2345e42bca1d5cac86e356bb87b 1\n", + "000b0b6b104a36cbc6f31b923e1b31a7 1" + ] + }, + "execution_count": 207, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aggmd.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": {}, + "outputs": [], + "source": [ + "f1 = aggmd['location_domain']['nunique'] > 1\n", + "aggf = aggmd[f1]" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "35746" + ] + }, + "execution_count": 215, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unique_values_count = len(aggmd)\n", + "unique_values_count" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(35, 35711)" + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "values_multiple_origin = len(aggf)\n", + "values_single_origin = unique_values_count - values_multiple_origin \n", + "(values_multiple_origin, values_single_origin )" + ] + }, + { + "cell_type": "code", + "execution_count": 219, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 219, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAFbCAYAAAAurs6zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd8ZVW9/vHPN9PCFDbNkRkQASleyiBNQS+9eKUNKlLkR1VA4KJwwSvlCktApag4CCJdmh1w6KI0B5Q6FEGBQUHKDKCUTTKTmUly1u+PdTLJhJST5Jyz9tn7eb9eeZ2ck+TkmUzyZGXttdc27z0iIpIvTbEDiIhI9ancRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHIXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOSQyl1EJIdU7iIiOaRyFxHJIZW7iEgOqdxFRHJI5S4ikkMqdxGRHFK5i4jkkMpdRCSHVO4iIjmkchcRySGVu4hIDqncRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHIXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOSQyl1EJIdU7iIiOaRyFxHJIZW7iEgOjY4dQGREXNIMrASMA8aUX8aWb7u+vzv7eGkFXselbfWOLFIP5r2PnUHk/VwyCvggsAowtcft1F6PLT/Cz9QCvF5+mdfj9d4vb+LSzhF+LpG6UblLXKHE1wc2K79sDHwYmAyMipistxIwB5gNPF6+nY1L34maSqQfKnepH5c0AevSXeSbAxsB42PGGqGX6Fn2ofBfj5pIBJW71JJLVga2IZT45oRR+aSomerjdULRPwTcBjyGS/WDJnWlcpfqcsl6wB7AdOATgMUNlAnzgFuBW4Df49IFkfNIAajcZWTCnPknCWU+HVgrbqDMWwjcQyj6W3Dpy5HzSE6p3GXoXDIB2JlQ5rsSliLK8PwFuJlQ9g/h0lLkPJITKnepTFhPvhewD7Aj0Bw3UC69AVwLXIZLn40dRhqbyl0GFubQjwAOYORryqVyfwIuA36FS+fHDiONR+Uu7xdG6V8glPqnIqcpuhbg58CFuPSp2GGkcajcpZtLpgBHE0pd8+jZcx9wPjBTZ8vKYFTuAi7ZBDgO2JuwL4tk28vAjwlz82/FDiPZpHIvMpfsBHwT2Cp2FBmWVuA84Hu49L3YYSRbVO5FFEbqZxNWvUjjews4C7gAly6MHUayQeVeJC5ZE/g2YTmjzhzNn9eA04ErcGlH7DASl8q9CFzyAeBUwoHSMZHTSO3NIfx//1J72hSXyj3PwpmkxwMnUIwNu2RpTwAn49LbYweR+lO555FLRgOHE0ZvH4ycRuKbBZyESx+IHUTqR+WeNy7ZDfgBsHbsKJI5lwPH49I0dhCpPZV7XrhkOWAGcGDsKJJprwGH49LbYgeR2lK554FL/ouwD8kqsaNIw7gaOFaXCcwvlXsjc8kkwhTMl2NHkYY0D/gKLr0pdhCpPpV7o3LJDsAVwGqxo0jD+zlwjLYyyBeVe6MJyxvPBb6CTkSS6nkDOBqXXh87iFSHyr2RuGRr4EpgzdhRJLd+TSj5f8UOIiOjcm8ELhlL2Avma2i0LrU3F/gcLn0odhAZPpV71rlkMnADumiG1NciwsHWn8YOIsOjcs8yl2wE3IQOmko8MwgnPuniIA1G5Z5VLvkscA0wIXYUKby7gL1x6duxg0jlmmIHkD645JvA9ajYJRt2AB7BJRvEDiKV08g9S1yyDGHt+r6xo4j0oRU4CJfeEDuIDE7lnhUuWQX4LbBZ7CgiA/DAGYDTXvHZpnLPApd8nFDsU2JHEanQTOAAXNoSO4j0TeUem0v2I0zFNMeOIjJEjwM749J/xw4i76cDqjG55CjgOlTs0pg2Bu7FJSvHDiLvp3KPxSXHAheiM06lsa0PzMIlOhcjY1TuMbjk68B5sWOIVMlahIL
/SOwg0k3lXm8uOQU4J3YMkSpbjVDw68YOIoHKvZ7CyUlnxo4hUiNTgLtxyVqxg4hWy9RPmIrRiF2K4BVgG1z6YuwgRaZyrweXHA1cEDuGSB39E9gal74cO0hRqdxrzSWHEi5erVUxUjT/IBT8a7GDFJHKvZbCCUrXomMbUlzPAFvqTNb6U7nXiku2ImyVOiZ2FJHIbgX2wKWl2EGKRCPKWggndFyPil0EYFfCZSKljjRyrzaXjAfuJ5yaLSLdDsalV8UOURQauVfflajYRfpyCS75ZOwQRaFyryaXnAzsHTuGSEaNBW7EJR+OHaQINC1TLS7ZjbDHtX5higzsKeCTuHR+7CB5piKqBpf8B2HrXn09RQY3DbgWl+jcjxpSGY2US5YjjNiXjR1FpIHsifZZqilNy4yES0YR1vB+OnYUkQa1Py79WewQeaSR+8ichYpdZCQuwSVrxg6RRxq5D5dLdgLujB1DJAdmAdvqDNbq0sh9OFwyEbg0dgyRnNgK+FrsEHmjch+ecwGt1RWpnu/oKk7VpWmZoXLJ9sAf0Ba+ItX2EPApXNoZO0geaOQ+FGE65nJU7CK18Ang67FD5IXKfWjOAlaPHUIkx76FSzaIHSIPNC1TKZdsA9yDRu0itTYb+AQu7YgdpJFp5F6JsI3vFajYRephE+D/YododCr3ynwX0IkWIvVzCi7ZJHaIRqZpmcGEy+Xdh0btIvX2F2BjrZ4ZHo3cB+KSsWh1jEgsGwKHxA7RqFTuAzsCWDt2CJEC+xYuWSZ2iEakcu9PWNOugzoicU0Fjo0dohGp3Pt3LDA5dggR4Ru4ZMXYIRqNyr0v4RtJZ8qJZEOC/ooeMpV7305CV1YSyZKjcMnqsUM0EpV7by5ZFTg6dgwRWcpYdFm+IVG5v99pQHPsECLyPl/EJRvHDtEoVO49hf2kta5WJJsMODt2iEahcl/amcCo2CFEpF874ZIdY4doBCr3Li7ZDPh87BgiMiiN3iugcu/2HbTNgEgj2ASX7Bw7RNap3IHyQZqdYscQkYodFztA1qncg6/GDiAiQ/JpXPIfsUNkmcrdJSsB+8aOISJDYsDXYofIMpU7HI7WtYs0ogO150z/il3uLhkNHBk7hogMyzKEwZn0odjlDp8FVo0dQkSG7QhcUvQe61PRvyhHxA4gIiPyYeDTsUNkUXHL3SVrAtvHjiEiI6apmT4Ut9zhS+ikJZE82A2XTI0dImuKWe4uGQUcHDuGiFTFaMJgTXooZrnDLoRrM4pIPnwZl+gv8R6KWu6Hxg4gIlW1GrBp7BBZUrxyd8l44L9ixxCRqtszdoAsKV65w47ojFSRPJoeO0CWFLHcd48dQERqYoPyEmehaOUeDrjsGjuGiNSMpmbKilXu4YDLlNghRKRmNDVTVrRy15SMSL59SjtFBip3EcmTUejnHChSubtkFWDj2DFEpOY0NUORyh12ix1AROpiZ1yyTOwQsancRSRvxhPOZym0YpR7+C2+Q+wYIlI3hZ+aKUa5h2Iv/J9pIgWyVewAsRWl3LeJHUBE6mptXJLEDhFTUcp9k9gBRKSujILvElmUctcSSJHi2Sx2gJgGLXcz+5yZzTGz1MzeM7MWM3uvHuGqwiVrAMvHjiEidbd57AAxja7gfc4Bdvfe/63WYWpEUzIixaSR+yDeaOBiB5W7SFGtjktWih0ilkpG7o+a2S+B3wKLuh703t9Qs1TVpXIXKa7NgDtih4ihknJfFlgA7NzjMQ+o3EUk6zZH5d437/0h9QhSE2GzsMmxY4hINIWdd++33M3sf73355jZjwgj9aV4779a02TVoVG7SLEVdsXMQCP3roOoj9YjSI2o3EWKbQoumYpL58YOUm/9lrv3/uby7VX1i1N1hT5DTUQAmAao3Hszs5t5/7RMShjRX+y9X1iLYFWyYewAIhLdqrEDxFDJOvd/AK3ApeWX94A3gHXK97PJJQasEjuGiEQ3NXaAGCpZCrmx937rHvdvNrM/eu+
3NrNnahWsClYExsQOISLRFXKQV8nI/QNmtlrXnfLrXWd9La5JqupYOXYAEcmEQpZ7JSP344H7zezvhG001wCOMrMJQJYPtk6JHUBEMkHl3hfv/W1mtjbwUUK5P9vjIOoPaxluhDRyFxEoaLn3Oy1jZtuXbz8H7Ap8BFgT2KX8WNap3EUEYCVcMjZ2iHobaOS+DXA3sHsfb2uEvWU0LSMiEGYcpgD/jB2kngY6iek0M2sCbvfe/6qOmapFI3cR6bIKBSv3AVfLeO9LwH/XKUu1aeQuIl0KN+9eyVLI35vZCWb2ITNboeul5slGTiN3EelSuHKvZCnkoeXbo3s85gkHV7NMI3cR6aJy7817v0Y9glSVS5qBJHYMEcmM5WIHqLcBy93MJhNG7OsTRut/BS703r9Zh2wjsXzsACKSKYVbCjnQOvdPAY+U714NXFt+/eHy27JMe8qISE/Ryt3MWsu3q5vZF3s8vpmZnT/Ix65uZk8P5/MOdED1+8Ce3vvTvPc3ee9neu9PA/YEftDjk19mZusN55OPJPggRtXgOavm0JltTD63hQ1+3LrksbfbPDtdM5+1f9TKTtfM5522sMuy956v3r6Qtc5vYdpFrcye1wnAc//uZNNLWtnoJ638+ZUOADpKnh2vns+C9vddOEukT3e80MG6F7Sy1vktnHX/IgD2v2EB0y5q5eS7unfzPuO+Rcx8tj1WzGrIwsh9dWBJuXvvH63lFe0GKvdlvfeP937Qe/8EMKnH/S977/9ai3AjUMmB4mgO/tgY7vh/45d67Kz7F7HDGqOZc8xEdlhj9JIftNtf6GDO253MOWYil+zezJG3tgFw8WPtnLVDM7/5wjJ8789h/7aLHmnngGljGD/G6vsPkobUWfIcfVsbt+8/nr8ePZGfP93OU2+EwcNTR05k1sudpAs981pKPDy3k+kfbeg/iIcdvjwIfbY8kH3azK4zsx3N7AEzm2NmHzczZ2Yn9PiYp81s9V5PdRawlZk9YWbHmdm2ZnZL+f2dmV1jZneXn/OwPnKMMrNzzewRM3vKzI4YKPdA5W5mtnyvByaY2Z3AmuXw+5jZvWa2WfntrWb2bTN70sweNLMPlh//SPn+I2Z2etefKSMJPohMj9y3/vBoVlhm6QKe+VwHB20Uvv8O2mgMv30ujMZnPtvBgdPGYmZssepo3l0I81pKjGmCtg7PgnYY0wTvLvTc/Hw7B27U0D+AUkcPv9bJWis0sebyTYwdZey7/hhufb6DtnYoec/iTs+oJjj1nkWcvu242HFHaqQj97WAGYSrOn2UMAL/T+AE4OQKn+NEYJb3/mPe+/P6ePs0wlYvWwKnmlnvfei/BKTe+80J14Y9zMz6XfAy0Aj3PODO8m+j2eXHjgM+BnzNe3+xmSXAkT0+ZgLwoPf+FDM7BzgMOJPwRZnhvf+5mX2ln8+3JLiZjQMeMLM7vfcvDpCxP5ku97680VpiyqTwu3bKpCbenF8C4LUWz4eS7l8Eqy5rvNbiOfrjYznwxjYWdcLFuzVz+n2LOGWrcZhp1J5l3uMJixNKfdwueb08sVYC67rf4+MMX35f3/12wMqP4cG873679+X7PR9/8q22SZOaS5NSP/4VjzGm2ZZ77NXF48c3j/LrXtg2cfr6E9+67/Xm995c1Dl5ysorvTTP4z1NlD+ekreSxwjPbZSWft13va38+JLHyrdWomnJx5V819ualjxPKbwPpfLrnqbyY+ZLNFkpvL/3mHV6837JY13P00T5Ofx8ml8Z4fzHi977vwCUr2Nxl/fem9lfCNMtT4zs6QGY6b1vA9rM7B7g472ed2dgmpntVb6fAGsDfXbkQNsPXGJmc4Ez6F4t8yLhm2pNM9vKez+rV5ksBm4pv/4YsFP59S0Jc/UAPwO+18enHFLwQTRcufenr9lzA1ZLmrj34AkAvPB2ibktJT66UhMH3NjG4k7PGduNY50Vc/NlGBHv+yzS3oXaVX4975dftx5lCz68z5J
C7Xnry4W89GPW9XHZOhjiMfA04UeBZzSlptHmm368x7LzgDcBdr/mndUu3H3Zuefe9a/Vnnmzo3nbNca2Hr3FhHeA8I1YLTUek7TS/MoIn2JRj9dLPe6XCD3awdIzIc3D+By9vz963zfgGO/97yp5sgHnpr33t9Bd1uHZw9mpuwDfLU/R9NTuve8K1DnY8/cypOCD6KzCc9TVByc2Ma8ljN7ntZSYPCF8n6w6yXgl7f4/fvU9z9RJS/8knHL3Qs7cbhznP7SY/Tccw+rLGd+6bxHXfW7pef2iMlvyQzfgb7vwVe2rf7PVydWy0Uod3PhMB5OsbXmAtoWLWW9FmGRtKwDMfLad7VYzxpba1p2bLuaOL45n6yvnL/vVzWxqox3XmUTbqzX+FC8BuwGY2SaE61701kKP45V9mG5m3yXMgGxLmMbpOZ30O+BIM7vbe99uZusAr3nv5/f1ZJVsP7BEeQ5ogff+WsLoe5MKP/RB4PPl1/ft5326go8pf651yhcEGY6GO6y/xzqjuerJEPuqJ9uZvm74vbjHuqO5+qnFeO958NUOknEsmb4BuO+lDlaZ1MTaK45iQTs0GYwyWNBwXwGpt81XGcWct0q8+E6JxZ2eXzzTzh7l77v2Ts+Mhxbz9U+NZUF798C65GFxww2dgDCyrqXrgRXM7AnCVPXzfbzPU0BH+ZjkcX28/WHgVkJfnuG9n9vr7ZcRzjWaXV5leDEDDKCHuqpkQ+BcMysRCvRI+p5i6e1Y4FozO74cPu3jfS4jzF3NtjDX8y+6p3KGKtPVtt/1C7j3pU7+vcCz6g9a+Na24zjxP8ey92/auPzxdlZLjF9/IYy6d1l7NLfN6WCtH7Uyfoxx5fRlljyP954zZy3iV3uF9z180zHsf0MbHSW4aNfh/FUoRTK6ybhgl2Y+fe0COr3n0I+NZf3J4Y+bCx9ZzEEbhZVX0z7YhAc2vKiVXdYazXLNjTVqLxt2uXvvXwI26HH/4H7etnM/Hz+xfNsO7NDrzff2eP157/3h/X3u8kaOJ1PhAVzrnkWpHTMbD7SVD0DsC+znvZ9es0/oklWBkc6xiUh+XI9L9xr83eIwMwe0eu8rGSxXZNCRe3k543eAqd77z5RPWNrSe3/5ED7PpsAF5RH5u3RvRlYrmR65i0jd1XpaZkS8967az1nJnPtPCfPhXWsunydMs1TMez/Le7+R936a935r7/0LQ4s5ZCp3EelpcewA9VZJua9UvhJTWM7lfQfZX43yHnld4iAiw/Gv2AHqrZJyn29mK1IuSzPbgr4PiGaHSzso4H+miPRrXuwA9VbJapn/AW4CPmJmDwAfADJ7YKKHucDk2CFEJBNU7r1572eb2TbAuoTlrs+Vl/Rk3VzCVgkiIr3XjOdeJatlDuz10CZmhvf+6hplqpbC/WeKSL80cu/D5j1ebyYswp9NuIBHlqncRaSLyr037/0xPe+Xd4K8pmaJqkflLiIAC3BptheB1MCQ9pYpW0DYrTHrVO4iAgUctUNlc+43071mvAlYD/hVLUNVicpdREDl3q+eex10AP/03td6+8xqULmLCKjc++a9v68eQWrgDcKZtLpihUixFXKg1++cu5m1mNl7fby0mNl79Qw5LC4tEQpeRIpNI/eevPcDXTGkUbxG94ZnIlJML8cOEEPFF+sws8n0uC6g974RvmBPs/Q6fREpntmxA8Qw6FJIM9vDzOYQLlR9H+FagbfXOFe1PBo7gIhE1ULfl7zLvUrWuZ8BbEG4BNQahDNUH6hpqup5LHYAEYnqCVxayO2/Kyn3du/9W0CTmTV57++hcTbkehJduEOkyAo7wKuk3N81s4nAH4HrzGwGGb9k1RIuXQg8EzuGiESjch/AdKANOA64A/g7sHstQ1VZYf9zRaS4P/8DrXO/wMw+6b2f773v9N53eO+v8t6fX56maRQ6qCpSTK3Ac7FDxDLQyH0O8H0ze8nMzjazRpln703lLlJMT5RPZiykfsvdez/
De78lsA3wNnClmf3NzE41s3XqlnDknqKAVz4XkWKub+8y6Jy79/6f3vuzvfcbA18EPgv8rebJqsWliwknM4lIsRR2vh0qO4lpjJntbmbXEU5eeh74fM2TVZemZkSKp9Dl3u/2A2a2E7AfsCvwMPAL4HDv/fw6ZaumQv8nixTQAuDZ2CFiGmhvmZOBnwEneO/frlOeWrk/dgARqat7cWln7BAxDXRAdTvv/aU5KHZw6V+Bf8SOISJ1c3PsALEN5xqqjeqm2AFEpG5U7rED1JHKXaQYZuPS12KHiK1I5T4LeCd2CBGpOQ3kKFK5u7QDuC12DBGpucJPyUCRyj3Qb3SRfHsVlxb6zNQuRSv329FWBCJ5dkvsAFlRrHJ3aQtwb+wYIlIz+uu8rFjlHsyMHUBEaqIVuDt2iKwoYrnrN7tIPv0ely6KHSIrilfuLn0VeDx2DBGpOg3ceiheuQc3xA4gIlW1CJX7Uopa7lcBhd5USCRnrseljb8PVhUVs9xd+grhYt8ikg+Xxg6QNcUs9+Di2AFEpCrm4NJ7Y4fImiKX+23Aq7FDiMiIXRY7QBYVt9zDRv5XxI4hIiPSDvw0dogsKm65B5ehA6sijewGXPpm7BBZVOxyDwdWb4wdQ0SGbUbsAFlV7HIPfhg7gIgMyyO49M+xQ2SVyt2lDwCPxo4hIkOmUfsAVO6BvklEGss84FexQ2SZyj34JeGbRUQaw0W4tD12iCxTuQPlbxLNvYs0hneBC2KHyDqVe7cfAXNjhxCRQZ2NS3Wx+0Go3Lu4tA1wsWOIyIDmomNkFVG5L+0K4NnYIUSkX6eXB2IyCJV7T2FLglNixxCRPj0PXB47RKNQuffm0huAB2PHEJH3+SYu7YgdolGo3Pv2jdgBRGQpjwG/jh2ikajc++LSPxK2BBaRbDgRl/rYIRqJyr1/JwKl2CFEhLtw6R9ih2g0Kvf+uPQvwLWxY4gUnCcMtGSIVO4DO5VwVXURieM3uFQb+w2Dyn0gLv0n4cxVEam/BcBJsUM0KpX74E4DXogdQqSATsalf48dolGp3Afj0gXAIejgqkg9zQLOjx2ikancK+HS+9F+FiL1EgZUWvo4Iir3yp0CPBc7hEgBnKTpmJFTuVcqbFZ0MJqeEamlP6JFDFWhch8Klz4IfD92DJGcWgAcqumY6lC5D903gb/FDiGSQ5qOqSKV+1C5dBFwENAZO4pIjmg6pspU7sPh0keAc2LHEMkJTcfUgMp9+BzwdOwQIjlwoqZjqs+81y/LYXPJhsCfgQmxo4g0qF/j0r1jh8gjjdxHIuwceQBh5zoRGZonCMuLpQZU7iPl0hsJu0eKSOXeBKaXt/eQGtC0TLW45BfAPrFjiDSAxcD2uPSB2EHyTCP36jmEcJ1HERnYUSr22lO5V0vYnmBP4PXYUUQy7Ee49PLYIYpA5V5NLn2VUPC6epPI+90F/E/sEEWhcq82lz4EHBY7hkjG/B3YG5d2xA5SFCr3WnDpNcC5sWOIZEQLsAcufTt2kCJRudfOicCtsUOIRFYC9self40dpGhU7rXi0hKwL+EMVpEi8oQ9Y26OHaSIVO615NJW4DPAo7GjiERwFC69KnaIolK515pLU2Bn4MnYUUTq6Dhc+pPYIYpM5V4PLn0H2BF4JnYUkTo4CZf+MHaIolO514tL/w3sADwbO4pIDZ2GS8+KHUK0t0z9uWQy8Adgw9hRRKrsRFx6duwQEqjcY3DJisDvgE1jRxGpkmNx6YzYIaSbyj0WlyTA7cCWsaOIjIAnrIrRwdOM0Zx7LN2raO6LHUVkmDqBL6nYs0nlHlP3OvhfxI4iMkQpsBsuvTJ2EOmbpmWywiUnAWeiX7iSfc8T9op5LnYQ6Z/KPUtcshtwHbBs7Cgi/bgT2AeXvhs7iAxMo8QscektwBbAC7GjiPThPGAXFXtj0Mg9i1yyPPBLYKfYUUQI1zz9iubXG4tG7lkUtiv4DGGkJBLTG8B2KvbGo5F71rnkYOA
nwLjISaR4Hgem49JXYgeRoVO5NwKXbAHcCKwcO4oUxq+Bg3HpgthBZHhU7o3CJVOBywjTNSK10gIcj0svjR1ERkbl3mhccghhLj6JHUVy5w+EM05fjh1ERk7l3ohcsgpwCbBL7CiSC63ACbj04thBpHpU7o0sHGw9D1guchJpXHcTRusvxQ4i1aWlkI3MpT8F1gdujZxEGk8rcBSwo4o9nzRyzwuXHAjMQKN4Gdy9wKG49MXYQaR2NHLPC5deTRjF3xI7imTWfOC/ge1V7PmnkXseuWQf4NvAR2JHkUwoAdcAp2olTHGo3PPKJWOAw4BvopOfiuxm4GRc+nTsIFJfKve8c8kE4Fjg62htfJH8CfgGLr0/dhCJQ+VeFC5ZATiJMOfaHDmN1M4zhJH6TbGDSFwq96JxyarAacAhwKjIaaR6XiH8v16FS0uxw0h8Kveicsm6hIOun48dRUbkbeA7wIW4dGHsMJIdKveic8lmwPHAXsDoyGmkci8BFwCX4dI0chbJIJW7BGG/miOBw4EPRE4j/fsj4WS1mbi0M3YYyS6VuyzNJc3AfsAxwMaR00jQRrjs4gxc+kTsMNIYVO7SvzBlcxih7CdFTlNETwGXAtfqotQyVCp3GVxYK78Poei3iJwm7+YTRumX4NKHYoeRxqVyl6FxyXrAnsDuwMfR/kTV8BZhZ8+bgN/h0tbIeSQHVO4yfC6ZDOxKKPqdgQlxAzWU5wllfhPwJx0clWpTuUt1uGQcsD2h6HcDPhQ3UOZ0An+mq9Bd+lzkPJJzKnepDZd8jFD0uwObUMyzYd8EZhE277oVl/47ch4pEJW71J5LlgGmAZsSin5Twt7zY2LGqrJ/AY8Bj5ZvH8Olr8SNJEWmcpc4wjTOhnSX/Sbl++NixqpQV5F3vTyqIpesUblLdoQ96NcnlPxUYEr5ZeUer0+scYoSobznAvN63b4KPKULXkgjULlLY3HJRPou/eUIe+OMKt92vTQRDma2Ax29bt9h6fKeC7yBSzvq9w8SqQ2Vu4hIDukEFBGRHFK5i4jkkMpdRCSHVO4iIjmkchcRySGVu4hIDqncRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHLksqqgAAAApklEQVQXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOSQyl1EJIdU7iIiOaRyFxHJIZW7iEgOqdxFRHJI5S4ikkMqdxGRHFK5i4jkkMpdRCSHVO4iIjmkchcRySGVu4hIDqncRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHIXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOTQ/wem8SZLamobjQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "series = pd.Series([(values_multiple_origin), (values_single_origin)], index=['multiple', 'single'], name='Value Origin')\n", + "series.plot.pie(figsize=(6, 6), autopct='%1.0f%%')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "The vast majority of values have only one origin. Only 35 distinct values are found to have more than one domain origin. \n", + "\n", + ">The JSON values are always from the same location or related domains?\n", + "\n", + "Almost: 0.097% of the values have multiple origins, while 99.9% are produced by only one domain. \n", + "\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 4e18a11938d628fee979e401903cad1150620192 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Sun, 31 Mar 2019 19:55:58 -0300 Subject: [PATCH 13/23] Readme update - Quantitative_Comparasion overview --- .../README.md | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/README.md b/analyses/2019_03_aliamcami_value_analyses/README.md index 6c7d919..d7195a9 100644 --- a/analyses/2019_03_aliamcami_value_analyses/README.md +++ b/analyses/2019_03_aliamcami_value_analyses/README.md @@ -1,6 +1,7 @@ # Overview -All the greatest values are JSON, but they represent very little percentage of the whole data. 
+## JSON +All the greatest values are JSON, but they represent a very small percentage of the whole data. ### Most of the data have small value_len (mean = 1356 for the 10% sample) @@ -25,14 +26,25 @@ All the greatest values are JSON, but they represent very little percentage of t ## The top 46745 gratest value_len are valid JSONs, that is 9.35% of the filtered sample (value_len > mean) and 0,41% of the original 10% sample. +--- +## Correlation of location_domain and value + +- One domain produces multiple JSONs +- One JSON is usually (99.9%) produced by a single domain. + +### + +- One domain can produce values that are either JSON or not, but most produce only one type +- Most of the domains that produce a single type produce JSON. + --- # Future questions ## About JSONs: -- **The JSON values are always from the same location or related domains?** -- **Are there a set of location domains that always produces a JSON?** +- **The JSON values are always from the same location or related domains?*** +- **Are there a set of location domains that always produces a JSON?*** - Does the JSON values follow a structure pattern? What pattern? - What data does the JSON hold? Is there any pattern on content? - Do they have nested JSON? Css? Html? Javascript? Recursive study on JSON properties. @@ -40,6 +52,8 @@ The top 46745 gratest value_len are valid JSONs, that is 9.35% of the filtered s - Is a JSON's structure for a single script_url domain always the same? - Is every JSON with the same structure produced by the same script_url domain? + *See notebook 'isJson_Quantitative_Comparasion.ipynb' for more information + ## General I'm think some things here maybe a crawler investigation or just wiki reading, since someone may have already described and explained. I just need to find, read and understand it. 
From a509bffb4471205335e70c44e7969279b27f247d Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Wed, 3 Apr 2019 23:14:14 -0300 Subject: [PATCH 14/23] DataPrep cleanup and new 'json_keys' and 'json_schema' columns to dataPrep final sample output --- .../isJson_dataPrep.ipynb | 2229 +++++------------ 1 file changed, 574 insertions(+), 1655 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb index f1cb759..d14915f 100644 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Start client" + "# Start" ] }, { @@ -21,46 +21,66 @@ "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", " defaults = yaml.load(f)\n" ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

Client

\n", - "\n", - "
\n", - "

Cluster

\n", - "
    \n", - "
  • Workers: 4
  • \n", - "
  • Cores: 4
  • \n", - "
  • Memory: 8.59 GB
  • \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "import dask.dataframe as dd\n", "from dask.distributed import Client\n", + "from dask.diagnostics import ProgressBar" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All sub samples and new samples with new columns/data will be saved under the \"DIR\" directory to keep things organized. \n", + "As such, the function \"save_parquet\" and \"read_parquet\" adds this directory to every parquet name, and I'm using this functions instead of dd.read_parquet/dd.to_parquet direct to ensure the same read and write settings across the notebook. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#Initializing client / distributed\n", + "# client = Client()\n", + "# client\n", "\n", - "#Initializing client\n", - "client = Client()\n", - "client" + "#Create folder to save/read new data\n", + "DIR = 'sample_0_prep/'\n", + "import os\n", + "if not os.path.exists(DIR):\n", + " os.makedirs(DIR)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If no \"recalculate_partition\" is passed on, it will not recalculate the partitions. It is not mandatory, but good if you are significantly reducing the size of the data. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#Save a DF to a parquet\n", + "def save_parquet(df, name, recalculate_partition=False):\n", + " with ProgressBar():\n", + " #DF.REPARTITION copyed from: https://stackoverflow.com/questions/44657631/strategy-for-partitioning-dask-dataframes-efficiently\n", + " if recalculate_partition:\n", + " n = 1+df.memory_usage(deep=True).sum().compute() // (1000 * 1000 * 100)\n", + " print(\"Npartition: \", n)\n", + " df.repartition(npartitions= n).to_parquet(DIR + name, engine=\"pyarrow\")\n", + " else:\n", + " df.to_parquet(DIR + name, engine=\"pyarrow\")\n", + " \n", + " \n", + "def read_parquet(name):\n", + " return dd.read_parquet(DIR + name, engine='pyarrow')" ] }, { @@ -75,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -84,7 +104,7 @@ "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'], dtype='object')" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -103,7 +123,44 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Filtered value_len > 1356\n", + "## DF overview\n", + "Some overview about the sample: \n", + "- Mean: 1356.97,\n", + "- Min: 0,\n", + "- Max: 4496861\n", + "- Std: 26310.62" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 48.2s\n", + "1356.9776628910975 0 4496861 26310.62140481331 11292867\n" + ] + } + ], + "source": [ + "with ProgressBar():\n", + " df_mean = df['value_len'].mean()\n", + " df_min = df['value_len'].min()\n", + " df_max = df['value_len'].max()\n", + " df_std = df['value_len'].std()\n", + " df_len = df['value_len'].count()\n", + " (df_mean, df_min, df_max, df_std, df_len) = dd.compute(df_mean, 
df_min, df_max, df_std, df_len);\n", + " print(df_mean, df_min, df_max, df_std, df_len)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### FILTER: value_len > df_mean\n", "1356 is the value_len mean\n", "\n", "To filter the data into something that is more interesting to this task I decided to only work with values that are at above the mean.\n", @@ -113,14 +170,22 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 58.0s\n", + "Npartition: 244\n", + "[########################################] | 100% Completed | 1min 30.9s\n" + ] + } + ], "source": [ "#Save\n", - "df = df[df['value_len'] > 1356]\n", - "dd.to_parquet(df=df, path='filtered_above_mean.parquet', engine='pyarrow')\n", - "# len(dff)" + "save_parquet(df= df[df['value_len'] > df_mean], name='above_mean.parquet', recalculate_partition=True)" ] }, { @@ -141,7 +206,7 @@ ], "source": [ "#Read\n", - "df = dd.read_parquet('filtered_above_mean.parquet', engine='pyarrow')\n", + "df = read_parquet('above_mean.parquet')\n", "df.columns" ] }, @@ -149,42 +214,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## DF overview\n", - "Some overview about the sample: \n", - "- Mean: 1356.97,\n", - "- Min: 0,\n", - "- Max: 4496861\n", - "- Std: 26310.62" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1356.9776628910975 0 4496861 26310.62140481331\n" - ] - } - ], - "source": [ - "df_mean = df['value_len'].mean()\n", - "df_min = df['value_len'].min()\n", - "df_max = df['value_len'].max()\n", - "df_std = df['value_len'].std()\n", - "(df_mean, df_min, df_max, df_std) = dd.compute(df_mean, df_min, df_max, df_std);\n", - "print(df_mean, df_min, df_max, df_std)" - ] - }, - { - 
"cell_type": "markdown", - "metadata": {}, - "source": [ - "# Domains\n", - "The following code is from this same project: ~/analyses/hello_world.ipynb\n" + "# Add Column: Domains\n", + "The following code is copyed from this same project: ~/analyses/hello_world.ipynb\n", + "\n", + "It uses the data saved from the last section\n", + "This section is dedicated to extract the domain of the columns \"location\" and \"script_url\" and add it as new columns \"location_domain\" and \"script_domain\"" ] }, { @@ -206,28 +240,39 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ + "#To guarantee the usage of the correct parquet created above in case we start from this section\n", + "df = read_parquet('above_mean.parquet')\n", + "\n", "df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str, 'location': str})\n", - "df['location_domain'] = df.location.apply(extract_domain)\n", - "df['script_domain'] = df.script_url.apply(extract_domain)" + "df['location_domain'] = df.location.apply(extract_domain, meta='O')\n", + "df['script_domain'] = df.script_url.apply(extract_domain, meta='O')" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 1min 17.3s\n" + ] + } + ], "source": [ "#save\n", - "df.to_parquet('0_sample_domains.parquet', engine='pyarrow')" + "save_parquet(df=df, name='above_mean_domain.parquet')" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -251,189 +296,140 @@ " \n", " \n", " \n", - " value_1000\n", - " value\n", - " value_len\n", - " symbol\n", - " script_url\n", - " location\n", " location_domain\n", + " location\n", " script_domain\n", + " script_url\n", " \n", " \n", " \n", " \n", " 0\n", - " 
{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " 3713\n", - " window.sessionStorage\n", - " https://assets.adobedtm.com/caacec67651710193d...\n", - " https://www.canada.ca/en/services.html\n", " canada.ca\n", + " https://www.canada.ca/en/services.html\n", " adobedtm.com\n", + " https://assets.adobedtm.com/caacec67651710193d...\n", " \n", " \n", " 1\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 103878\n", - " window.localStorage\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", - " https://maniform.world.tmall.com/category-1282...\n", " tmall.com\n", + " https://maniform.world.tmall.com/category-1282...\n", " alicdn.com\n", + " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", " \n", " \n", " 2\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 103878\n", - " window.localStorage\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", - " https://maniform.world.tmall.com/category-1282...\n", " tmall.com\n", + " https://maniform.world.tmall.com/category-1282...\n", " alicdn.com\n", + " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", " \n", " \n", " 3\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", - " https://www.coches.net/fiat/segunda-mano/\n", " coches.net\n", + " https://www.coches.net/fiat/segunda-mano/\n", " coches.net\n", + " https://www.coches.net/scripts/common.min.js?2...\n", " \n", " \n", " 4\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", - " 
https://www.coches.net/fiat/segunda-mano/\n", " coches.net\n", + " https://www.coches.net/fiat/segunda-mano/\n", " coches.net\n", + " https://www.coches.net/scripts/common.min.js?2...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " value_1000 \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "\n", - " value value_len \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "\n", - " symbol script_url \\\n", - "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", - "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + " location_domain location \\\n", + "0 canada.ca https://www.canada.ca/en/services.html \n", + "1 tmall.com https://maniform.world.tmall.com/category-1282... \n", + "2 tmall.com https://maniform.world.tmall.com/category-1282... \n", + "3 coches.net https://www.coches.net/fiat/segunda-mano/ \n", + "4 coches.net https://www.coches.net/fiat/segunda-mano/ \n", "\n", - " location location_domain \\\n", - "0 https://www.canada.ca/en/services.html canada.ca \n", - "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", - "2 https://maniform.world.tmall.com/category-1282... 
tmall.com \n", - "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", - "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", - "\n", - " script_domain \n", - "0 adobedtm.com \n", - "1 alicdn.com \n", - "2 alicdn.com \n", - "3 coches.net \n", - "4 coches.net " + " script_domain script_url \n", + "0 adobedtm.com https://assets.adobedtm.com/caacec67651710193d... \n", + "1 alicdn.com https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "2 alicdn.com https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", + "3 coches.net https://www.coches.net/scripts/common.min.js?2... \n", + "4 coches.net https://www.coches.net/scripts/common.min.js?2... " ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read\n", - "df = dd.read_parquet('0_sample_domains.parquet', engine='pyarrow')\n", - "df.head()" + "df = read_parquet('above_mean_domain.parquet')\n", + "df[['location_domain', 'location', 'script_domain', 'script_url']].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# is JSON Column\n", + "# Add Column: is_json\n", + "\n", + "After manual initial analysis I have think that the huge values are json structured, to validate that I included an new column that is a boolean value with the validation of json\n", "\n", - "After manual initial analysis I have think that the huge values are json structured, to validate that I included an new column that is a boolean value with the validation of json" + "After simple validation of value is a json or not, boolean value will be saved on a new column named \"is_json\"\n" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import json\n", + "import pandas as pd\n", "\n", "def is_json(myjson):\n", " try:\n", " json.loads(myjson)\n", " return True\n", + "\n", " except ValueError as e:\n", " return False" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 
13, "metadata": {}, "outputs": [], "source": [ - "\n", - "df['is_json'] = df['value'].apply(is_json)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### is_JSON data\n", - "Saving the new produced data with 'is_json' columns into disk" + "#To guarantee the usage of the correct parquet created above in case we start from this section\n", + "df = read_parquet('above_mean_domain.parquet')\n", + "df['is_json'] = df['value'].apply(is_json, meta=False)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/fastparquet/util.py:221: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly.\n", - " inferred_dtype = infer_dtype(column)\n" + "[########################################] | 100% Completed | 2min 25.1s\n" ] } ], "source": [ "#save\n", - "df.to_parquet('is_json_above_mean.parquet')" + "save_parquet(df=df, name='above_mean_domain_json.parquet')" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -458,13 +454,6 @@ " \n", " \n", " value_1000\n", - " value\n", - " value_len\n", - " symbol\n", - " script_url\n", - " location\n", - " location_domain\n", - " script_domain\n", " is_json\n", " \n", " \n", @@ -472,61 +461,26 @@ " \n", " 0\n", " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " 3713\n", - " window.sessionStorage\n", - " https://assets.adobedtm.com/caacec67651710193d...\n", - " https://www.canada.ca/en/services.html\n", - " canada.ca\n", - " adobedtm.com\n", " True\n", " \n", " \n", " 1\n", " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 103878\n", - 
" window.localStorage\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", - " https://maniform.world.tmall.com/category-1282...\n", - " tmall.com\n", - " alicdn.com\n", " True\n", " \n", " \n", " 2\n", " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 103878\n", - " window.localStorage\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", - " https://maniform.world.tmall.com/category-1282...\n", - " tmall.com\n", - " alicdn.com\n", " True\n", " \n", " \n", " 3\n", " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " coches.net\n", " False\n", " \n", " \n", " 4\n", " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " coches.net\n", " False\n", " \n", " \n", @@ -534,64 +488,36 @@ "" ], "text/plain": [ - " value_1000 \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "\n", - " value value_len \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 
1358 \n", - "\n", - " symbol script_url \\\n", - "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", - "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "\n", - " location location_domain \\\n", - "0 https://www.canada.ca/en/services.html canada.ca \n", - "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", - "2 https://maniform.world.tmall.com/category-1282... tmall.com \n", - "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", - "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", - "\n", - " script_domain is_json \n", - "0 adobedtm.com True \n", - "1 alicdn.com True \n", - "2 alicdn.com True \n", - "3 coches.net False \n", - "4 coches.net False " + " value_1000 is_json\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... True\n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", + "3 usunico=17/12/2017:0-00155123:830; SessionASM=... False\n", + "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 
False" ] }, - "execution_count": 8, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read\n", - "df = dd.read_parquet('is_json_above_mean.parquet')\n", - "df.head()" + "df = read_parquet('above_mean_domain_json.parquet')\n", + "df[['value_1000', 'is_json']].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Value md5\n", + "# Add Column: value_md5\n", "Include new columns called \"value_md5\" that is the md5 of value column" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -603,25 +529,46 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "df['value_md5'] = df['value'].apply(md5)" + "#To guarantee the usage of the correct parquet created above in case we start from this section\n", + "df = read_parquet('above_mean_domain_json.parquet') \n", + "\n", + "df['value_md5'] = df['value'].apply(md5, meta=' ')" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 18, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000is_json
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...True
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...True
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...True
3{\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...True
4{\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...True
\n", + "
" + ], + "text/plain": [ + " value_1000 is_json\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... True\n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", + "3 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... True\n", + "4 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... True" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read all_json_above_mean\n", + "df = read_parquet('JSONs_only.parquet')\n", + "df[['value_1000', 'is_json']].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add json keys and schema columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extract the top level keys, sort them and add as a list into another column named 'json_keys'\n", + "Will be using \"https://github.com/rnd0101/json_schema_inferencer\" to guess the json schema and save it into another column called \"json_schema\"" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "from json_schema_inferencer.guess_json_schema import guess_schema\n", + "\n", + "df = read_parquet('JSONs_only.parquet')\n", + "\n", + "def jsonSchema(myjson):\n", + " try:\n", + " dct = json.loads(myjson)\n", + " value = guess_schema(dct)\n", + " l = list(value['properties'])\n", + " l.sort()\n", + " return l\n", + " except ValueError as e:\n", + " return list()\n", + " \n", + "def jsonKeys(myjson):\n", + " try:\n", + " dct = json.loads(myjson)\n", + " keys = list(dct.keys())\n", + " keys.sort()\n", + " return keys\n", + " except ValueError as e:\n", + " return list()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", "output_type": "stream", "text": [ - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent 
call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=1375)\n", - "distributed.nanny - WARNING - Worker process 1375 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n", - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return 
fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n" + "[########################################] | 100% Completed | 3min 57.7s\n" ] - }, + } + ], + "source": [ + "df['json_keys'] = df['value'].apply(jsonKeys, meta='')\n", + "df['json_schema'] = df['value'].apply(jsonSchema, meta='')\n", + "save_parquet(df=df, name='JSONs_key_schema.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ { - "name": "stderr", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000json_keysjson_schema
0{\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...[im-settings][im-settings]
1{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...[APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c][APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c]
2{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...[APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c][APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c]
3{\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...[LastSearch, LastSearch_e, dueljs_channel_comm...[LastSearch, LastSearch_e, dueljs_channel_comm...
4{\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...[LastSearch, LastSearch_e, dueljs_channel_comm...[LastSearch, LastSearch_e, dueljs_channel_comm...
\n", + "
" + ], + "text/plain": [ + " value_1000 \\\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "3 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... \n", + "4 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... \n", + "\n", + " json_keys \\\n", + "0 [im-settings] \n", + "1 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", + "2 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", + "3 [LastSearch, LastSearch_e, dueljs_channel_comm... \n", + "4 [LastSearch, LastSearch_e, dueljs_channel_comm... \n", + "\n", + " json_schema \n", + "0 [im-settings] \n", + "1 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", + "2 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", + "3 [LastSearch, LastSearch_e, dueljs_channel_comm... \n", + "4 [LastSearch, LastSearch_e, dueljs_channel_comm... " + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#read \n", + "df = read_parquet('JSONs_key_schema.parquet')\n", + "df[['value_1000', 'json_keys', 'json_schema']].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### All NON json above the mean" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", "output_type": "stream", "text": [ - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in 
wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=1421)\n", - "distributed.nanny - WARNING - Worker process 1421 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n" + "[########################################] | 100% Completed | 26.7s\n", + "Npartition: 12\n", + "[########################################] | 100% Completed | 27.8s\n" ] } ], "source": [ - "#save\n", - 
"df.to_parquet('is_json_above_mean_md5.parquet')" + "df = read_parquet('above_mean_domain_json_md5.parquet')\n", + "save_parquet(df=df[df['is_json'] == False], name='NON_JSONs_only.parquet', recalculate_partition=True)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1223,44 +984,48 @@ " location_domain\n", " script_domain\n", " is_json\n", + " value_md5\n", " \n", " \n", " \n", " \n", " 0\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " 3713\n", - " window.sessionStorage\n", - " https://assets.adobedtm.com/caacec67651710193d...\n", - " https://www.canada.ca/en/services.html\n", - " canada.ca\n", - " adobedtm.com\n", - " True\n", + " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " 1358\n", + " window.document.cookie\n", + " https://www.coches.net/scripts/common.min.js?2...\n", + " https://www.coches.net/fiat/segunda-mano/\n", + " coches.net\n", + " coches.net\n", + " False\n", + " db64465b639e01993d9212390f057628\n", " \n", " \n", " 1\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 103878\n", - " window.localStorage\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", - " https://maniform.world.tmall.com/category-1282...\n", - " tmall.com\n", - " alicdn.com\n", - " True\n", + " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " 1358\n", + " window.document.cookie\n", + " https://www.coches.net/scripts/common.min.js?2...\n", + " https://www.coches.net/fiat/segunda-mano/\n", + " coches.net\n", + " coches.net\n", + " False\n", + " db64465b639e01993d9212390f057628\n", " \n", " \n", " 2\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 
{\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 103878\n", - " window.localStorage\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", - " https://maniform.world.tmall.com/category-1282...\n", - " tmall.com\n", - " alicdn.com\n", - " True\n", + " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " 1358\n", + " window.document.cookie\n", + " https://tags.tiqcdn.com/utag/schibsted/coches....\n", + " https://www.coches.net/fiat/segunda-mano/\n", + " coches.net\n", + " tiqcdn.com\n", + " False\n", + " db64465b639e01993d9212390f057628\n", " \n", " \n", " 3\n", @@ -1268,11 +1033,12 @@ " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", " 1358\n", " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", + " https://tags.tiqcdn.com/utag/schibsted/coches....\n", " https://www.coches.net/fiat/segunda-mano/\n", " coches.net\n", - " coches.net\n", + " tiqcdn.com\n", " False\n", + " db64465b639e01993d9212390f057628\n", " \n", " \n", " 4\n", @@ -1280,11 +1046,12 @@ " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", " 1358\n", " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", + " https://tags.tiqcdn.com/utag/schibsted/coches....\n", " https://www.coches.net/fiat/segunda-mano/\n", " coches.net\n", - " coches.net\n", + " tiqcdn.com\n", " False\n", + " db64465b639e01993d9212390f057628\n", " \n", " \n", "\n", @@ -1292,900 +1059,52 @@ ], "text/plain": [ " value_1000 \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", + "0 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "1 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "2 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 
\n", "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", "\n", " value value_len \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... 3713 \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... 103878 \n", + "0 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "1 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", + "2 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", "\n", " symbol script_url \\\n", - "0 window.sessionStorage https://assets.adobedtm.com/caacec67651710193d... \n", - "1 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "2 window.localStorage https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "3 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "4 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "0 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "1 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", + "2 window.document.cookie https://tags.tiqcdn.com/utag/schibsted/coches.... \n", + "3 window.document.cookie https://tags.tiqcdn.com/utag/schibsted/coches.... \n", + "4 window.document.cookie https://tags.tiqcdn.com/utag/schibsted/coches.... \n", "\n", - " location location_domain \\\n", - "0 https://www.canada.ca/en/services.html canada.ca \n", - "1 https://maniform.world.tmall.com/category-1282... tmall.com \n", - "2 https://maniform.world.tmall.com/category-1282... 
tmall.com \n", - "3 https://www.coches.net/fiat/segunda-mano/ coches.net \n", - "4 https://www.coches.net/fiat/segunda-mano/ coches.net \n", + " location location_domain script_domain \\\n", + "0 https://www.coches.net/fiat/segunda-mano/ coches.net coches.net \n", + "1 https://www.coches.net/fiat/segunda-mano/ coches.net coches.net \n", + "2 https://www.coches.net/fiat/segunda-mano/ coches.net tiqcdn.com \n", + "3 https://www.coches.net/fiat/segunda-mano/ coches.net tiqcdn.com \n", + "4 https://www.coches.net/fiat/segunda-mano/ coches.net tiqcdn.com \n", "\n", - " script_domain is_json \n", - "0 adobedtm.com True \n", - "1 alicdn.com True \n", - "2 alicdn.com True \n", - "3 coches.net False \n", - "4 coches.net False " + " is_json value_md5 \n", + "0 False db64465b639e01993d9212390f057628 \n", + "1 False db64465b639e01993d9212390f057628 \n", + "2 False db64465b639e01993d9212390f057628 \n", + "3 False db64465b639e01993d9212390f057628 \n", + "4 False db64465b639e01993d9212390f057628 " ] }, - "execution_count": 16, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#read\n", - "df = dd.read_parquet('is_json_above_mean_md5.parquet')\n", + "#read \n", + "df = read_parquet('NON_JSONs_only.parquet')\n", "df.head()" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Saving other possible usefull samples to future analyses" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return 
fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21460)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21460)\n", - "distributed.nanny - WARNING - Worker process 21460 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n", - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 
344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21468)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret 
= self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in 
_get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21468)\n", - "distributed.nanny - WARNING - Worker process 21468 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n", - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", 
- "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] 
No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent 
call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21484)\n", - "distributed.nanny - WARNING - Worker process 21484 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n", - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return 
fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21476)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21476)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21476)\n", - "distributed.nanny - WARNING - Worker process 21476 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n", - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return 
fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21497)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21497)\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21497)\n", - "distributed.nanny - WARNING - Worker process 21497 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n", - "distributed.nanny - WARNING - Worker exceeded 95% memory budget. 
Restarting\n", - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return 
fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21489)\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "tornado.application - ERROR - Exception in callback >\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: 'Process' object has no attribute '_cache'\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 341, in wrapper\n", - " ret = self._cache[fun]\n", - "AttributeError: _cache\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 359, in catch_zombie\n", - " yield\n", - " File 
\"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - "ProcessLookupError: [Errno 3] No such process\n", - "\n", - "During handling of the above exception, another exception occurred:\n", - "\n", - "Traceback (most recent call last):\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/tornado/ioloop.py\", line 907, in _run\n", - " return self.callback()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/nanny.py\", line 266, in memory_monitor\n", - " memory = proc.memory_info().rss\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/__init__.py\", line 1166, in memory_info\n", - " return self._proc.memory_info()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 469, in memory_info\n", - " rawtuple = self._get_pidtaskinfo()\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 339, in wrapper\n", - " return fun(self, *args, **kwargs)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_common.py\", line 344, in wrapper\n", - " return fun(self)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 400, in _get_pidtaskinfo\n", - " ret = cext.proc_pidtaskinfo_oneshot(self.pid)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/contextlib.py\", line 99, in __exit__\n", - " self.gen.throw(type, value, traceback)\n", - " File \"/anaconda3/envs/overscripted/lib/python3.6/site-packages/psutil/_psosx.py\", line 372, in 
catch_zombie\n", - " raise AccessDenied(proc.pid, proc._name)\n", - "psutil.AccessDenied: psutil.AccessDenied (pid=21489)\n", - "distributed.nanny - WARNING - Worker process 21489 was killed by unknown signal\n", - "distributed.nanny - WARNING - Restarting worker\n" - ] - } - ], - "source": [ - "df[df['is_json'] == True].to_parquet('all_json_above_mean.parquet', engine='pyarrow')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "df[df['is_json'] == False].to_parquet('all_NON_json_above_mean.parquet')" - ] - }, { "cell_type": "code", "execution_count": null, From 9e48a034ac5ad3e605623d803258ae21d77612d3 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 8 Apr 2019 15:13:53 -0300 Subject: [PATCH 15/23] Remove Quantitative comparison and Add value distribution notebook --- .../isJson_Quantitative_Comparasion.ipynb | 917 ------------------ .../isJson_Value_Distribution.ipynb | 664 +++++++++++++ 2 files changed, 664 insertions(+), 917 deletions(-) delete mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_Quantitative_Comparasion.ipynb create mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Quantitative_Comparasion.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Quantitative_Comparasion.ipynb deleted file mode 100644 index 20a2660..0000000 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_Quantitative_Comparasion.ipynb +++ /dev/null @@ -1,917 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Start dask" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", - " data = yaml.load(f.read()) or {}\n", - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", - " defaults = yaml.load(f)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

Client

\n", - "\n", - "
\n", - "

Cluster

\n", - "
    \n", - "
  • Workers: 4
  • \n", - "
  • Cores: 4
  • \n", - "
  • Memory: 8.59 GB
  • \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import dask.dataframe as dd\n", - "from dask.distributed import Client\n", - "\n", - "#Initializing client\n", - "client = Client()\n", - "client" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Data\n", - "This notebook starts using 'is_json_above_bean.parquet', this is a filtered data that you can get by running the data preparation notebook called 'jsJson_dataPrep.ipynb'. \n", - "This parquet contains the 10% sample data filtered by values above the mean of value_len. \n", - "\n", - "This new sample has 499805 rows, meaning that its only 4,42% of the original sample (most values are smaller than the sample's mean of 1356). " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_lenis_json
03713True
1103878True
2103878True
31358False
41358False
\n", - "
" - ], - "text/plain": [ - " value_len is_json\n", - "0 3713 True\n", - "1 103878 True\n", - "2 103878 True\n", - "3 1358 False\n", - "4 1358 False" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = dd.read_parquet('is_json_above_mean.parquet', columns=['value_len', 'is_json'])\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Visualization: " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/worker.py:2791: UserWarning: Large object of size 1.89 MB detected in task graph: \n", - " (" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZsAAAD8CAYAAAChHgmuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGMJJREFUeJzt3X+wXGWd5/H3lyQQHFRCCJpKojfuRIUoQpLBWLorAwIB0TgWurGoJUbWbDlBQa0awrg7iK5bMLWrLDv4g10owNWBGIchqzBsxERdC4EEkZ/GXBHlmkguP4Q4TiCB7/7Rz41N6Htv306etOm8X1Vdfc63n3Oep0/l5nPPOc/tjsxEkqSaDuj2ACRJvc+wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqm58twfwx+Lwww/Pvr6+bg9DkvYp69evfywzp4zWzrAp+vr6WLduXbeHIUn7lIj4ZTvtvIwmSarOsJEkVVc1bCLi4Yi4NyLujoh1pXZYRKyOiI3leVKpR0RcFhH9EXFPRMxp2s/i0n5jRCxuqs8t++8v28ZIfUiSumNv3LP588x8rGl9OXBrZl4cEcvL+vnAqcCs8ngz8CXgzRFxGHAhMA9IYH1ErMrMJ0ubpcCPgJuABcDNI/QhaT+yfft2BgYG2LZtW7eHss+bOHEi06dPZ8KECR1t340JAguB48vyNcBaGkGwELg2G1+w86OIODQippa2qzPzCYCIWA0siIi1wMsy87ZSvxZ4D42wGa4PSfuRgYEBXvrSl9LX10e58KEOZCaPP/44AwMDzJw5s6N91L5nk8D/jYj1EbG01F6RmZsByvMRpT4NeKRp24FSG6k+0KI+Uh+S9iPbtm1j8uTJBs1uiggmT568W2eItc9s3pqZmyLiCGB1RPx0hLat/jVkB/W2lQBcCvCqV71qLJtK2kcYNHvG7h7Hqmc2mbmpPG8BbgCOAx4tl8coz1tK8wFgRtPm04FNo9Snt6gzQh+7ju+KzJyXmfOmTBn1b5IkSR2qdmYTEX8CHJCZW8vyycBngFXAYuDi8nxj2WQVcE5EXEd
jgsBTmbk5Im4B/kvTjLKTgQsy84mI2BoR84HbgbOA/9G0r1Z9SNqP9S3/9h7d38MXv3OP7q+X1byM9grghnLqNR74emb+U0TcCayIiLOBXwHvK+1vAk4D+oHfA0sASqh8FriztPvM0GQB4CPA1cDBNCYG3FzqFw/TRxV7+h9wu/yHLvWWQw45hN/97nd7bH8f/OAHOf300znjjDP22D47VS1sMvMh4E0t6o8DJ7aoJ7BsmH1dBVzVor4OeEO7fUiSusNPEJCkSs4//3y++MUv7lz/9Kc/zUUXXcSJJ57InDlzeOMb38iNN774Kv/atWs5/fTTd66fc845XH311QCsX7+et7/97cydO5dTTjmFzZs3tzWW4bY7/vjjOf/88znuuON47Wtfyw9+8IPdeMfDM2wkqZJFixZx/fXX71xfsWIFS5Ys4YYbbuCuu+5izZo1fPKTn6RxYWd027dv56Mf/SgrV65k/fr1fOhDH+JTn/rUbm+3Y8cO7rjjDi699FIuuuiisb/RNvipz5JUybHHHsuWLVvYtGkTg4ODTJo0ialTp/Lxj3+c73//+xxwwAH8+te/5tFHH+WVr3zlqPvbsGED9913HyeddBIAzz33HFOnTt3t7d773vcCMHfuXB5++OEO3unoDBtJquiMM85g5cqV/OY3v2HRokV87WtfY3BwkPXr1zNhwgT6+vpe9MeS48eP5/nnn9+5PvR6ZjJ79mxuu+22MY1htO0OOuggAMaNG8eOHTvGtO92GTaS9hvdmMG5aNEiPvzhD/PYY4/xve99jxUrVnDEEUcwYcIE1qxZwy9/+eKvg3n1q1/NAw88wDPPPMO2bdu49dZbedvb3sbrXvc6BgcHue2223jLW97C9u3b+dnPfsbs2bNHHEOn2+1Jho0kVTR79my2bt3KtGnTmDp1KmeeeSbvete7mDdvHscccwyvf/3rX7TNjBkzeP/738/RRx/NrFmzOPbYYwE48MADWblyJR/72Md46qmn2LFjB+edd96oodHpdntStHtjqtfNmzcvO/2mTv/ORvrj9OCDD3LkkUd2exg9o9XxjIj1mTlvtG2djSZJqs7LaJK0j1u2bBk//OEPX1A799xzWbJkSZdG9GKGjaSelpk9/8nPl19+efU+dveWi5fRJPWsiRMn8vjjj+/2f5T7u6EvT5s4cWLH+/DMRlLPmj59OgMDAwwODnZ7KPu8oa+F7pRhI6lnTZgwoeOvMdae5WU0SVJ1ho0kqTrDRpJUnWEjSarOsJEkVWfYSJKqM2wkSdUZNpKk6gwbSVJ1ho0kqTrDRpJUnWEjSarOsJEkVWfYSJKqM2wkSdUZNpKk6gwbSVJ1ho0kqTrDRpJUnWEjSaquethExLiI+HFEfKusz4yI2yNiY0RcHxEHlvpBZb2/vN7XtI8LSn1DRJzSVF9Qav0Rsbyp3rIPSVJ37I0zm3OBB5vWLwG+kJmzgCeBs0v9bODJzPxT4AulHRFxFLAImA0sAL5YAmwccDlwKnAU8IHSdqQ+JEldUDVsImI68E7gf5X1AE4AVpYm1wDvKcsLyzrl9RNL+4XAdZn5TGb+AugHjiuP/sx8KDOfBa4DFo7ShySpC2qf2VwK/BXwfFmfDPw2M3eU9QFgWlmeBjwCUF5/qrTfWd9lm+HqI/UhSeqCamETEacDWzJzfXO5RdMc5bU9VW81xqURsS4i1g0ODrZqIknaA2qe2bwVeHdEPEzjEtcJNM50Do2I8aXNdGBTWR4AZgCU118OPNFc32Wb4eqPjdDHC2TmFZk5LzPnTZkypfN3KkkaUbWwycwLMnN6ZvbRuMH/3cw8E1gDnFGaLQZuLMuryjrl9e9mZpb6ojJbbSYwC7gDuBOYVWaeHVj6WFW2Ga4PSVIXdOPvbM4HPhER/TTur1xZ6lcCk0v9E8BygMy8H1gBPAD8E7AsM58r92TOAW6hMdttRWk7Uh+SpC4YP3qT3ZeZa4G1ZfkhGjPJdm2zDXjfMNt/Dvh
ci/pNwE0t6i37kCR1h58gIEmqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdYaNJKk6w0aSVJ1hI0mqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdYaNJKk6w0aSVJ1hI0mqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdYaNJKk6w0aSVJ1hI0mqzrCRJFVn2EiSqjNsJEnVGTaSpOoMG0lSdW2FTUS8ofZAJEm9q90zmy9HxB0R8ZcRcWjVEUmSek5bYZOZbwPOBGYA6yLi6xFxUtWRSZJ6Rtv3bDJzI/AfgfOBtwOXRcRPI+K9rdpHxMRyNvSTiLg/Ii4q9ZkRcXtEbIyI6yPiwFI/qKz3l9f7mvZ1QalviIhTmuoLSq0/IpY31Vv2IUnqjnbv2RwdEV8AHgROAN6VmUeW5S8Ms9kzwAmZ+SbgGGBBRMwHLgG+kJmzgCeBs0v7s4EnM/NPyz4vKX0fBSwCZgMLgC9GxLiIGAdcDpwKHAV8oLRlhD4kSV3Q7pnN3wF3AW/KzGWZeRdAZm6icbbzItnwu7I6oTySRkCtLPVrgPeU5YVlnfL6iRERpX5dZj6Tmb8A+oHjyqM/Mx/KzGeB64CFZZvh+pAkdUG7YXMa8PXM/BeAiDggIl4CkJlfHW6jcgZyN7AFWA38HPhtZu4oTQaAaWV5GvBI2ecO4ClgcnN9l22Gq08eoQ9JUhe0GzbfAQ5uWn9JqY0oM5/LzGOA6TTORI5s1aw8xzCv7an6i0TE0ohYFxHrBgcHWzWRJO0B7YbNxKZLYpTll7TbSWb+FlgLzAcOjYjx5aXpwKayPEBjthvl9ZcDTzTXd9lmuPpjI/Sx67iuyMx5mTlvypQp7b4dSdIYtRs2/xwRc4ZWImIu8C8jbRARU4b+JiciDgbeQWOCwRrgjNJsMXBjWV5V1imvfzczs9QXldlqM4FZwB3AncCsMvPsQBqTCFaVbYbrQ5LUBeNHbwLAecA3ImLoDGEq8G9H2WYqcE2ZNXYAsCIzvxURDwDXRcR/Bn4MXFnaXwl8NSL6aZzRLALIzPsjYgXwALADWJaZzwFExDnALcA44KrMvL/s6/xh+pAkdUFbYZOZd0bE64HX0bgn8tPM3D7KNvcAx7aoP0Tj/s2u9W3A+4bZ1+eAz7Wo3wTc1G4fkqTuaPfMBuDPgL6yzbERQWZeW2VUkqSe0lbYRMRXgX8F3A08V8oJGDaSpFG1e2YzDziq3HyXJGlM2p2Ndh/wypoDkST1rnbPbA4HHoiIO2h85hkAmfnuKqOSJPWUdsPm0zUHIUnqbe1Off5eRLwamJWZ3ymfizau7tAkSb2i3a8Y+DCNT1H+SilNA/6x1qAkSb2l3QkCy4C3Ak/Dzi9SO6LWoCRJvaXdsHmmfGcMsPODMp0GLUlqS7th872I+Gvg4Ig4CfgG8H/qDUuS1EvaDZvlwCBwL/AfaHweWctv6JQkaVftzkZ7Hvif5SFJ0pi0+9lov6DFPZrMfM0eH5EkqeeM5bPRhkyk8VUAh+354UiSelFb92wy8/Gmx68z81LghMpjkyT1iHYvo81pWj2AxpnOS6uMSJLUc9q9jPbfmpZ3AA8D79/jo5Ek9aR2Z6P9ee2BSJJ6V7uX0T4x0uuZ+fk9MxxJUi8ay2y0PwNWlfV3Ad8HHqkxKElSbxnLl6fNycytABHxaeAbmfnvaw1MktQ72v24mlcBzzatPwv07fHRSJJ6UrtnNl8F7oiIG2h8ksBfANdWG5Ukqae0OxvtcxFxM/CvS2lJZv643rAkSb2k3ctoAC8Bns7M/w4MRMTMSmOSJPWYdr8W+kLgfOCCUpoA/O9ag5Ik9ZZ2z2z+Ang38M8AmbkJP65GktSmdsPm2cxMytcMRMSf1BuSJKnXtBs2KyLiK8ChEfFh4Dv4RWqSpDa1Oxvtv0bEScDTwOuAv8nM1VVHJknqGaOGTUSMA27JzHcABowkacxGvYyWmc8Bv4+Il++F8UiSelC792y2AfdGxJU
RcdnQY6QNImJGRKyJiAcj4v6IOLfUD4uI1RGxsTxPKvUo++2PiHuav7AtIhaX9hsjYnFTfW5E3Fu2uSwiYqQ+JEnd0W7YfBv4TzQ+6Xl902MkO4BPZuaRwHxgWUQcBSwHbs3MWcCtZR3gVGBWeSwFvgSN4AAuBN4MHAdc2BQeXypth7ZbUOrD9SFJ6oIR79lExKsy81eZec1Yd5yZm4HNZXlrRDwITAMWAseXZtcAa2n8wehC4NoyxfpHEXFoREwtbVdn5hNlTKuBBRGxFnhZZt5W6tcC7wFuHqEPSVIXjHZm849DCxHxzU47iYg+4FjgduAVJYiGAumI0mwaL/x+nIFSG6k+0KLOCH1IkrpgtLCJpuXXdNJBRBwCfBM4LzOfbrOvIdlBfSxjWxoR6yJi3eDg4Fg2lSSNwWhhk8MstyUiJtAImq9l5j+U8qPl8hjleUupDwAzmjafDmwapT69RX2kPl4gM6/IzHmZOW/KlCljfXuSpDaNFjZvioinI2IrcHRZfjoitkbESGcplJlhVwIPZubnm15aBQzNKFsM3NhUP6vMSpsPPFUugd0CnBwRk8rEgJNp/N3PZmBrRMwvfZ21y75a9SFJ6oIRJwhk5rjd2PdbgX9HY8r03aX218DFND7+5mzgV8D7yms3AacB/cDvgSVlDE9ExGeBO0u7zwxNFgA+AlwNHExjYsDNpT5cH5KkLmj3mzrHLDP/H63vqwCc2KJ9AsuG2ddVwFUt6uuAN7SoP96qD0lSd4zly9MkSeqIYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklRdtbCJiKsiYktE3NdUOywiVkfExvI8qdQjIi6LiP6IuCci5jRts7i03xgRi5vqcyPi3rLNZRERI/UhSeqemmc2VwMLdqktB27NzFnArWUd4FRgVnksBb4EjeAALgTeDBwHXNgUHl8qbYe2WzBKH5KkLqkWNpn5feCJXcoLgWvK8jXAe5rq12bDj4BDI2IqcAqwOjOfyMwngdXAgvLayzLztsxM4Npd9tWqD0lSl+ztezavyMzNAOX5iFKfBjzS1G6g1EaqD7Soj9SHJKlL/lgmCESLWnZQH1unEUsjYl1ErBscHBzr5pKkNu3tsHm0XAKjPG8p9QFgRlO76cCmUerTW9RH6uNFMvOKzJyXmfOmTJnS8ZuSJI1sb4fNKmBoRtli4Mam+lllVtp84KlyCewW4OSImFQmBpwM3FJe2xoR88sstLN22VerPiRJXTK+1o4j4u+B44HDI2KAxqyyi4EVEXE28CvgfaX5TcBpQD/we2AJQGY+ERGfBe4s7T6TmUOTDj5CY8bbwcDN5cEIfUiSuqRa2GTmB4Z56cQWbRNYNsx+rgKualFfB7yhRf3xVn1Ikrrnj2WCgCSphxk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1Rk2kqTqDBtJUnWGjSSpOsNGklSdYSNJqs6wkSRVZ9hIkqozbCRJ1fVs2ETEgojYEBH9EbG82+ORpP3Z+G4PoIaIGAdcDpwEDAB3RsSqzHyguyPbs/qWf7trfT988Tu71rekfU+vntkcB/Rn5kOZ+SxwHbCwy2OSpP1WT57ZANOAR5rWB4A3d2ksPalbZ1WeUUn7pl4Nm2hRyxc1ilgKLC2rv4uIDR32dzjwWIfb9qJqxyMuqbHXqvy38UIejxfqhePx6nYa9WrYDAAzmta
nA5t2bZSZVwBX7G5nEbEuM+ft7n56hcfjDzwWL+TxeKH96Xj06j2bO4FZETEzIg4EFgGrujwmSdpv9eSZTWbuiIhzgFuAccBVmXl/l4clSfutngwbgMy8CbhpL3W325fieozH4w88Fi/k8Xih/eZ4ROaL7ptLkrRH9eo9G0nSHxHDZjf0wkfiRMRVEbElIu5rqh0WEasjYmN5nlTqERGXlfd7T0TMadpmcWm/MSIWN9XnRsS9ZZvLIiI67WMvHIsZEbEmIh6MiPsj4tz9/HhMjIg7IuIn5XhcVOozI+L2MtbryyQcIuKgst5fXu9r2tcFpb4hIk5pqrf8Geqkj70hIsZFxI8j4ludjrNXjsWYZaaPDh40Jh78HHgNcCDwE+Cobo+rg/fxb4A5wH1Ntb8Flpfl5cAlZfk04GYaf8c0H7i91A8DHirPk8rypPLaHcBbyjY3A6d20sdeOhZTgTll+aXAz4Cj9uPjEcAhZXkCcHsZwwpgUal/GfhIWf5L4MtleRFwfVk+qvx8HATMLD8340b6GRprH3vxmHwC+DrwrU7G2UvHYszHrtsD2Fcf5T+MW5rWLwAu6Pa4OnwvfbwwbDYAU8vyVGBDWf4K8IFd2wEfAL7SVP9KqU0FftpU39lurH106bjcSOPz9fb74wG8BLiLxidxPAaML/WdPwc0Zn++pSyPL+1i15+NoXbD/QyVbcbUx146BtOBW4ETgG91Ms5eORadPLyM1rlWH4kzrUtj2dNekZmbAcrzEaU+3HseqT7Qot5JH3tVuSRxLI3f5vfb41EuG90NbAFW0/jt+7eZuaPFeHaOtbz+FDCZsR+nyR30sTdcCvwV8HxZ72ScvXIsxsyw6VxbH4nTY4Z7z2Otd9LHXhMRhwDfBM7LzKdHatqi1lPHIzOfy8xjaPxWfxxw5Ajj2VPHY6T33JXjERGnA1syc31zeYSx9Oyx6JRh07m2PhJnH/VoREwFKM9bSn249zxSfXqLeid97BURMYFG0HwtM/+hw7H2zPEYkpm/BdbSuGdzaEQM/Y1e83h2jrW8/nLgCcZ+nB7roI/a3gq8OyIepvEp8ifQONPZH49FRwybzvXyR+KsAoZmUC2mce9iqH5WmSE1H3iqXPK5BTg5IiaVWVQn07iuvBnYGhHzy6yrs3bZ11j6qK6M8Urgwcz8fNNL++vxmBIRh5blg4F3AA8Ca4Azhhnr0Hs4A/huNm4orAIWldlTM4FZNCZKtPwZKtuMtY+qMvOCzJyemX1lnN/NzDM7GOc+fyw61u2bRvvyg8ZMoZ/RuI79qW6Pp8P38PfAZmA7jd+UzqZx3fdWYGN5Pqy0DRpfSvdz4F5gXtN+PgT0l8eSpvo84L6yzd/xhz8kHnMfe+FYvI3GZYh7gLvL47T9+HgcDfy4HI/7gL8p9dfQ+A+yH/gGcFCpTyzr/eX11zTt61PlPWygzMAb6Weokz724nE5nj/MRtuvj8VYHn6CgCSpOi+jSZKqM2wkSdUZNpKk6gwbSVJ1ho0kqTrDRpJUnWEjSarOsJEkVff/ATyV0p7aVVUJAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "cdf['value_len'].plot(kind='hist', legend=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "jsonGroup = cdf.groupby('is_json')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "And we cannot identify any non_json (blue) on the right side of the histogram. This means there all frquency of non-json values are very low or inexistent for the biggest values. Since there are so many small values, the biggest ones represent such a small portion that is hard to identify by look on the histograms and graphs. " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "is_json\n", - "False AxesSubplot(0.125,0.125;0.775x0.755)\n", - "True AxesSubplot(0.125,0.125;0.775x0.755)\n", - "Name: value_len, dtype: object" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZsAAAD8CAYAAAChHgmuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAG9tJREFUeJzt3X+01XWd7/HniwN4KE0Qwbgc9aAxcyUrwhPiaro3f4TodSRN70XvGlkNDWW6tDW1EptZSaWrvGsmZ1wxmiWFPwrQUrmKl4vkTHcqfxwUlR8RJ6I4YYKAaJOowPv+sT8HN7DPPntvzuds2Of1WOu79vf7/n5+7W8d33y/38/+fhURmJmZ5TSg3gMwM7PG52RjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZTew3gM4VBx77LHR2tpa72GYmR1Wli9f/nJEjOipnJNN0traSnt7e72HYWZ2WJH020rK+TKamZll52RjZmbZOdmYmVl2vmdjZlalt956i87OTnbu3FnvofSZ5uZmWlpaGDRoUE31nWzMzKrU2dnJUUcdRWtrK5LqPZzsIoKtW7fS2dnJmDFjamrDl9HMzKq0c+dOhg8f3i8SDYAkhg8fflBnck42ZmY16C+JpsvBfl8nGzMzy873bMzMDlLrrEd6tb0N3/hvPZZpamrife97397tBx98kO6egrJhwwYuuOACVq5c2VtDrJqTTS9onfVIRf/nMDPrLUOGDGHFihX1HkbFsl1Gk9Qs6SlJz0laJekrKT5G0pOS1klaIGlwih+RtjvS/taitq5P8bWSzi2KT0mxDkmziuIl+zAza2QbNmzgIx/5CBMmTGDChAn8/Oc/P6DMqlWrmDhxIuPHj+f9738/69atA+Cee+7ZG//0pz/N7t27e3VsOe/ZvAGcFREfAMYDUyRNAm4GbomIscB2YEYqPwPYHhHvAW5J5ZA0DpgGvBeYAvyLpCZJTcAc4DxgHHBZKkuZPszMGsLrr7/O+PHjGT9+PBdddBEAI0eOZOnSpTzzzDMsWLCAa6655oB6t99+O9deey0rVqygvb2dlpYW1qxZw4IFC/jZz37GihUraGpq4t577+3V8Wa7jBYRAfwxbQ5KSwBnAZen+DxgNnAbMDWtA9wPfEuF6Q9TgfkR8QbwG0kdwMRUriMi1gNImg9MlbSmTB9mZg2h1GW0t956i6uvvnpvwvjVr351QL0zzjiDm266ic7OTi6++GLGjh3LsmXLWL58OR/60IeAQiIbOXJkr4436z2bdPaxHHgPhbOQXwOvRMSuVKQTGJ3WRwMbASJil6QdwPAUf6Ko2eI6G/eLn57qdNeHmVnDuuWWWzjuuON47rnn2LNnD83NzQeUufzyyzn99NN55JFHOPfcc/nud79LRDB9+nS+/vWvZxtb1qnPEbE7IsYDLRTORk4pVSx9lprEHb0YP4CkmZLaJbVv2bKlVBEzs8PGjh07GDVqFAMGDODuu+8ued9l/fr1nHTSSVxzzTVceOGFPP/885x99tncf//9bN68GYBt27bx299W9OaAivXJbLSIeEXSvwKTgKGSBqYzjxZgUyrWCRwPdEoaCBwNbCuKdymuUyr+cpk+9h/XHcAdAG1tbSUTkplZTw6V2aif/exn+cQnPsF9993HmWeeyTvf+c4DyixYsIB77rmHQYMG8e53v5svf/nLHHPMMdx4441MnjyZPXv2MGjQIObMmcOJJ57Ye4OLiCwLMAIYmtaHAP8PuAC4D5iW4rcDn03rVwG3p/VpwMK0/l7gOeAIYAywHmiikCjXp9jgVOa9qU7JPsotp512WtTqxOserrmumR1+Vq9eXe8h1EWp7w20RwU5IeeZzShgXrpvMyAlj4clrQbmS7oReBa4M5W/E7g7TQDYlhIOEbFK0kJgNbALuCoidgNIuhpYkpLP3IhYldq6rps+zMysDnLORnse+GC
J+Hrenk1WHN8JXNpNWzcBN5WILwYWV9qHmZnVh5+NZmZm2TnZmJlZdk42ZmaWnZONmZll56c+m5kdrNlH93J7O8ru3rp1K2effTYAf/jDH2hqamLEiBEAPPXUUwwefOg9e9jJxszsMDN8+PC9z0WbPXs2Rx55JF/4whf2KdP1+5YBAw6NC1iHxijMzOygdXR0cOqpp/KZz3yGCRMmsHHjRoYOHbp3//z58/nUpz4FwEsvvcTFF19MW1sbEydO5Iknnuiu2V7hZGNm1kBWr17NjBkzePbZZxk9uvtnEF9zzTV88YtfpL29nYULF+5NQrn4MpqZWQM5+eST974qoJzHHnuMtWvX7t3evn07r7/+OkOGDMkyLicbM7MGUvzwzQEDBnQ9qxKAnTt37l2PiD6dTODLaGZmDWrAgAEMGzaMdevWsWfPHh544IG9+8455xzmzJmzd3v/F7H1Np/ZmJkdrB6mKtfTzTffzJQpUzjhhBMYN24cb7zxBgBz5szhyiuv5Hvf+x67du3izDPP3Cf59DYVn2L1Z21tbdHe3l5T3dZZjxwy77Mws/zWrFnDKaeUehdkYyv1vSUtj4i2nur6MpqZmWXnZGNmZtk52ZiZ1aC/3YI42O/rZGNmVqXm5ma2bt3abxJORLB161aam5trbsOz0czMqtTS0kJnZydbtmyp91D6THNzMy0tLTXXd7IxM6vSoEGDGDNmTL2HcVjxZTQzM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMssuWbCQdL+lxSWskrZJ0bYrPlvR7SSvScn5RnesldUhaK+ncoviUFOuQNKsoPkbSk5LWSVogaXCKH5G2O9L+1lzf08zMepbzzGYX8PmIOAWYBFwlaVzad0tEjE/LYoC0bxrwXmAK8C+SmiQ1AXOA84BxwGVF7dyc2hoLbAdmpPgMYHtEvAe4JZUzM7M6yZZsIuLFiHgmrb8GrAG6fyE2TAXmR8QbEfEboAOYmJaOiFgfEW8C84GpkgScBdyf6s8DPl7U1ry0fj9wdipvZmZ10Cf3bNJlrA8CT6bQ1ZKelzRX0rAUGw1sLKrWmWLdxYcDr0TErv3i+7SV9u9I5fcf10xJ7ZLa+9NjJ8zM+lr2ZCPpSOBHwOci4lXgNuBkYDzwIvCPXUVLVI8a4uXa2jcQcUdEtEVE24gRI8p+DzMzq13WZCNpEIVEc29E/BggIl6KiN0RsQf4DoXLZFA4Mzm+qHoLsKlM/GVgqKSB+8X3aSvtPxrY1rvfzszMKpVzNpqAO4E1EfHNovioomIXASvT+iJgWppJNgYYCzwFPA2MTTPPBlOYRLAoCs/2fhy4JNWfDjxU1Nb0tH4J8JPoL88CNzM7BOV86vOHgb8CXpC0IsW+RGE22XgKl7U2AJ8GiIhVkhYCqynMZLsqInYDSLoaWAI0AXMjYlVq7zpgvqQbgWcpJDfS592SOiic0UzL+D3NzKwH2ZJNRPw7pe+dLC5T5ybgphLxxaXqRcR63r4MVxzfCVxazXjNzCwfP0HAzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLLluykXS8pMclrZG0StK1KX6MpKWS1qXPYSkuSbdK6pD0vKQJRW1NT+XXSZpeFD9N0gupzq2SVK4PMzOrj5xnNruAz0fEKcAk4CpJ44BZwLKIGAssS9sA5wFj0zITuA0KiQO4ATgdmAjcUJQ8bktlu+pNSfHu+jAzszrIlmwi4sWIeCatvwasAUYDU4F5qdg84ONpfSpwVxQ8AQyVNAo4F1gaEdsiYjuwFJiS9r0rIn4REQHctV9bpfowM7M66JN7NpJagQ8CTwLHRcSLUEhIwMhUbDSwsah
aZ4qVi3eWiFOmDzMzq4PsyUbSkcCPgM9FxKvlipaIRQ3xasY2U1K7pPYtW7ZUU9XMzKqQNdlIGkQh0dwbET9O4ZfSJTDS5+YU7wSOL6reAmzqId5SIl6uj31ExB0R0RYRbSNGjKjtS5qZWY8qSjaSTq224TQz7E5gTUR8s2jXIqBrRtl04KGi+BVpVtokYEe6BLYEmCxpWJoYMBlYkva9JmlS6uuK/doq1YeZmdXBwArL3S5pMPB94AcR8UoFdT4M/BXwgqQVKfYl4BvAQkkzgN8Bl6Z9i4HzgQ7gT8AnASJim6SvAU+ncl+NiG1p/co0piHAo2mhTB9mZlYHFSWbiPgLSWOBvwbaJT0FfC8ilpap8++Uvq8CcHaJ8gFc1U1bc4G5JeLtwAFnXRGxtVQfZmZWHxXfs4mIdcDfA9cB/xW4VdIvJV2ca3BmZtYYKr1n835Jt1D4rcxZwF+mH2ueBdyScXxmZtYAKr1n8y3gO8CXIuL1rmBEbJL091lGZmZmDaPSZHM+8HpE7AaQNABojog/RcTd2UZnZmYNodJ7No9RmPHV5R0pZmZm1qNKz2yaI+KPXRsR8UdJ78g0psPOhubLYXadOp+9o04dm5lVrtIzm//Y75H/pwGvlylvZma2V6VnNp8D7pPU9TiYUcD/yDMkMzNrNJX+qPNpSf8Z+HMKP9T8ZUS8lXVkZmbWMCo9swH4ENCa6nxQEhFxV5ZRmZlZQ6ko2Ui6GzgZWAHsTuGuF5aZmZmVVemZTRswLj2/zMzMrCqVzkZbCbw750DMzKxxVXpmcyywOj3t+Y2uYERcmGVUZmbWUCpNNrNzDsLMzBpbpVOf/03SicDYiHgsPT2gKe/QzMysUVT6ioG/Ae4Hvp1Co4EHcw3KzMwaS6UTBK6i8JrnV2Hvi9RG5hqUmZk1lkqTzRsR8WbXhqSBFH5nY2Zm1qNKk82/SfoSMETSx4D7gP+db1hmZtZIKk02s4AtwAvAp4HFgN/QaWZmFal0NtoeCq+F/k7e4ZiZWSOq9Nlov6HEPZqIOKnXR2RmZg2nmmejdWkGLgWO6f3hmJlZI6ronk1EbC1afh8R/wSclXlsZmbWICr9UeeEoqVN0meAo3qoM1fSZkkri2KzJf1e0oq0nF+073pJHZLWSjq3KD4lxTokzSqKj5H0pKR1khZIGpziR6TtjrS/teKjYWZmWVR6Ge0fi9Z3ARuA/95Dne8D3+LAd97cEhH/UByQNA6YBrwX+E/AY5L+LO2eA3wM6ASelrQoIlYDN6e25ku6HZgB3JY+t0fEeyRNS+X8CmszszqqdDbamdU2HBE/reKsYiowPyLeAH4jqQOYmPZ1RMR6AEnzgamS1lC4jHd5KjOPwsNCb0ttzU7x+4FvSZLfxWNmVj+Vzkb723L7I+KbVfR5taQrgHbg8xGxncKz1p4oKtOZYgAb94ufDgwHXomIXSXKj+6qExG7JO1I5V+uYoxmZtaLKv1RZxtwJYX/kI8GPgOMo3Dfpuy9m/3cRuH10uOBF3n78pxKlI0a4uXaOoCkmZLaJbVv2bKl3LjNzOwgVPPytAkR8RoUbvQD90XEp6rpLCJe6lqX9B3g4bTZCRxfVLQF2JTWS8VfBoZKGpjOborLd7XVmZ7hdjSwrZvx3AHcAdDW1ubLbGZmmVR6ZnMC8GbR9ptAa7WdSRpVtHkRhddNAywCpqWZZGOAscBTwNPA2DTzbDCFSQSL0v2Xx4FLUv3pwENFbU1P65cAP/H9GjOz+qr0zOZu4ClJD1C4JHURB84y24ekHwIfBY6V1AncAHxU0vjUxgYKz1kjIlZJWgispjDb7aqI2J3auRpYQuFlbXMjYlXq4jpgvqQbgWeBO1P8TuDuNMlgG4UEZWZmdaRK/9EvaQLwkbT504h4Ntuo6qCtrS3a29trqzz76N4dTFV976hf32bW70laHhFtPZWr9DIawDuAVyPinyncDxlT8+jMzKxfqfQJAjdQuGx1fQoNAu7JNSg
zM2sslZ7ZXARcCPwHQERsoropz2Zm1o9VmmzeTDO6AkDSO/MNyczMGk2lyWahpG9T+G3L3wCP4RepmZlZhSp9Nto/SPoY8Crw58CXI2Jp1pGZmVnD6DHZSGoClkTEOYATjJmZVa3Hy2jpx5V/klTHH5OYmdnhrNInCOwEXpC0lDQjDSAirskyKjMzayiVJptH0mJmZla1sslG0gkR8buImNdXAzIzs8bT0z2bB7tWJP0o81jMzKxB9ZRsil9EdlLOgZiZWePqKdlEN+tmZmYV62mCwAckvUrhDGdIWidtR0S8K+vozMysIZRNNhHR1FcDMTOzxlXN+2zMzMxq4mRjZmbZOdmYmVl2TjZmZpadk42ZmWXnZGNmZtk52ZiZWXZONmZmlp2TjZmZZZct2UiaK2mzpJVFsWMkLZW0Ln0OS3FJulVSh6TnJU0oqjM9lV8naXpR/DRJL6Q6t0pSuT7MzKx+cp7ZfB+Ysl9sFrAsIsYCy9I2wHnA2LTMBG6DQuIAbgBOByYCNxQlj9tS2a56U3row8zM6iRbsomInwLb9gtPBbpexDYP+HhR/K4oeAIYKmkUcC6wNCK2RcR2YCkwJe17V0T8IiICuGu/tkr1YWZmddLX92yOi4gXAdLnyBQfDWwsKteZYuXinSXi5fo4gKSZktoltW/ZsqXmL2VmZuUdKhMEVCIWNcSrEhF3RERbRLSNGDGi2upmZlahvk42L6VLYKTPzSneCRxfVK4F2NRDvKVEvFwfZmZWJ32dbBYBXTPKpgMPFcWvSLPSJgE70iWwJcBkScPSxIDJwJK07zVJk9IstCv2a6tUH2ZmVic9vamzZpJ+CHwUOFZSJ4VZZd8AFkqaAfwOuDQVXwycD3QAfwI+CRAR2yR9DXg6lftqRHRNOriSwoy3IcCjaaFMH2ZmVifZkk1EXNbNrrNLlA3gqm7amQvMLRFvB04tEd9aqg8zM6ufQ2WCgJmZNTAnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+zqkmwkbZD0gqQVktpT7BhJSyWtS5/DUlySbpXUIel5SROK2pmeyq+TNL0oflpqvyPVVd9/SzMz61LPM5szI2J8RLSl7VnAsogYCyxL2wDnAWPTMhO4DQrJCbgBOB2YCNzQlaBSmZlF9abk/zpmZtadQ+ky2lRgXlqfB3y8KH5XFDwBDJU0CjgXWBoR2yJiO7AUmJL2vSsifhERAdxV1JaZmdVBvZJNAP9X0nJJM1PsuIh4ESB9jkzx0cDGorqdKVYu3lkibmZmdTKwTv1+OCI2SRoJLJX0yzJlS91viRriBzZcSHQzAU444YTyIzYzs5rV5cwmIjalz83AAxTuubyULoGRPjen4p3A8UXVW4BNPcRbSsRLjeOOiGiLiLYRI0Yc7NcyM7Nu9HmykfROSUd1rQOTgZXAIqBrRtl04KG0vgi4Is1KmwTsSJfZlgCTJQ1LEwMmA0vSvtckTUqz0K4oasvMzOqgHpfRjgMeSLORBwI/iIj/I+lpYKGkGcDvgEtT+cXA+UAH8CfgkwARsU3S14CnU7mvRsS2tH4l8H1gCPBoWszMrE76PNlExHrgAyXiW4GzS8QDuKqbtuYCc0vE24FTD3qwZmbWKw6lqc9mZtagnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCw7JxszM8vOycbMzLJzsjEzs+ycbMzMLDsnGzMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsBtZ7ALlImgL8M9AEfDcivlHnIeUx++g69bujPv2
a2WGpIc9sJDUBc4DzgHHAZZLG1XdUZmb9V0MmG2Ai0BER6yPiTWA+MLXOYzIz67ca9TLaaGBj0XYncHqdxtKYfPnOzKrQqMlGJWJxQCFpJjAzbf5R0toa+zsWeLnGuo0o3/H4Sqn/aQ9p/v/Gvnw89tUIx+PESgo1arLpBI4v2m4BNu1fKCLuAO442M4ktUdE28G20yh8PN7mY7EvH4999afj0aj3bJ4GxkoaI2kwMA1YVOcxmZn1Ww15ZhMRuyRdDSyhMPV5bkSsqvOwzMz6rYZMNgARsRhY3EfdHfSluAbj4/E2H4t9+Xjsq98cD0UccN/czMysVzXqPRszMzuEONkcJElTJK2V1CFpVr3HUy1JcyVtlrSyKHaMpKWS1qXPYSkuSbem7/q8pAlFdaan8uskTS+KnybphVTnVkmqtY8+OBbHS3pc0hpJqyRd28+PR7OkpyQ9l47HV1J8jKQn01gXpEk4SDoibXek/a1FbV2f4mslnVsUL/n3U0sffUFSk6RnJT1c6zgb5VhULSK81LhQmHzwa+AkYDDwHDCu3uOq8jv8F2ACsLIo9r+AWWl9FnBzWj8feJTC75gmAU+m+DHA+vQ5LK0PS/ueAs5IdR4Fzquljz46FqOACWn9KOBXFB531F+Ph4Aj0/og4Mk0hoXAtBS/HbgyrX8WuD2tTwMWpPVx6W/jCGBM+ptpKvf3U20ffXhM/hb4AfBwLeNspGNR9bGr9wAO5yX9R2NJ0fb1wPX1HlcN36OVfZPNWmBUWh8FrE3r3wYu278ccBnw7aL4t1NsFPDLovjectX2Uafj8hDwMR+PAHgH8AyFJ3G8DAxM8b1/AxRmf56R1gemctr/76KrXHd/P6lOVX300TFoAZYBZwEP1zLORjkWtSy+jHZwSj0WZ3SdxtKbjouIFwHS58gU7+77lot3lojX0kefSpckPkjhX/P99niky0YrgM3AUgr/+n4lInaVGM/esab9O4DhVH+chtfQR1/4J+CLwJ60Xcs4G+VYVM3J5uBU9FicBtLd9602XksffUbSkcCPgM9FxKvlipaINdTxiIjdETGewr/qJwKnlBlPbx2Pct+5LsdD0gXA5ohYXhwuM5aGPRa1crI5OBU9Fucw9JKkUQDpc3OKd/d9y8VbSsRr6aNPSBpEIdHcGxE/rnGsDXM8ukTEK8C/UrhnM1RS12/0isezd6xp/9HANqo/Ti/X0EduHwYulLSBwlPkz6JwptMfj0VNnGwOTqM+FmcR0DWDajqFexdd8SvSDKlJwI50yWcJMFnSsDSLajKF68ovAq9JmpRmXV2xX1vV9JFdGuOdwJqI+GbRrv56PEZIGprWhwDnAGuAx4FLuhlr13e4BPhJFG4oLAKmpdlTY4CxFCZKlPz7SXWq7SOriLg+IloiojWN8ycR8T9rGOdhfyxqVu+bRof7QmG20K8oXMv+u3qPp4bx/xB4EXiLwr+UZlC47rsMWJc+j0llReGldL8GXgDaitr5a6AjLZ8sircBK1Odb/H2D4mr7qMPjsVfULgM8TywIi3n9+Pj8X7g2XQ8VgJfTvGTKPwHsgO4DzgixZvTdkfaf1JRW3+XvsNa0gy8cn8/tfTRh8flo7w9G61fH4tqFj9BwMzMsvNlNDMzy87JxszMsnOyMTOz7JxszMwsOycbMzPLzsnGzMyyc7IxM7PsnGzMzCy7/w9D5zzuFuBn3AAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "jsonGroup['value_len'].plot(kind='hist', legend=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sample overview\n", - "Some overview about the sample after the data prep: \n", - "- Rows: 499805\n", - "- Mean: 27829.33,\n", - "- Min: 1357,\n", - "- Max: 4496861\n", - "- Std: 122092.41" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "#Hardcoded data to fast use, but your can update for the calculed value within the next few cells \n", - "MEAN = 27829.33\n", - "MIN = 1357\n", - "MAX = 4496861\n", - "STD = 122092.41\n", - "COUNT = 499805\n", - "\n", - "#Information for original sample.\n", - "ORIG_MEAN = 1356.97\n", - "ORIG_MIN = 0\n", - "ORIG_MAX = 4496861\n", - "ORIG_STD = 26310.62\n", - "ORIG_COUNT = 11292867\n", - "\n", - "#hardcoded information about described data for values one std above the mean: \n", - "A_MEAN = 271204.44\n", - "A_MIN = 27669\n", - "A_MAX = 4496861\n", - "A_STD = 306555\n", - "A_COUNT = 46745" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def describedData(df):\n", - " tmp_mean = df['value_len'].mean()\n", - " tmp_min = df['value_len'].min()\n", - " tmp_max = df['value_len'].max()\n", - " tmp_std = df['value_len'].std()\n", - " tmp_count = df['value_len'].count()\n", - " (tmp_mean, tmp_min, tmp_max, tmp_std, tmp_count) = dd.compute(tmp_mean, tmp_min, tmp_max, tmp_std, tmp_count);\n", - " return (tmp_mean, tmp_min, tmp_max, tmp_std, tmp_count)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "27829.332847810645 1357 4496861 122092.41371885882 499805\n" - ] - } - ], - "source": [ - "#Calculate the described data for mean sample\n", - "(MEAN, MIN, MAX, 
STD, COUNT) = describedData(df)\n", - "print(MEAN, MIN, MAX, STD, COUNT)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1356.9776628910975 0 4496861 26310.62140481331 11292867\n" - ] - } - ], - "source": [ - "#Calculate the described data for original sample\n", - "(ORIG_MEAN, ORIG_MIN, ORIG_MAX, ORIG_STD, ORIG_COUNT) = describedData(dd.read_parquet('sample_0.parquet'))\n", - "print(ORIG_MEAN, ORIG_MIN, ORIG_MAX, ORIG_STD, ORIG_COUNT)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "271204.44978072523 27669 4496861 306555.0273738244 46745\n" - ] - } - ], - "source": [ - "#Calculate the described data for one std above the mean (using mean and stf of the original sample)\n", - "std_above = df[df['value_len'] > ORIG_STD + ORIG_MEAN]\n", - "(A_MEAN, A_MIN, A_MAX, A_STD, A_COUNT) = describedData(std_above)\n", - "print(A_MEAN, A_MIN, A_MAX, A_STD, A_COUNT)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The following cell will create a dataframe of the described data calculated above and save it into a csv to fulture use, if calculations are not possible. " - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
MEANMINMAXSTDCOUNT
ORIGINAL1356.9776630449686126310.62140511292867
ABOVE_MEAN27829.33284813574496861122092.413719499805
ABOVE_STD271204.449781276694496861306555.02737446745
\n", - "
" - ], - "text/plain": [ - " MEAN MIN MAX STD COUNT\n", - "ORIGINAL 1356.977663 0 4496861 26310.621405 11292867\n", - "ABOVE_MEAN 27829.332848 1357 4496861 122092.413719 499805\n", - "ABOVE_STD 271204.449781 27669 4496861 306555.027374 46745" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Comparasion of this sample and original 10% sample:\n", - "import pandas as pd\n", - "import numpy as np\n", - "%matplotlib inline\n", - "\n", - "compare = pd.DataFrame([(ORIG_MEAN, ORIG_MIN, ORIG_MAX, ORIG_STD, ORIG_COUNT),\n", - " (MEAN, MIN, MAX, STD, COUNT), \n", - " (A_MEAN, A_MIN, A_MAX, A_STD, A_COUNT)], \n", - " columns=['MEAN', 'MIN', 'MAX', 'STD', 'COUNT'],\n", - " index= ['ORIGINAL','ABOVE_MEAN', 'ABOVE_STD'])\n", - "compare.to_csv('describedData.csv')\n", - "compare" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Number of rows\n", - "The number of rows after filtering for values above the mean are about 4.42% of the original sample. \n", - "And the count for values one std above the mean is just 9.35% of this sample or 0.41% of original sample. \n", - "By this we can see that the really big values represent just a very small portion of the whole. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Above the mean / original 4.425846864219688\n", - "1 STD Above the mean / original 0.41393385754033946\n", - "1 STD Above the mean / Above mean 9.35264753253769\n" - ] - } - ], - "source": [ - "print('Above the mean / original', COUNT / ORIG_COUNT * 100)\n", - "print('1 STD Above the mean / original', A_COUNT / ORIG_COUNT * 100)\n", - "print('1 STD Above the mean / Above mean', A_COUNT / COUNT * 100)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAUAAAADuCAYAAABI8d6AAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAG4dJREFUeJzt3XmYHFW9//H3tzshJEACQbbLVuyBBCRslysgyPKAlrIJQtiCgqIsinjREhHilauNyE9wBQVkUeQni4CUsonsWyAETMKiQIHsYiQhkNnP/aNqzGQyk+mZ6anT3fV5PU8/zNT08klIPjm1nWPOOUREiqjkO4CIiC8qQBEpLBWgiBSWClBECksFKCKFpQIUkcJSAYpIYakARaSwVIAiUlgqQBEpLBWgiBSWClBECksFKCKFpQIUkcJSAYpIYakARaSwVIAiUlgqQBEpLBWgiBSWClBECksFKCKFpQIUkcJSAYpIYakARaSwVIAiUlgqQBEpLBWgiBSWClBECksFKCKFpQIUkcIa5TuANKYgiicA6wOrASsAo7PHCr3+2wksBBZk/50PvJVUwnc9xBZZijnnfGeQOhRE8RhgW2ASadFtkP23+zF+mB+xGHgL+DswD5jb/Ugq4RvDfG+RqqgAhSCKDdgS2KnHYxvSEZwP81lSiLOAO5NK+KKnLNLEVIAFlBXedkAI7A7swPBHdCPtReDO7PGnpBL+03MeaQIqwIIIongFYB/gIOBjwDp+Ew2LA2aTluFNSSV8wHMeaVAqwCYWRPFo0rI7FPg4MMFvohHzPHAVcKV2lWUwVIBNKIjidYETgM8Ca3uOkycH3A9cCfw2qYQLPeeROqcCbCJBFH8EOAk4AF3itBi4EbgwqYSP+A4j9UkF2OCCKF4FOAY4EdjKc5x6dR/wPSBOKqH+wMu/qQAbVBDFKwKnABEw0XOcRjEH+DZwrYpQQAXYcIIoLgOfBs4G1vMcp1GpCAVQATaUIIoPAc4BtvCdpUnMBE5KKuFM30HEDxVgA8hObnyP9IJlqa0u4BLg60klnO87jORLBVjHsgkH/h/wGd9ZCuCfwBnAJUkl7PIdRvKhAqxTQRSHwMXAur6zFMyjpLvFj/kOIiNPBVhngiheDbgQONp3lgLrAs4HvpFUwnbfYWTkq
ADrSBDFBwI/o1h3b9SzR4Bpur2ueakA60AQxeNIi+8Y31lkGQuAzyaV8FrfQaT2VICeBVG8GXA9sLXvLLJcPwdOTSrhYt9BpHZUgB4FUXwAcAXNO0tLs5kDHJZUwnm+g0htaFEkT4IoPhP4HSq/RjIFeCSI4n19B5Ha0AgwZ9k9vJcB03xnkSFrJz0ueIXvIDI8GgHmKIjiicDdqPwa3Wjg8iCKv+E7iAyPRoA5CaJ4DeAO4IO+s0hN/RQ4RXePNCYVYA6CKF4b+BOar69Z3QgcoTPEjUcFOMKCKF4PuAvYzHcWGVH3Ax9NKuEi30GkejoGOIKCKN4QuAeVXxHsCvw+iOKxvoNI9VSAIySI4k2Ae4GNfWeR3OwB/C5bglQagHaBR0AQxWuR3ke6oe8s4sXvgEOTStjpO4gsn0aANZbtAt2Myq/IDgJ+4juEDEwFWENBFBvpAt07+c4i3p0QRPHZvkPI8qkAa+u7wCd9h5C6MSOI4mN9h5D+6RhgjQRRfBzp2hIiPbUAOyeV8EnfQWRZKsAaCKJ4T+BW0lukRHp7Htg+qYQLfAeRpWkXeJiCKN4AuA6Vn/RvE+By3yFkWSrAYQiiuEQ6n99qvrNI3TswiOLTfYeQpakAh+e/SS9+FanGd4Io3s13CFlCxwCHKIjiqcDDgK76l8F4HZiaVMI3fQcRjQCHJLvY+deo/GTw1gF+7DuEpFSAQ/M9YEvfIaRhHRJE8cd9hxDtAg9ath7EHwHznUUa2svAVkklfM93kCLTCHAQsl3fi1H5yfBtAHzbd4iiUwEOztfRJAdSO18Monh73yGKTLvAVQqieGNgLrCi7yzSVJ4AdtTUWX5oBFi981H5Se1NBb7oO0RRaQRYhezi1Xt955CmNR8Ikkr4ru8gRaMR4ACyOf6+7zuHNLWJwJd8hygiFeDAPoUmOJWRd1oQxRN8hygaFeByZKO/s3znkEJYDTjVd4iiUQEu38fQYuaSny8HUbyq7xBFogJcvv/2HUAKZQJwmu8QRaKzwP0IongHYKbvHFI4C4GNkko433eQItAIsH8a/YkP44ETfIcoCo0A+xBE8Yak6ziUfWeRQnoJ2DiphF2+gzQ7jQD79mVUfuLPhqQn4GSEqQB7CaJ4JeA43zmk8L7gO0ARqACXtT+wsu8QUnj7BVG8ru8QzU4FuKzDfQcQIf27ebTvEM1OBdhDdhHqfr5ziGSm+w7Q7FSASzsYLXQk9WNSEMW6D30EqQCXpt1fqTef8B2gmakAM0EUrwns6TuHSC+6HGYEqQCXOBRd+yf1Z2oQxWv7DtGsVIBLaJ1WqUcGfNR3iGalAgSCKC4Du/jOIdIPFeAIUQGmtgNW8R1CpB/7BFE8yneIZqQCTH3YdwCR5VgV+JDvEM1IBZhSAUq928d3gGZU+ALM1v3YzXcOkQFs7ztAMyp8AQJbky5II1LPtvUdoBlVVYBmtp6Z3WRmfzWz583sQjNbwcz2MLMFZvaEmT1jZt/v8ZpjzezHPb4/ysyeMrO5ZvakmV1iZqtmP7vbzHbIvk7M7PoerzvEzC7vlecmM3uo17YZZjaUWZy1+yuNYJ0gitfyHaLZDFiAZmbADcCNzrnNgM1Jp4v63+wp9znnpgJTgY+b2TKXk5jZfqSTjH7UOTeZ9Kzrg0B//0N3MLPJ/eRZNXv9qma20UD5q7BDDd5DJA8aBdZYNSPAPYEW59wvAZxznaRl9hlgXPeTnHOLgdlAX3OYfYN0jY2Dut/DOXeZc+7Zfj7z+8AZ/fzsk8DvgWuozb27WvZSGsVU3wGaTTUFOBl4vOcG59xC4GVg0+5tZrYasBlwbz/vMYu0NKvxW2A7M9u0j59NA36TPaZV+X7LM6kG7yGSB40Aa6yaAjSgr5WTurfvZmZPAW8Atzjn3ljum5ltbWazs2OJh/XztE7gPODrvV67Fmnp3u+cew7oMLMpVfwa+hRE8froAmhpHBoB1lg1BTiXXsfJzGw8sD7pymn3Oee2I
T2b+gUz6+tfqbmkx+22AR4ANs5ef7mZLQR2Be7u9ZqrSE9QbNBj22GkZ2xfNLMECBjebvBmw3itSN42DaJ4jO8QzaSaAvwTMM7MjgEwszJwPnA58H73k7IR2XeBr/XxHt8lPa73jHNuvHNuPGnBfSH7+n5gj54vcM61Az8ATu2xeRqwn3MucM4FpNdGDacAa3ESRSQvJUAzw9TQgAXo0oWDDwIONbO/As8BLfR9kuIi4MO9z8465/4A/BDYxMzmmdmDpLu5tw3w8ZcCowDMLCAdDT7c431fBBaa2X9mm840s1e6HwP92lABSuNRAdZQrgujm9kZzrnv5PaBAwii+GpqcyJFJC8HJpXwJt8hmkXeM0yMNrOz+vmZc859O9c0+tdUGo/+zNZQ3gW4qI9t44DjgdWBvAtQZ4Cl0agAayjXAnTOnd/9tZmtAnyJ9NrAa0hPrORNBSiNRgVYQ7lPsmhmE4HTgCOBK4DtnHP/yjtHRgUojUYFWEO5FqCZnUe69u7Pga2dc33tEudpvOfPFxmsNX0HaCZ5nwXuAlqBDpa+u8RIT4LkVkjZPICd2WeLNIrHkkq4o+8QzSLX+QCdcyXn3Fjn3CrdF0Rnj1XyLL/Myqj8pPEMaq/NzA4yM2dmk7LvAzNbnN2O+qSZPWhmW/R4/q5m9mg2vd0zZva5bPsefUxBN8rM3jSzdczscjN7MXvf2dm1vv1lWsvMbsk+f56Z/aHHLbKzzWx+j/e6s0fmJ8zs6Szf9MH9tvUt713gib02OeAdl+cwdAkd/5NGNNi/s9NI77Q6HJiRbXveObctgJmdQHpTw3QzWxu4GjjQOTfLzD4A3GZmrwJ/BNYzs8A5l2Tvszcwxzn3ejprHqc7566rItP/AHc45y7MMmzjnPsL2WQP2fyft3S/V3YTxPPZtHuY2cbADWZW6p6laqjyPgnyOGnp9Rx5rWJms4Hje/zG5mF0jp9VOFPsb8/uVp71ekfJucVWdi1mtJRKrsVKrgWzlpLRZkabQasZ7Ya1lbB2oKPkrN0wpxH6MpwrvwlhVc81s5VJl3v9CHAzSwqwp/FA90nIk4DLnXOz0s9yb5vZV4EZzrnYzK4lvR//3Oz5h5POyjRY6wC3d3/jnHtqMC92zr1gZqeRXjnSOAXonOvz1jMzO5j0Nrr9cozz/sBPkaGa4zbd4tWONdc4rHz3058p313aiDe2si4m/PsJnQO/Ryd0tpm1tRltbWbt2aOtxayjzayz1ay9Jf1vZ6tZZ0vJutLvS10tZq7FrKulZLSauexhrZYWb6uZtRulNjNrNyu1Y6UOo9xhVu6AUqfZqC4Y1YmN6jJGOxjVBaMdrED6j+cKmPlYqvKZQTz3QOBW59xz2W7ldsB80ltSZ5PuBY0Dum8lnUx6ZUZPj2XbIS27nwPnmtkY4GOkc4N2O8/Mzsy+nuucO7KfXD8B/r+ZnQzcCfzSOffaIH5dkE6vN+yp7OpirVHn3A09fuPy8l7On1c4/2L8xIs699/los79GUVH+16lJ2YfU759wY6lZ4IVrHPDgV5fhvJY58aOdYzte0Y2v7qgq91oazVrayct59a0nDtazdpbStaRlXNHi1lXq1nn4pJ1taZfd7VYybVk5dw9Iu4u6jbD2sysLS1nazcrdxilUY5XBxFxGnBB9vU12fc/Yeld4MNIS20/+p/6zgE452aa2crZMcMtgYd7XcJW1S6wc+62bDd2P9JF358wsynOuX8M4tdWk72DuijAbKie9wJNi1l2d1xGSAejRt/WteO2t3WlJzC3sJdfnF6+/eX9yo+uuhqLJpvVx5/FwShBaYxjxTHOrVhvBW1mq5PO5j7FzBxQJg35015PvZklu5HdU9/d3OPn2wPzenzfPRP7lgxt9xcA59x80uONV5vZLaRT312//FctZSrw9FA/v1veJ0FO62PzasD+wI/7+NmISSqhC6L4fWClPD9XUs+6DTY6o+P4jc7oOJ4JLHrn0PI98w4r/9ltY
q9NLhmr+s5XxxZX+bxDgCudcyd0bzCze4D1ej1vV9J5PSEdHT5iZjc452ZnJXou6UmLbr8BbgImAMcNIT9mtifp6PH97I6wTUhnmK/29QHp9Ho/Gsrn95T3v7q9z7w60pmkj8rOAuVNBVgHFrDyqpd0hh+6pDOkRFfnHqXZT00v3z5/59K8DcZYx8a+89WZag/dTAMqvbZdT3rGt/sYoAFtpPfik53NPQr4RVZMBlzgnPt99xs45+aZ2fvA48653lnO63UoayfnXFsf2bYHfmxmHaR7fpc452YO8OvZxMyeAFYE3gV+NNwzwJDzhdBLfXC62+v6+E3MTRDFL5LOKi11amN77aXp5duTsPzw+NVZOMWs8Gfvr2LGgmN8h2gWuS+MbmZfMLOXgZeAl83sJTM7Me8cGZ0IqXMvuP/Y8OyOY3ffofWiqVu3XrL47PbpDz/Ttf79Xc7+6TubJ8tdc0cGJ+9jgGcCHwL2cM69kG3bGLjQzCY6587JMw9Lrn+SBrCIceOv6Nx35ys698Xo6tq1NGfO9PJtb+9SmrPuWGsvyvoub/oOUC0z+zTpjE89PeCcO8lHnr7kfS/ws8AHnXMtvbaPBZ50zm2eWxggiOIrgaPz/EwZGRvaG68cXb7jhf3LD41bg3e2NqNZFw86mhkLfuU7RLPI/dKD3uWXbVucTZSQt+cHfoo0gpfc2uud03H0eud0HM1KLF50QPmB2UeU72rb0l6aVDa3hu98NaRd4BrKuwBfMbO9nHN/6rnRzPYCXs85C6gAm9J7jF356s69//Pqzr0B53YuzZt7bPm2tz9c+sva46x1iwHfoL4N9o4JWY68T4J8Ebg4mzniFDM72cyuAC4GTs45C6gAC8Ds4a7Jkz/fftruW7X+cotdWi58/Wcdn7jvdTdxpnMsszdS57qo8s9snc4Cc2yWaa8+ch6SfX+3mT3b4/2u6/UeT5rZb3ptu9zMXs1uz8PMPmDpuuEDynsE2AocC2xOen+hAfeSLn/p4w+jCrBgXmWNdc7tmLbOuR3TGEvr+x8vP/ToUeU7W6fYi5uVzdX7bMt/Z8aC1iqfW4+zwAD8JcvWvRd4OPBkr+cc6Zx7rPcLzWxL0kHbh81spV6X0HWSLq/xsypzAPmPAC8AFjrnLnPOfcU5d5pz7lLSC5IvGOC1NZdUwrfoe6EmKYDFjBl3beceOx3Qds5um7T+aq1DW8965pbOne9Z5Fac51yd3duWqmoihB6zwBxHWjB9We4sMMBXgcg51wV0zwLTbaizwADcB+xkZqOznJsCs6t87RHAVaQzyezf62cXAF+2QU5QkfcIMOhr6hvn3GPZ7S0+PA980NNnS90wm+kmTZrZPmkSwNrMf/PIUXc+d1D5/jHr8vYUM8b5TgjMqfJ59ToLDKR3f90J7Et6O93NQO9Zon5tZt23/N3hnDs9+/owYB9gC9JDZj1L+GXSEe/RwO+pUt4FuOJyfjY2txRLexYVoPTyBhPXOr/jU2ud3/EpxtDW8tHSo48dPeqO9z9oz286yrr+w1OsuVU+ry5ngenhGtLzAROAr5Duive0zC6wme0I/MM595KZvQJcZmar9crxHdJCjasNkncBzjSzzzrnftFzo5kdRzpZqg+PAp/y9NnSAFpZYcUbu3bd4ca2XQHY1v723PRRt722d2nWGiuzeEuz3A4lzRroCfU+CwyAc+5RM5sCLM5GqdW8bBowqcfJjfHAJ4FLerzv37IRbtV/n/MuwFOB35nZkSwpvB1IJ5k8KOcs3R729LnSoGa7TTef3b7p5gAf4J1/HFG+69lPlu8dtYG9NdlsxJZaeJf0BMJA6nYWmF6+TpUnPs2sBBwKbOOcezXb9hHgTHoUYOZ/qdcRoHPuTeBDWfgp2ebYOXdXnjl6mQW0oynyZQjeZtU1fth58Bo/7DyY0XS07VuaOevoUXcs2s7+utFo61y/hh/1CDMWVHOzQD3PAvNvzrk/LufHPY8Bvg2cA7zaXX6Ze
4GtzGydXu8718xmAdst7/O7eZsNpp4EUfwooKUGpaYm24vPTy/f9vd9y4+vPp73tjKjPIy3+xYzFsyoVTZJNdwsvCPkHlSAUmNz3UabfLXj85t8tQNWY+H8w8t/fvrQ8j2ljeyNrcx6rI9SnX4vMJah0wgQCKI4BG7xnUOKIVsfZe4g1kfpBCYyY8HCPPINVyPMAtNNBQgEUTye9Dqp4eyiiAxJFeujPMiMBbt4CdfkVICZIIofBP7Ldw4ptn7WR/kmMxbkPVdmIegY4BLXoQIUz/paH+UT5Ydu9nWNWLPLfUr8OnYt9ba2oRRaF6XyXV3bTTjo239Y5vZRqQ0VYCaphH9HF0VL/bnRd4BmpgJc2rW+A4j0ogIcQSrApWk3WOrJG6TTR8kIUQH2kFTCV4CHBnyiSD4uTSphp+8QzUwFuCztBks96AJ+MeCzZFhUgMv6NX6m5xfp6dakEr7kO0SzUwH2klTCf5CWoIhPF/sOUAQqwL79wHcAKbRXGMScdjJ0KsA+JJVwLunCKyI+XKKTH/lQAfZPo0DxoZNlZzmWEaIC7EdSCW9l6TURRPJwVVIJXx34aVILKsDly32tYim0NuBbvkMUiQpw+a4CXvcdQgrj0qQSJr5DFIkKcDmSStgCnOU7hxRCC+niP5IjFeDALgM0HZGMtJ8mlfA13yGKRgU4gKQSdpGuXi8yUhax7FKWkgMVYBWSSngn8AffOaRp/TC7A0lypgKs3umk12iJ1NJrwPd8hygqFWCVkko4D83OIbV3YlIJF/gOUVQqwME5C3jHdwhpGtcllfAm3yGKTAU4CNlxmlN955Cm8C/gZN8hik4FOEhJJbwCuMV3Dml4pyeV8E3fIYpOBTg0J5D+Cy4yFHcllfBS3yFEBTgk2QWrJ/rOIQ1pMfA53yEkpQIcoqQSXgNc6TuHNJxTk0r4vO8QklIBDs/JwAu+Q0jD+FVSCX/uO4QsoQIchqQSvgscjhZRkoHNAz7vO4QsTQU4TEklnAl82ncOqWvvAocklfA930FkaSrAGsiOB/6P7xxSlxxwVFIJn/YdRJalAqydGcBvfYeQunN2Uglv9h1C+qYCrJGkEjrgWGCm5yhSP65Bk5zWNXPO+c7QVIIoXgd4FFjPdxbx6hbg4KQStvsOIv3TCLDGkkr4OrA/oBk+iusu4FCVX/1TAY6ApBI+AeyDZo4pooeA/bP1ZKTOaRd4BAVRvD1wB7Ca7yySi9nAR5JKqH/4GoQKcIQFUTwVuBOY6DuLjKhngA9ravvGol3gEZbtDu8JvO07i4yYOcDeKr/GowLMQVIJnyQtQf0FaT53AbsmlfBV30Fk8FSAOUkq4V+A3dHkCc3kKmA/renRuHQMMGdBFE8EriUdEUrjOiephN/0HUKGRyPAnCWVcD6wL/Aj31lkSDqAz6r8moNGgB4FUXw88BNgBd9ZpCrvANOSSnir7yBSGypAz4Io3gW4AVjTdxZZrgeAI5JK+LLvIFI72gX2LKmEDwA7AA/6ziJ96gS+Beyu8ms+GgHWiSCKy8DXSKfVGu03jWReBo5MKuH9voPIyFAB1pnszpFfAVv5zlJw1wKf021tzU0FWIeCKF4B+CbpiFCjwXzNB76SVMLLfQeRkacCrGNBFG8DXEp6jFBGlgN+CXwtqYS6bbEgVIB1LojiEjCddM0RTbI6MmYBpySVUCeiCkYF2CCCKB4LfAmIgAme4zSL14EzgCuTStjlO4zkTwXYYIIoXh04EzgRXUA9VO8AFwLnaanKYlMBNqggijciXXDncHQ9Z7VeB34AXJQtai8FpwJscEEUbwycBHwGWNVznHr1V+A80l3dVt9hpH6oAJtEEMUrAccApwBbeo5TLx4HzgWu1zE+6YsKsAkFUbw38EUgpHi7x2+Rrsf766QSPuo7jNQ3FWATC6J4PeDg7LEbzVuG7wM3kt5Bc0dSCTs855EGoQIsiCCK1wQOIC3DvWj8O0wWA3cDVwM3JpVwkd840ohUg
AUURPEE4BPAx0hHho1wgXUb8DDwZ9J1OB5OKmGb30jS6FSAQhDFAWkR/hewI7AN/q8xXAQ8BdxDWnr3J5Vwsd9I0mxUgLKMbDKGrYEpQNDjsSGwPjCqhh/3JullKn8DniNdYnIOkCSVUH84ZUSpAGVQsnkL1yUtxLWAscAYYMUej+7vS8AC0jsvej/+BfxTd2KITypAESmsZr0sQkRkQCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhhqQBFpLBUgCJSWCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhhqQBFpLBUgCJSWCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhhqQBFpLBUgCJSWCpAESksFaCIFJYKUEQKSwUoIoWlAhSRwlIBikhh/R+uFtLu4nk/AAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "compare['COUNT'].plot(kind='pie')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Max and Min values\n", - "\n", - "it is expected that the maximum will be the same for all mentioned samples since the filtering is being made by the minimum, and is also expected that the min is the value used to filter." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAa0AAAD9CAYAAAAPryh0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGPVJREFUeJzt3X+QXGWd7/H3NwEMEjBIsiybwZssxlUQLuKIWHq3KNzShEWDLK6JCGi4xb0WKVCvrrCr8kNYf6CrsiveYoFNtNwEVvSCCnKzKv64ijIJyK8sS5AsDAiEJIiugAa+949+OvRMeqZ7xiQ9z/B+VU1Nn+c853m+PdNzPn1On+mOzESSpBpM6XUBkiR1y9CSJFXD0JIkVcPQkiRVw9CSJFXD0JIkVcPQkiRVw9CSJFXD0JIkVWOXXhews8ycOTPnzJnT6zIkqSqrV69+NDNn9bqOpudMaM2ZM4eBgYFelyFJVYmI/+h1Da08PShJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqsZz5v+0ePRu+Kc/73UVkqTfg0dakqRqPHeOtGbOg3d9s9dVSFJdlkSvKxjCIy1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjW6Dq2ImBoRN0fEN8ry3Ij4SUTcHRFXRMRupf15ZXldWT+nZYyzSvtdEfHGlvb5pW1dRJzZ0j7mOSRJk9dYjrTOANa2LH8C+ExmzgM2A6eU9lOAzZn5YuAzpR8RcSCwCDgImA9cXIJwKvB5YAFwILC49B3zHJKkya2r0IqIPuDPgUvLcgBHAV8pXZYDx5bbC8syZf3rS/+FwMrMfCoz7wXWAYeXr3WZ+fPM/C2wElg4zjkkSZNYt0danwX+CnimLO8DPJaZW8ryIDC73J4N3A9Q1v+y9N/aPmybkdrHM4ckaRLrGFoRcQzwSGaubm1u0zU7rNte7Z3m3yoiTo2IgYgY2LBhQ5tNJEk16eZI67XAmyNiPY1Td0fROPKaERG7lD59wIPl9iCwP0BZ/wJgU2v7sG1Gan90HHMMkZmXZGZ/ZvbPmjWri7sqSZrIOoZWZp6VmX2ZOYfGhRTfycwTgO8Cx5duJwNXl9vXlGXK+u9kZpb2ReXKv7nAPOCnwE3AvHKl4G5ljmvKNmOdQ5I0ie3SucuIPgisjIjzgZuBy0r7ZcCXImIdjaOfRQCZeUdEXAncCWwBTsvMpwEiYilwPTAVuDwz7x
jPHJKkyS2eKwco/f39OTAw0OsyJKkqEbE6M/t7XUeT74ghSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqoahJUmqhqElSaqGoSVJqkbH0IqIaRHx04j4WUTcERHnlva5EfGTiLg7Iq6IiN1K+/PK8rqyfk7LWGeV9rsi4o0t7fNL27qIOLOlfcxzSJImr26OtJ4CjsrM/wocCsyPiCOATwCfycx5wGbglNL/FGBzZr4Y+EzpR0QcCCwCDgLmAxdHxNSImAp8HlgAHAgsLn0Z6xySpMmtY2hlw6/L4q7lK4GjgK+U9uXAseX2wrJMWf/6iIjSvjIzn8rMe4F1wOHla11m/jwzfwusBBaWbcY6hyRpEuvqNa1yRHQL8AiwCrgHeCwzt5Qug8Dscns2cD9AWf9LYJ/W9mHbjNS+zzjmkCRNYl2FVmY+nZmHAn00joxe1q5b+d7uiCe3Y/tocwwREadGxEBEDGzYsKHNJpKkmozp6sHMfAy4ATgCmBERu5RVfcCD5fYgsD9AWf8CYFNr+7BtRmp/dBxzDK/3kszsz8z+WbNmjeWuSpImoG6uHpwVETPK7d2BPwPWAt8Fji/dTgauLrevKcuU9d/JzCzti8qVf3OBecBPgZuAeeVKwd1oXKxxTdlmrHNIkiaxXTp3YT9gebnKbwpwZWZ+IyLuBFZGxPnAzcBlpf9lwJciYh2No59FAJl5R0RcCdwJbAFOy8ynASJiKXA9MBW4PDPvKGN9cCxzSJImt3iuHKD09/fnwMBAr8uQpKpExOrM7O91HU2+I4YkqRqGliSpGoaWJKkahpYkqRqGliSpGoaWJKkahpYkqRqGliSpGoaWJKkahpYkqRqGliSpGoaWJKka3bzLuyQ9p/3ud79jcHCQJ598stel7DDTpk2jr6+PXXfdtdeljMrQkqQOBgcH2XPPPZkzZw4R7T44vW6ZycaNGxkcHGTu3Lm9LmdUnh6UpA6efPJJ9tlnn0kZWAARwT777FPFkaShJUldmKyB1VTL/TO0JGmCiwhOPPHErctbtmxh1qxZHHPMMQAsW7aMpUuXAnDOOefw/Oc/n0ceeWRr/+nTp+/cgncgQ0uSJrg99tiD22+/nSeeeAKAVatWMXv27BH7z5w5k09/+tM7q7ydytCSpAosWLCAb37zmwCsWLGCxYsXj9h3yZIlXHHFFWzatGlnlbfTePWgJI3BuV+/gzsffHy7jnngH+3F2W86aNQ+ixYt4rzzzuOYY47h1ltvZcmSJfzgBz9o23f69OksWbKEz33uc5x77rnbtdZe80hLkipwyCGHsH79elasWMHRRx/dsf/pp5/O8uXLefzx7RuwveaRliSNQacjoh3pzW9+M+9///u54YYb2Lhx46h9Z8yYwdvf/nYuvvjinVTdzmFoSVIllixZwgte8AIOPvhgbrjhho793/e+9/GqV72KLVu27PjidhJPD0pSJfr6+jjjjDO67j9z5kze8pa38NRTT+3AqnauyMxe17BT9Pf358DAQK/LkFShtWvX8rKXvazXZexw7e5nRKzOzP4elbQNj7QkSdUwtCRJ1TC0JEnVMLQkSdUwtCRJ1TC0JEnVMLQkaYLr9NEkTQsXLuQ1r3nNkLbTTz+dj370o1uXL7jgAk477bQdW/AO5DtiSNIE1/rRJLvvvnvbjyZ57LHHWLNmDdOnT+fee+9l7ty5AJx//vkceuihnHDCCUQEl156KTfffHMv7sZ24ZGWJFWg00eTXHXVVbzpTW9i0aJFrFy5cmv7XnvtxQUXXMDSpUs57bTTOO+885gxY8ZOrX178khLksbiujPhodu275
h/eDAs+PioXTp9NMmKFSs4++yz2XfffTn++OM566yztq5bvHgxF110EVOnTh1ymrFGHY+0ImL/iPhuRKyNiDsi4ozS/sKIWBURd5fve5f2iIiLImJdRNwaEYe1jHVy6X93RJzc0v7KiLitbHNRRMR455CkyWi0jyZ5+OGHWbduHa973et4yUtewi677MLtt9++df3g4CAPPfQQDz74IL/+9a93dunbVTdHWluA/5WZayJiT2B1RKwC3gl8OzM/HhFnAmcCHwQWAPPK16uBLwCvjogXAmcD/UCWca7JzM2lz6nAjcC1wHzgujJm13P8vj8MSeqowxHRjjTSR5NcccUVbN68eevrWI8//jgrV67k/PPPB+CMM87gnHPOYe3atZx77rlceOGFPal/e+h4pJWZv8jMNeX2r4C1wGxgIbC8dFsOHFtuLwS+mA03AjMiYj/gjcCqzNxUgmoVML+s2yszf5yNd+/94rCxxjKHJE1aS5Ys4SMf+QgHH3zwkPYVK1bwrW99i/Xr17N+/XpWr1699XWt6667jkceeYSTTjqJD3/4w3zta1/jzjvv7EX528WYLsSIiDnAK4CfAPtm5i+gEWzAH5Rus4H7WzYbLG2jtQ+2aWccc0jSpNXuo0nWr1/PfffdxxFHHLG1be7cuey1115873vf4z3veQ8XX3wxEcEee+zBJz/5SZYuXbqzS99uur4QIyKmA1cB78nMx8vLTm27tmnLcbSPWk4320TEqTROO/KiF72ow5CSNDG1ex3qyCOP5MgjjwTggQce2Gb9mjVrALjrrruGtB933HEcd9xx27/InaSrI62I2JVGYH05M79amh9unpIr3x8p7YPA/i2b9wEPdmjva9M+njmGyMxLMrM/M/tnzZrVzV2VJE1g3Vw9GMBlwNrM/LuWVdcAzSsATwaubmk/qVzhdwTwy3Jq73rgDRGxd7kK8A3A9WXdryLiiDLXScPGGssckqRJrJvTg68FTgRui4hbSttfAx8HroyIU4D7gLeWddcCRwPrgN8A7wLIzE0R8VHgptLvvMzcVG6/G1gG7E7jqsHrSvuY5pAkTW4dQyszf0j715AAXt+mfwJt39gqMy8HLm/TPgC8vE37xrHOIUk7QmYyymv51WvsVic+38ZJkjqYNm0aGzdurGbHPlaZycaNG5k2bVqvS+nIt3GSpA76+voYHBxkw4YNvS5lh5k2bRp9fX2dO/aYoSVJHey6665b321CveXpQUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1DC1JUjUMLUlSNQwtSVI1OoZWRFweEY9ExO0tbS+MiFURcXf5vndpj4i4KCLWRcStEXFYyzYnl/53R8TJLe2vjIjbyjYXRUSMdw5J0uTWzZHWMmD+sLYzgW9n5jzg22UZYAEwr3ydCnwBGgEEnA28GjgcOLsZQqXPqS3bzR/PHJKkya9jaGXm94FNw5oXAsvL7eXAsS3tX8yGG4EZEbEf8EZgVWZuyszNwCpgflm3V2b+ODMT+OKwscYyhyRpkhvva1r7ZuYvAMr3Pyjts4H7W/oNlrbR2gfbtI9nDknSJLe9L8SINm05jvbxzLFtx4hTI2IgIgY2bNjQYVhJ0kQ33tB6uHlKrnx/pLQPAvu39OsDHuzQ3temfTxzbCMzL8nM/szsnzVr1pjuoCRp4hlvaF0DNK8APBm4uqX9pHKF3xHAL8upveuBN0TE3uUCjDcA15d1v4qII8pVgycNG2ssc0iSJrldOnWIiBXAkcDMiBikcRXgx4ErI+IU4D7graX7tcDRwDrgN8C7ADJzU0R8FLip9DsvM5sXd7ybxhWKuwPXlS/GOockaf
KLxkV7k19/f38ODAz0ugxJqkpErM7M/l7X0eQ7YkiSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqYWhJkqphaEmSqmFoSZKqUW1oRcT8iLgrItZFxJm9rkeStONVGVoRMRX4PLAAOBBYHBEH9rYqSdKOtkuvCxinw4F1mflzgIhYCSwE7uxpVVIlMpNn8tnvz2SSze+U788MXW72afZrt9wcc+s2ZQy2jkHLds8uP5MJyZDlbNbZUkfbepOhc7cu03I/n2nW1Rynua45zgjLjFx3VzUM+Rk+O/Zo2wz5mY1w/59pNAxZHjpOc5uW388ztPwunr2/Q38eQ2uZaGoNrdnA/S3Lg8CrR9vg5xv+k8WX3Nhx4MavubMcwy+z665jGrN3deYYBu1+zK6HHMPPs9d1dv87Gr7z2xocowRE5tAdcEJjxzzSTrvluzqLgACmRDAlorEcQ5dbv08JiIiWbcpyy/pnx2ldLttMGbbcMhdRlqdMaTvGlABoWZ4CwbZz06kWmsvN+uFDPfwdtFNraEWbtm3+FCPiVOBUgD32O4Cnu/1rbTf6+LuVB0J3HWMMo3Y1ZrdzPzvqdh+z67nHMGjXP6Ux3fdux9wRdQ7dkbTdGbYuU3ZMrcux7Q6nuRNsu9zcyU1pmZMx1NC6g95mhzvyTrvtjrOllm12wCMGxrD729xJT2n3M9n2e7v72Xr/x/J7nswMre1jENi/ZbkPeHB4p8y8BLgEoL+/P6/8n6/ZOdVJknaIKi/EAG4C5kXE3IjYDVgEXNPjmiRJO1iVR1qZuSUilgLXA1OByzPzjh6XJUnawaoMLYDMvBa4ttd1SJJ2nlpPD0qSnoMMLUlSNQwtSVI1DC1JUjUMLUlSNWIsb8lTs4j4FXBXr+vowkzg0V4X0QXr3H5qqBGsc3urpc4/ycw9e11EU7WXvI/DXZnZ3+siOomIAevcfmqos4YawTq3t5rq7HUNrTw9KEmqhqElSarGcym0Lul1AV2yzu2rhjprqBGsc3uzznF4zlyIIUmq33PpSEuSVLkJF1oR0RcRV0fE3RFxT0R8LiJ2i4gjI+KXEXFzRPxbRHyqZZt3RsQ/tCy/IyJujYg7IuJnEXFpRMwo626IiP5ye31EXNWy3fERsWxYPVdHxI+HtZ0TEe/fQT8CSdIIJlRoReOjQr8K/J/MnAe8BJgOXFC6/CAzXwG8AjgmIl7bZoz5wHuBBZl5EHAY8CNg3xGm7Y+Ig0aoZ0bZfkZEzB3/PdNEFhFviYiMiJeW5TkR8URE3FKe9PwoIv6kpf/rIuKn5cnTv5VPyKY8sRr+BGeXiHg4IvaLiGURcW8Z95aI+NEoNb2z1PT6NnUeX5ZviIi7Wsb7yrAxfhYRK4a1LYuIByLieWV5ZkSsH/cPTzvdBH287hsR3yjz3xkR10bEwS3bbmoZ619bar45ItaW+k7u5v5PtP/TOgp4MjP/CSAzn46I9wL3At9tdsrMJyLiFmB2mzH+Bnh/Zj7QHAO4fJQ5PwX8NXBCm3V/AXwdeJjGB01+bMz3SDVYDPyQxu/4nNJ2T2YeChAR/4PGY+TkiPhD4J+BYzNzTUTMBK6PiAeA64C+iJiTmevLOH8G3J6Zv2g8J+MDmTkkXEZxW6nt22V5EfCzYX1OyMxt/o8mIl5G40npn0bEHpn5ny2rnwaWAF/osg5NLBPx8XoesCozP1dqOCQzbwOaNS0DvtEcKyLmlJpfUZb/GPhqRExp7v9HMqGOtICDgNWtDZn5OHAf8OJmW0TsDcwDvj/CGGvGMOeVwGER8eI26xYDK8rX4jGMqUpExHTgtcApNHYC7ewFbC
63TwOWZeYagMx8FPgr4MzMfAb4F+BtLdsuovH4GY8fAIdHxK6lzhcDt3S57duBLwH/F3jzsHWfBd4bERPtSas6mMCP1/2AweZCZt46lo0z8+fA+4DTO/WdaKEVQLvLGZvt/y0ibgUeopHaD4062LOHp/dExNtG6PY0cCFw1rBt96Wxk/hhZv47sCUiXj62u6MKHAt8q/yON0XEYaX9gOZjh8Yf09+V9m2eWAEDpR0af/CLAMopuKOBq1r6XthyyuTLHWpL4F+BNwILgWva9Plyy3gXtrS/DbiC9k+47qPxTP3EDvNr4pmoj9fPA5dFxHcj4m8i4o/Gcd/WAC/t1GmihdYdwJC3NYmIvYD9gXtovKZ1CHAw8O6IOHSEMQ4DyMzbyiHzdcDuo8z7JeBPgRe1tL0N2Bu4t5zzn8PIz2xUr8XAynJ7Jc/u4O/JzEMz8wDgPTz7vyojPbFKgMy8CZheXlNYANyYmZtb+n2gjHtoZrY7JT3cShqPu5GeAZ/QMt4HACLiVcCGzPwPGqcWDytnJ1r9LfABJt4+QKObkI/XzLwe+GPgH2kEz80RMWuM9y266TTRHrDfBp4fEScBRMRU4NPAMuA3zU7lWcbHgA+2GeNjwKcioq+lbbTAIjN/B3yGxi+7aTEwPzPnZOYc4JUYWpNKROxD43XUS8sTkw/QeLIy/I/nGhpPaqDNEysaj407W5Y7BU3XMvOnwMuBmeVx343FwEvLfbqHxumivxg27joapxr/8vepTzvPRH+8ZuamzPznzDwRuKmlhm69AljbqdOECq1s/KfzW4C3RsTdwL8DT9J4UXG4/03jReYhV/Vl5rXARcB15SqWH9E4BXh9h+kvo1yYUl4kfBFwY8u49wKPR8SrS9OHImKw+TWmO6qJ4njgi5n5X8qTk/1pXPTTN6zf62js/KFxGuSdzaP8siP5BPDJlv4rgHfQ2MG0O6U3VmfR/m9gGxExBXgrcEjLE66FtH9N9gLAf92ox4R9vEbEURHx/HJ7T+AAGqehu91+Do2L4v6+U98J90JsZt4PvKnNqhvKV7PfEzx79eC9NI7GmuuWA8tHGP/IlttzWm4/BbSeh93mysTMbJ4//gnPXrWjei0GPj6s7SoaAXFAuUI1gN8C/x2gXFX1DuAfyx9nAJ/NzK83B8jMOyPiN8DqYVftQeM1gg+1LB+emb8drcjMvG6U1V+OiCfK7UeB84EHmlfPFt8HDoyI/YaNe0dErKGcTteEN5Efr68E/iEittA4GLq0nHoczQERcTMwDfgV8PedrhwE38ZJklSRCXV6UJKk0Uy404PSc0lEvAs4Y1jz/8vM03pRjzSaifB49fSgJKkanh6UJFXD0JIkVcPQkiRVw9CSJFXD0JIkVeP/A4NAz1QnJ20oAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "compare[['MIN','MAX']].plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Mean and Std\n", - "> A low standard deviation indicates that the data points tend to be close to the mean (also called the expected value) of the set, while a high standard deviation indicates that the data points are spread out over a wider range of values. (https://en.wikipedia.org/wiki/Standard_deviation)\n", - "\n", - "It is noticeable that both mean and std are increasing as the data is filtered by bigger values. \n", - "\n", - "The increase of the mean is to be expected, since we are eliminating the smaller values and leaving only the bigger ones. \n", - "\n", - "Unlike the mean, the STD does not necessarily increase after the filter; but since it does here, we can safely assume that the bigger the values get, the more spread out they are. The mean of the sample is therefore less accurate at representing the whole dataset, since the values differ hugely from one another. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAacAAAD9CAYAAAAYjbi9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd4VFX6wPHvS4CEklBDDRBKkCJNQlNRVFRAFLEBShMQ9KdrWXdXFNfOWta1rroiICBdRUEQEQs2IBB6h9ADIYEEUoD08/vj3sEhhCRAkjvl/TxPHmbOnHvvOzDknXvPe88RYwxKKaWUJynjdABKKaVUXpqclFJKeRxNTkoppTyOJiellFIeR5OTUkopj6PJSSmllMfR5KSUUsrjaHJSSinlcTQ5KaWU8jhlnQ6guNWsWdOEh4c7HYZSSnmVNWvWHDPGhDodh4vPJafw8HCio6OdDkMppbyKiOx3OgZ3ellPKaWUx9HkpJRSyuNoclJKKeVxfG7MKT9ZWVnExsaSnp7udCiOCAoKIiwsjHLlyjkdilJKFYlfJKfY2FiCg4MJDw9HRJwOp1QZY0hMTCQ2NpbGjRs7HY5SShWJX1zWS09Pp0aNGn6XmABEhBo1avjtWaNSyjv5RXIC/DIxufjze1dKeSe/uKynlFLqPIyBNZ86HcU5/ObMyWkiwpAhQ848z87OJjQ0lL59+wIwZcoUQkNDad++/ZmfrVu3nun/9ttvExQURHJy8pm2ZcuWISJ88803Z9r69u3LsmXLSv4NKaW838ljMPteWPiE05Gco9DkJCJBIrJKRDaIyBYRedFubywiUSKyS0TmiEh5uz3Qfh5jvx7utq+n7fYdInKzW3svuy1GRMa6ted7DG9UqVIlNm/ezOnTpwFYunQp9evXP6vPgAEDWL9+/ZmfVq1anXlt1qxZdOrUia+++uqsbcLCwhg/fnzJvwGllG/Z/RN8dCXE/AA3v+p0NOcoyplTBnC9MaYd0B7oJSJdgdeBt40xEcBxYKTdfyRw3BjTDHjb7oeItAIGAq2BXsCHIhIgIgHAB0BvoBUwyO5LAcfwSr1792bRokWAlWwGDRpUpO12795NWloar7zyCrNmzTrrtXbt2lGlShWWLl1a7PEqpXxQdgYsGQef9YcK1eCBn6Db/zkd1TkKHXMyxhggzX5azv4xwPXAvXb7VOAF4COgn/0Y4Avgv2KNyPcDZhtjMoC9IhIDdLb7xRhj9gCIyGygn4hsK+AYF+3Fb7aw9XDKpeziHK3qhfD8ra0L7Tdw4EBeeukl+vbty8aNGxkxYgS//fbbmdfnzJnD77//fub5ihUrqFChwplE1r17d3bs2EFCQgK1atU60+/ZZ5/l2Wef5cYbbyzW96WU8jEJ2+HLURC/CTo9ADe9DOUqOB1Vvoo05mSf4awHEoClwG7ghDEm2+4SC7iuUdUHDgLYrycDNdzb82xzvvYaBRzDK7Vt25Z9+/Yxa9Ys+vTpc87reS/rVahgfWhmz57NwIEDKVOmDHfccQeff/75Wdt1794d4KxEp5RSZxgDqz6BCddCahwMmgO3vOmxiQmKWK1njMkB2otIVeAroGV+3ew/86tbNgW055cgC+p/DhEZDYwGaNiwYX5dzijKGU5Juu222/jb3/7GsmXLSExMLLT/xo0b2bVr15mzoszMTJo0acLDDz98Vr9x48Yxfvx4ypbVAkyllJuTx2D+I7
BzMTTrCf0+hODaTkdVqAuq1jPGnACWAV2BqiLi+k0YBhy2H8cCDQDs16sASe7tebY5X/uxAo6RN64JxphIY0xkaKjHLEeSrxEjRvDcc8/Rpk2bIvWfNWsWL7zwAvv27WPfvn0cPnyYQ4cOsX//2bPb33TTTRw/fpwNGzaURNhKKW8U86NV9LD7R+j1Gtz7uVckJihatV6ofcaEiFQAegLbgJ+Bu+xuw4D59uMF9nPs13+yx60WAAPtar7GQASwClgNRNiVeeWxiiYW2Nuc7xheKywsjMceeyzf1+bMmXNWKfny5cuZPXs2/fv3P6tf//79mT179jnbjxs3jtjY2BKJWynlRbLS4bunYfodUKE6PPAzdH0IynjP3UNi5YACOoi0xSpGCMBKZnONMS+JSBNgNlAdWAcMNsZkiEgQ8BnQAeuMaaBbscM4YASQDTxujFlst/cB3rGPMdkYM95uz/cYBcUbGRlp8i42uG3bNlq2zO9KpP/QvwOl/ETCdvhyJMRvhs6j4caXijS2JCJrjDGRpRBhkRSlWm8jVqLJ276HP6vt3NvTgbvPs6/xwDk35RhjvgW+LeoxlFJK5WEMrJ4I3z8L5SvDvXOh+c2Fb+ehdPRcKaW8XdpRWPAI7PwOmt0It38IlWsVvp0H0+SklFLeLOYH+OohSE+GXq9DlzHgA5M9a3JSSilvlJUOP74IKz+E0JYw9Guo7eytMsVJk5NSSnmbhG32TA+bofMYuPFFj76h9mJoclJKKW/hmulh6T8hMNi6b6n5TU5HVSI0OZWi8ePHM3PmTAICAihTpgzVqlXj+PHjpKWlcfTo0TPLqH/44Yc888wzxMXFERgYSGZmJj179uSVV16hatWqDr8LpZQj0o7C/Idh1xKIuAn6feD1RQ8F0eRUSlasWMHChQtZu3YtgYGBHDt2jMzMTOrVq8eyZct48803Wbhw4VnbzJgxg8jISDIzM3n66afp168fv/zyi0PvQCnlmF0/wNd20UPvN6z7l3yg6KEg3nO7sJeLi4ujZs2aBAYGAlCzZk3q1atXpG3Lly/PG2+8wYEDB3R6IqX8SVY6LH4KZtwJlWrC6GU+U41XGP87c1o8Fo5sKt591mkDvV8rsMtNN93ESy+9RPPmzenZsycDBgzg2muvLfIhAgICaNeuHdu3b6ddu3aXGrFSytPFb7WKHhK2QJcHoeeLUC7I6ahKjZ45lZLKlSuzZs0aJkyYQGhoKAMGDGDKlCkXtI/CpppSSvkAYyBqAkzoAScT4L4voPfrfpWYwB/PnAo5wylJAQEB9OjRgx49etCmTRumTp3K8OHDi7RtTk4OmzZt0vnxlPJlaQl20cP3dtHDh1DZs1daKCn+l5wcsmPHDsqUKUNERAQA69evp1GjRkXaNisri3HjxtGgQQPatm1bkmEqpZyy83uY/3+QkQp93oROo/xibOl8NDmVkrS0NP7yl79w4sQJypYtS7NmzZgwYUKB29x3330EBgaSkZFBz549mT/f61cMUUrllZUOS5+DVR9DrdYw7BuoVXpXSHJzDf9ZuqPUjldUmpxKSceOHVm+fHm+r7ku9blbtmxZyQellHJW/Ba76GErdHkIer5QqmNL2Tm5jJ23iS/WeN46cFoQoZRSpc0YWPk/mHCdtYz6fV9a4+GlmJgysnP4y6x1fLEmlsd7RpTacYtKz5yUUqo0pSXA1/8HMUuheS+47b+lXvRwOjOHMdPX8OvOo/yzbytGXt2YJ0o1gsL5TXIyxiB+OrioJehKeYidS6zElJnmWNFDSnoWI6esZs3+47xxZ1vu6dSgVI9fVH6RnIKCgkhMTKRGjRp+l6CMMSQmJhIU5F/3SCjlUbJO20UPE6D25XDnxFItenBJTMtg2Ker2HEklfcHXcEtbeuWegxF5RfJKSwsjNjYWI4ePep0KI4ICgoiLCzM6TCU8k/xW+CLkXB0G3R9GG54zpEbao8kp3PfxJXEHj/NhKGRXHeZZ08a6xfJqV
y5cmdm/FZKqVJhDET9D5Y+D0FVYPCX0KynI6HsTzzJfROjOHEqi6kjOtO1SQ1H4rgQfpGclFKqVKXGWzfUxvxgFT30+8CauNUBO46kMmRSFJk5ucx8oAttw7xj2R1NTkopVZzcix5u+Q9EjnRspocNB08w7NNVlA8ow9wx3WheO9iROC6GJiellCoOWafh+3/C6k+gdhu76KGFY+Gs3JPIqKnRVKtUjhkju9KwRkXHYrkYhd6EKyINRORnEdkmIltE5DG7/QUROSQi6+2fPm7bPC0iMSKyQ0RudmvvZbfFiMhYt/bGIhIlIrtEZI6IlLfbA+3nMfbr4cX55pVSqlgc2WzNIr76E6vo4YEfHU1MP29PYNjkVdSpEsTnY670usQERZshIht40hjTEugKPCwirezX3jbGtLd/vgWwXxsItAZ6AR+KSICIBAAfAL2BVsAgt/28bu8rAjgOjLTbRwLHjTHNgLftfkop5Rlyc2HFh/DJdXD6OAyeB73+BWUDHQvpmw2HeWBaNM1rBzN3TDfqVPHO20gKTU7GmDhjzFr7cSqwDahfwCb9gNnGmAxjzF4gBuhs/8QYY/YYYzKB2UA/sW48uh74wt5+KnC7276m2o+/AG4Qf7tRSSnlmVLjYcZdsORpqwrvoeXQ7AZHQ5q96gCPzl5Hh4ZVmfFAF6pXKu9oPJfigubWsy+rdQCi7KZHRGSjiEwWkWp2W33goNtmsXbb+dprACeMMdl52s/al/16st1fKaWcs+M7+Kgb7F8Ot7wFA2c6Vo3nMvG3PYydt4lrIkKZNqILIUHlHI3nUhU5OYlIZeBL4HFjTArwEdAUaA/EAf9xdc1nc3MR7QXtK29so0UkWkSi/fVGW6VUKcg6DYuehFkDILgejPkFOjlXjQfWLDBvLd3JK4u20adNHT4ZGkmF8gGOxVNcilStJyLlsBLTDGPMPABjTLzb658AC+2nsYD7ZE1hwGH7cX7tx4CqIlLWPjty7+/aV6yIlAWqAEl54zPGTAAmAERGRupEckqp4ndkk7W8xdHt0O0Ra6YHB8eWwEpMLy/cxuQ/9nJ3xzBevaMNZQN8Y7GJolTrCTAJ2GaMecut3X1Spv7AZvvxAmCgXWnXGIgAVgGrgQi7Mq88VtHEAmPNSvozcJe9/TBgvtu+htmP7wJ+MjqLqVKqNOXmwooP4JPr4fQJGPIV3Dze8cSUk2t46suNTP5jL/dfFc7rd7b1mcQERTtzugoYAmwSkfV22zNY1XbtsS6z7QPGABhjtojIXGArVqXfw8aYHAAReQRYAgQAk40xW+z9PQXMFpFXgHVYyRD7z89EJAbrjGngJbxXpZS6MKlH4OuHYPdPcFkfa3mLSs4Pe2dm5/LEnPUs2hTHozdE8ETPCJ+b1Fp87UQkMjLSREdHOx2GUsrb7VgM8x+GzFNWeXjH+x0dW3I5nZnDQzPWsGzHUZ69pSWjujcplv2KyBpjTGSx7KwY6AwRSinlLvMUfP8sRE+COm3gzkkQepnTUQGQmp7FyCnRrN6fxGt3tGFg54ZOh1RiNDkppZRL3Ear6OHYDo8penBJOpnJsMmr2BaXwnsDO3Bru3pOh1SiNDkppVRuLqz8EH58ESpUhyFfQ9PrnI7qjCPJ6QyZFMWBpFNMGNqR61vUdjqkEqfJSSnl384qergFbnvfI4oeXA4knuK+SStJSstkyv2d6dbUc2IrSZqclFL+a/u3VtFD1mno+w50HO4RRQ8uu+JTGTwpiozsXGY80JX2DbxjLabioMlJKeV/Mk/B9+MgejLUaWsXPTR3OqqzbIpNZujkKMoGlGHO6G5cVsd71mIqDpqclFL+xb3o4cpH4fpnPabowSVqTyIjp0ZTtWI5ZozqQqMalZwOqdRpclJK+YfcXFj5AfzwojVJ69D50KSH01Gd4+cdCTz42RrCqlVg+qgu1K1SwemQHKHJSSnl+1LirKKHPT9Di75W0UPF6k5HdY5FG+N4fM46mtcOZtqIzt
So7FlndKVJk5NSyrdtXwTzH4HsdLj1XbhimEcVPbjMXX2QsfM2ckXDakwa3okqFbx7yYtLpclJKeWbMk/BkmdgzadQt51V9FAzwumo8jXp9728vHAr3SNq8vGQjlQsr7+a9W9AKeV74jbAFyMhMQauegyuexbKet6qsMYY3vsxhrd/2Emv1nV4d1B7Ast6/1pMxUGTk1LKd+Tmwor/wo8vuRU9XOt0VPkyxjB+0TYm/r6XO68I4/U7fWctpuKgyUkp5RtS4uDrB2HPMo8uegBrLaZxX21i9uqDDL8ynOf6tqJMGc8bB3OSJiellPfbthAWPALZGXDre3DFUI8segBrLaa/zl3Pwo1x/OX6Zvz1xuY+txZTcdDkpJTyXpkn7aKHKVC3Pdw50WOLHgDSs3J4aPoaft5xlGf6tGD0NU2dDsljaXJSSnmnw+utmR4SY+Cqx+G6cR5Z9OCSmp7FqKnRrNqXxL/6t+HeLr67FlNx0OSklPIuubmw4n348WWoFArDFkDja5yOqkDHT2Yy7NNVbDmcwjsD2tOvfX2nQ/J4mpyUUt4j5TB89SDs/QVa3mqNL3lo0YNLQko6gydFsS/xFB8P7kjPVr6/FlNx0OSklPIO276BBX+xih5uex86DPHYogeXg0mnuG9iFIlpGUy5vxNXNq3pdEheQ5OTUsqzZZ6E756GtVPtoodJULOZ01EVKiYhlcETV3E6K4fpo7rQoWE1p0PyKpqclFKe6/A6u+hhN1z9BPR4xqOLHlw2H0pm6ORVlBFhzpiutKgT4nRIXkeTk1LK8+TmwvL34KdXvKbowWX1viRGfLqakArWWkzhNf1vLabioMlJKeVZkg9ZMz3s/RVa3mbNJO7hRQ8uv+w8ypjPoqlXxVqLqV5V/1yLqTgUOpGTiDQQkZ9FZJuIbBGRx+z26iKyVER22X9Ws9tFRN4TkRgR2SgiV7jta5jdf5eIDHNr7ygim+xt3hP7dunzHUMp5aO2LoCProTYNXDbf+GeaV6TmBZvimPU1NU0qVmZuQ9208R0iYoyy2A28KQxpiXQFXhYRFoBY4EfjTERwI/2c4DeQIT9Mxr4CKxEAzwPdAE6A8+7JZuP7L6u7XrZ7ec7hlLKl2SetCrx5g6B6o3hwd/gCs+vxnP5PPogD89cS9uwqswa3ZWafrxIYHEpNDkZY+KMMWvtx6nANqA+0A+YanebCtxuP+4HTDOWlUBVEakL3AwsNcYkGWOOA0uBXvZrIcaYFcYYA0zLs6/8jqGU8hWH1sLH18Daz+Dqv8LIpVDDe6b1mfLHXv7+xUaualaTz0Z29vtFAovLBY05iUg40AGIAmobY+LASmAiUsvuVh846LZZrN1WUHtsPu0UcIy8cY3GOvOiYUOdEkQpr5Cb82fRQ+XaMOwbaNzd6aiKzBjDf3+K4T9Ld3Jz69q8N6iDrsVUjIqcnESkMvAl8LgxJqWAWXTze8FcRHuRGWMmABMAIiMjL2hbpZQDkg/BV2Ng32/Qqh/0fcdrxpbASkyvLt7OhF/3cEeH+rxxV1tdi6mYFSk5iUg5rMQ0wxgzz26OF5G69hlNXSDBbo8FGrhtHgYcttt75GlfZreH5dO/oGMopbzV1vmw4FHIyYJ+H0D7+7xmbAmstZie/Xozs1YdYGi3Rrxwa2tdi6kEFKVaT4BJwDZjzFtuLy0AXBV3w4D5bu1D7aq9rkCyfWluCXCTiFSzCyFuApbYr6WKSFf7WEPz7Cu/YyilvE1GGsx/BOYOhepNrKKHDoO9KjFl5eTy+Jz1zFp1gIeva8qLt2liKilFOXO6ChgCbBKR9XbbM8BrwFwRGQkcAO62X/sW6APEAKeA+wGMMUki8jKw2u73kjEmyX78EDAFqAAstn8o4BhKKW9yaK0100PSHuj+JPR4GgK8q3AgPSuH/5uxlp+2J/BUrxY81MN7ija8kVgFcr4jMjLSREdHOx2GUgqsooc/3oWfx0PlOnDHxxB+tdNRXbC0jGxGTV
1N1N4kXup3OUO6NnI6pGInImuMMZFOx+GiM0QopUpGcqy1vMW+36B1f+j7NlTwvvvoT5zKZNinq9l8KJm372nP7R10LabSoMlJKVX8tnwN3zxmFz18CO3v9aqxJZeElHSGTFrF3sST/G9wR27UtZhKjSYnpVTxyUiDxU/B+ulQvyPc8YlX3VDr7mDSKQZPiuJoagafDu/EVc10LabSpMlJKVU8Dq2xix72Qve/QY+xXlf04BKTkMaQSVGczMhm+qguXKFrMZU6TU5KqUuTmwN/vAM//8sqehi+CMKvcjqqi7b5UDLDJq9CRJgzphst6+paTE7Q5KSUunjJsTBvDOz/3auLHlyi9yVx/5TVBAeWZfqoLjQJrex0SH5Lk5NS6uJs+coqesjNgds/gnaDvLLoweXXnUcZ89ka6lQJYvqoLtTXJS8cpclJKXVhMlJh8Vi76CES7vzEmvHBi323+QiPzlpHk9BKfDayC6HBuuSF0zQ5KaWKLnYNfDkSTuyHa/4O1z7ltUUPLl+uieUfX26kbVgVpgzvTJWK3v1+fIUmJ6VU4XJz4Pe3YdmrEFzXKnpodKXTUV2yaSv28dz8LVzVrAYThkRSKVB/JXoK/ZdQShXsxEFreYv9f8Dld8Itb0GFqk5HdUmMMXy4bDf/XrKDG1vV5v1BHQgqp2sxeRJNTkqp89s8DxY+bhc9/A/aDfTqogewEtNr323n41/20N9ei6mcrsXkcTQ5KaXOlZFqz/QwA8I6wR0TvL7oAay1mP45fzMzow4wuGtDXrrtcl3ywkNpclJKnS022prp4cR+uOYfcO0/vL7oAay1mP72+Qbmrz/Mg9c25alel1HAit7KYZqclFKW3Bz4/S34+VUIqQfDv4VG3ZyOqlikZ+XwyMy1/LAtgX/0uoz/69HM6ZBUITQ5KaWsood5o+HAcrj8LrjlP15f9OByMiObB6ZFs3x3Ii/3a82QbuFOh6SKQJOTUv5u85fwzRNgcqH/BGh7j9cXPbicOJXJ8E9Xs+lQMm8PaEf/DmFOh6SKSJOTUv4qIxW+/TtsmGUXPXwC1Rs7HVWxSUhNZ+ikVew5epIP77uCm1vXcTokdQE0OSnlj2Kj7ZkeDlizPFzzDwjwnV8HscdPMXhiFPEpGUwe3omrI3QtJm/jO59GpVThcnPgt7esmR5C6sP9i6FhV6ejKla7j6YxZGIUafZaTB0bee8s6f5Mk5NS/uLEAbvoYQW0udsqegiq4nRUxWrL4WSGTloFwKzRXWldz7fenz/R5KSUP9j0BSz8659FD+0GOB1RsVuzP4nhn66msr0WU1Ndi8mraXJSypelp8Dif9hFD52t5S2qhTsdVbH7fdcxHpgWTe2QQKaP6kJYtYpOh6QuUaETSonIZBFJEJHNbm0viMghEVlv//Rxe+1pEYkRkR0icrNbey+7LUZExrq1NxaRKBHZJSJzRKS83R5oP4+xXw8vrjetlF84uBo+7g4b50CPp63xJR9MTN9vOcKIKatpVKMicx/sponJRxRltsMpQK982t82xrS3f74FEJFWwECgtb3NhyISICIBwAdAb6AVMMjuC/C6va8I4Dgw0m4fCRw3xjQD3rb7KaUKk5sDv7wBk2+2LuPd/x30GOtT1XguX62L5aEZa2lVL4TZo7tSKzjI6ZBUMSk0ORljfgWSiri/fsBsY0yGMWYvEAN0tn9ijDF7jDGZwGygn1gTW10PfGFvPxW43W1fU+3HXwA3iE6EpVTBju+HKbfAz+Ot5S0e/B0adnE6qhLx2Yp9PDFnA10aV2f6qC5UrVje6ZBUMbqUeeIfEZGN9mU/V61mfeCgW59Yu+187TWAE8aY7DztZ+3Lfj3Z7q+Uys+mL+B/V0P8FuuG2js/8blqPJcPl8Xwz/lb6NmyFpOHd6KyLhLocy42OX0ENAXaA3HAf+z2/M5szEW0F7Svc4jIaBGJFpHoo0ePFhS3Ur4nPcUqEf9yJNRqCQ/+Zk1B5IOMMby2eD
tvfLeDfu3r8dHgjrpIoI+6qK8bxph412MR+QRYaD+NBRq4dQ0DDtuP82s/BlQVkbL22ZF7f9e+YkWkLFCF81xeNMZMACYAREZG5pvAlPJJB1dZy1skH7SKHrr/zSfHlgBycw3PLdjM9JUHuLdLQ17udzkBuhaTz7qoMycRqev2tD/gquRbAAy0K+0aAxHAKmA1EGFX5pXHKppYYIwxwM/AXfb2w4D5bvsaZj++C/jJ7q+UysmGZa/DZLtWyYeLHgCyc3J58vMNTF95gDHXNmH87ZqYfF2hn2QRmQX0AGqKSCzwPNBDRNpjXWbbB4wBMMZsEZG5wFYgG3jYGJNj7+cRYAkQAEw2xmyxD/EUMFtEXgHWAZPs9knAZyISg3XGNPCS361SvuD4fusy3sGV0HYA9HkTgkKcjqrEpGfl8JdZ61i6NZ6/33wZ/9ejqS4S6AfE105GIiMjTXR0tNNhKFUyNn4Oi/5qPb7lLWh7t7PxlLCTGdmM/iyaP2ISefG21gy7MtzpkHyWiKwxxkQ6HYeLb14DUMrXpCdby1tsnAMNusIdE6BaI6ejKlHJp7K4f8oq1h88wX/ubsedHXUtJn+iyUkpT3cgCuaNguRD0OMZ6P6kz44tuRxNzWDo5FXsTkjjw/s60utyXYvJ3/j2J1wpb5aTDb/+G359A6o0gBHfQYPOTkdV4g6dOM2QiVHEJaczaXgk3SNCnQ5JOUCTk1Ke6Pg+u+ghCtoOhD7/9umiB5c9R9MYPDGK1PRsPhvZmcjw6k6HpByiyUkpT7NxLix6EhC4cxK0uavQTXzBtrgUhkyKItdYazFdXt83Z7dQRaPJSSlPkZ5sJaVNn0PDblbRQ9WGTkdVKtYeOM7wyauoWN5ai6lZLV2Lyd9pclLKExxYCfMesIoerhsHV//V54seXJbHHGPUtGhCgwOZPrILDarrkhdKk5NSznIveqjaEEYsgQadnI6q1CzdGs/DM9fSuEYlPhvZmVohuuSFsmhyUsopSXutoofYVdBuEPR+wy+KHlzmrz/EX+du4PL6VZh6fydd8kKdRZOTUk7YMMcaX5IyflX04DJ95X7+OX8zXRpXZ+IwXfJCnUs/EUqVprOKHq6EOz72m6IHl4+W7eb177ZzfYtafHjfFbrkhcqXJielSsv+FdZlvJRDcN2z0P2vUMZ/fjEbY/j3kh18uGw3t7arx1v3tKNcwKWsd6p8mSYnpUpSThbsWgobZsL2RdZZ0sjvIcxj5tcsFbm5hhe+2cK0FfsZ1LkBr9zeRpe8UAXS5KRUcTMGjmyE9bOsy3enjkGlUOj2CFz7DwgMdjrCUpWdk8s/vtzIvLWHGH1NE57u3UKXvFCF0uSkVHFJPWLN7rBhFiRshYDQm5DFAAAaz0lEQVTycFlvaHcvNLsBAso5HWGpy8jO4dFZ61iyJZ4nb2zOI9c308SkikSTk1KXIisddiyyzpJ2/wgmF8I6WWstte4PFf13brhTmdmM+WwNv+06xvO3tuL+qxo7HZLyIpqclLpQxlgTsq6fCVu+hoxkCAmDq5+w7leqGeF0hI5LPp3FiCmrWXfgOP++qy13RzZwOiTlZTQ5KVVUJw7AhtnWZbukPVCuIrS8DdoPgvBroIxWngEcS8tg6KRV7EpI5YN7r6B3m7pOh6S8kCYnpQqSkQpbF1gJad9vVlt4d+j+N2h1m98VNxTm8InTDJ4UxeETp5k4rBPXNte1mNTF0eSkVF65ObD3V+ssadsCyDoF1ZtY9ya1G+B3N80W1b5jJ7lvYhQpp7OYNqILnRv773ibunSanJRyObbLGkfaOMe6UTawCrS9x6q2a9AZtMrsvLYfSWHIpFXk5Bpdi0kVC01Oyr+dSoIt86xqu0PRIAFW2fdNr8BlfaCczpJdmPUHTzBs8iqCypVh1piuNKullzrVpdPkpPxPThbE/GCdJe38DnIyoVZrKyG1uQeCazsdoddYvvsYD0yNpkblQGaM0rWYVPEpNDmJyGSgL5
BgjLncbqsOzAHCgX3APcaY42LdXfcu0Ac4BQw3xqy1txkGPGvv9hVjzFS7vSMwBagAfAs8Zowx5zvGJb9j5b/iNlqFDZs+h5NHoWJNiBxpVdvVaauX7S7QD1vj+b+ZawmvUZHPRnahtq7FpIpRUWpfpwC98rSNBX40xkQAP9rPAXoDEfbPaOAjOJPMnge6AJ2B50Wkmr3NR3Zf13a9CjmGUkWXlgDL/wsfXQUfd4fVE60l0AfNhie3Q+/XoG47TUwXaP76Qzw4fQ0t6gQzZ3Q3TUyq2BV65mSM+VVEwvM09wN62I+nAsuAp+z2acYYA6wUkaoiUtfuu9QYkwQgIkuBXiKyDAgxxqyw26cBtwOLCziGUgXLSocd31pnSTE/gsmB+h2hz5tw+Z1+PWtDcZgZdYBxX2+iU3h1Jg2LJDjI/6ZlUiXvYsecahtj4gCMMXEiUsturw8cdOsXa7cV1B6bT3tBx1DqXMZA7Gp71oZ51rpJIfXhqsesWRtCmzsdoU+Y8Otu/vXtdq67LJSPBnfUtZhUiSnugoj8ro2Yi2i/sIOKjMa6NEjDhnoPil85cRA2zraq7ZJ2Q9kK1s2x7QZB42v8ar2kkmSM4a2lO3n/pxhuaVuXt+9pT/myOiOGKjkXm5ziRaSufUZTF0iw22MB90m0woDDdnuPPO3L7PawfPoXdIxzGGMmABMAIiMjLzi5KS+TkWbdHLthFuz9DTDQ6Gpr8b5W/XTWhmKWm2t4aeFWpizfx8BODRjfX9diUiXvYpPTAmAY8Jr953y39kdEZDZW8UOynVyWAP9yK4K4CXjaGJMkIqki0hWIAoYC7xdyDOWPcnOt6YM2zLKmE8o6CdUaQ4+nrVkbqoU7HaFPys7JZey8TXyxJpZRVzdm3C0tdckLVSqKUko+C+usp6aIxGJV3b0GzBWRkcAB4G67+7dYZeQxWKXk9wPYSehlYLXd7yVXcQTwEH+Wki+2fyjgGMqfHIuxEtLGOZB8EAJDoM1d0P5eaNBFq+xKUEZ2Do/PXs/izUd4omdzHr1B12JSpUeswjrfERkZaaKjo50OQ12K08dh8zwrKcWuBikDTa+3xpFa3ALlKjgdoc9zX4vpub6tGHG1rsXk60RkjTEm0uk4XHSGCOUZcrKtxfrWz4QdiyEnA2q1ghtfhjZ3Q4guu1BaUtKzGPHpatYeOM4bd7blnk66FpMqfZqclLOObLYv282FkwlQsQZE3m+dJenNsaUuMS2DoZNXsTM+lfcHXcEtbfVLgXKGJidV+tISrCmE1s+C+E1Qphw0v9kaR2p2I5Qt73SEfiku+TSDJ0YRe/w0E4ZGct1lemuhco4mJ1U6sjOsy3UbZsGupdasDfWu0FkbPMT+RGstphOnspg2ojNdmtRwOiTl5zQ5qZJjDMRGw4aZVoFD+gkIrgtX/sW6bFerhdMRKmDHkVSGTIoiKyeXmQ90oW1YVadDUkqTkyoBybHWKrIbZkPiLmvWhpZ9rYTUpIfO2uBBNhw8wbBPV1E+oAxzx3QjorbewKw8gyYnVTwyT8K2b6xqu72/Ys3acJU1t12rfhAU4nSEKo8VuxMZNXU11SuXZ8bIrjSsoWsxKc+hyUldvNxc2P+7dYa0dT5kplkzNfQYC20HQHW9N8ZT/bQ9noemr6VhdWstpjpVdMkL5Vk0OakLl7jbKmzYMAeSD0D5YGjd36q2a9hNy7893DcbDvPEnPW0rBvC1BGdqV5JqyOV59HkpIrm9AnY8pWVlA5GWbM2NLkOej4Pl/WB8npJyBvMXnWAp7/aRKdG1Zk4PJIQXYtJeShNTur8crJh909WQtq+yJq1IbQF9HzRumynszZ4lYm/7eGVRdu4tnko/xvckQrltTBFeS5NTupc8VuswoZNn0NaPFSoDh2HWdV29TroZTsvY4zh7R928d6Pu7ilTV3eHqBrMSnPp8lJWU4es2dtmAlHNkKZstC8l5WQIm7SWRu8VG6u4eVFW/n0j33cExnGq3
e01bWYlFfQ5OTPsjNg53fWNEIxSyE3G+q2h95vwOV3QSWdJcCb5eQaxn65kc/XxDLiqsY8e0tLymhiUl5Ck5O/MQYOrbVnbfjSWp4iuC50e9ietaGl0xGqYpCZncvjc9bx7aYjPHZDBI/3jNC1mJRX0eTkL5IPWQv2bZgFx3ZC2SBo0RfaD7Kq7nTWBp9xOjOHB6ev4ZedR3n2lpaM6t7E6ZCUumCanHxZ5knYttA6S9rzC2Cs+5BufQ9a3w5BVZyOUBWzlPQsRk2JZvX+JF67ow0DOzd0OiSlLoomJ1+TmwsHllvjSFu/tmZtqNoIrn0K2g2A6vot2lclncxk2ORVbItL4b2BHbi1XT2nQ1Lqomly8hWJu/+8bHfCNWvD7dDOnrWhjJYO+7IjyekMmRTFgaRTTBjaketb1HY6JKUuiSYnb5aebM3asH4WHFwJiDXr9/X/tMaTdNYGv3Ag8RT3TVpJUlomU0d0pquuxaR8gCYnb5ObA7t/tsaRti+C7HSoeRn0fMGetUEv5fiTnfGpDJ4YRWZOLjMf6Eq7BroWk/INmpy8RfxW65LdxrmQdgQqVIMOQ6xqu3pX6KwNfmhj7AmGTV5FuYAyzBndjcvq6FpMyndocvJkJ4/Bpi+ss6S4DdasDRE3WfcjNb8ZygY6HaFySNSeREZOjaZqxXLMGNWFRjUqOR2SUsVKk5Onyc6EXUuscaRdS+xZG9pBr9ehzV1QqabTESqH/bw9gQenryGsWgVmjOqqazEpn3RJyUlE9gGpQA6QbYyJFJHqwBwgHNgH3GOMOS7W7envAn2AU8BwY8xaez/DgGft3b5ijJlqt3cEpgAVgG+Bx4wx5lJi9kjGwOG1VkLa/IU1a0Pl2tD1IavarnYrpyNUHmLRxjgem72Oy+oEM21EZ2pU1rNn5ZuK48zpOmPMMbfnY4EfjTGvichY+/lTQG8gwv7pAnwEdLGT2fNAJGCANSKywBhz3O4zGliJlZx6AYuLIWbPkHLYLv+eDUe3Q0AgtLjFWrSvyXUQoCe26k9zVx9k7LyNdGxUjUnDO+laTMqnlcRvv35AD/vxVGAZVnLqB0yzz3xWikhVEalr911qjEkCEJGlQC8RWQaEGGNW2O3TgNvx9uSUecqqstswE/YsA5MLDbrCre9Cq9uhglZbqXNN+n0vLy/cyjXNQ/lY12JSfuBSk5MBvhcRA3xsjJkA1DbGxAEYY+JEpJbdtz5w0G3bWLutoPbYfNq9jzGwf7mVkLbMh8xUqNIQuv8N2g2EGk2djlB5KGMM7/64i3d+2EXvy+vwzsD2BJbVxKR836Ump6uMMYftBLRURLYX0De/WmdzEe3n7lhkNNblPxo29KC5xJL2WpfsNsyCE/uhfGXr7KjdQGh0lc7aoApkjOGVRduY9Pte7uoYxmt3tKFsgH5mlH+4pORkjDls/5kgIl8BnYF4EalrnzXVBRLs7rFAA7fNw4DDdnuPPO3L7PawfPrnF8cEYAJAZGSkswUT6SnWnHbrZ1lz3CHQ5Fq4bhy07AvlteRXFS4n1/DMvE3MiT7I8CvDea5vK12LSfmVi05OIlIJKGOMSbUf3wS8BCwAhgGv2X/OtzdZADwiIrOxCiKS7QS2BPiXiFSz+90EPG2MSRKRVBHpCkQBQ4H3LzbeEpWbA3t+ts6Sti2E7NNQIwJueM6ataFKWOH7UMqWmZ3LE3PXs2hjHI9e34wnbmyuazEpv3MpZ061ga/s/zRlgZnGmO9EZDUwV0RGAgeAu+3+32KVkcdglZLfD2AnoZeB1Xa/l1zFEcBD/FlKvhhPK4ZI2G6NI22cC6lxEFTVqrRrfy/U76izNqgLlp5lrcW0bMdRnunTgtHX6Hik8k/ia7cNRUZGmujo6JI7wMlEawXZDTPh8DqQAGvWhvaDoHkvnbVBXbTU9CxGTY1m1b4kxt/ehnu7eND4qfJ5IrLGGBPpdBwueiNNUWRnwq7vrcKGnUsgNwvqtIGbX4U2d0PlUK
cjVF7u+MlMhn26iq2HU3hnQHv6tffOwlSliosmp/MxBuLW/zlrw6lEqFQLuoyx5rarc7nTESofEZ+SzuCJUexPOsXHQzpyQ0tdi0kpTU55pcTBprlWUjq6zZ61oY81jVDT63XWBlWsDiad4r6JUSSmZTDl/k5c2VTnTlQKNDlZsk5bszasn2lV3ZlcCOsMfd+G1v2t5SmUKma74lMZPCmK9KxcZjzQlfa6FpNSZ/hvcjIGDqy0Z234GjJSoEoD6P4ktB0INZs5HaHyYZtikxk6OYqyAWWYO0bXYlIqL/9LTsf3/Tlrw/F9UK4StOpnVds1ulpnbVAlbtXeJEZOWU1IBWstpvCaemO2Unn5R3LKSLXOjjbMgv1/AAKNr4Frx0LLWyGwstMRKj/xy86jjPksmnpVKzB9ZBfqVa3gdEhKeSTfTU65ObD3F6uwYds39qwNzeD6f1qzNlRtUPg+lCpGizfF8ejsdUTUCmbayM7U1LWYlDov30tO2enwwwuwYQ6kHoagKtYlu3b3QlikztqgSkRuriHxZCbxKekkpKYTn5JBfEq6/WM93haXQoeG1Zg8vBNVKuhaTEoVxPeSU8I2+OM9aNYTev0LmveGcrqMtbo4xhhS0rPPSTQJKekcsZ8npKSTkJpBdu7Zs62IQI1KgdQOCaR2SBBXN6vJYz0jqFje9/7bKVXcfO9/SUh9eDIaKtcqvK/ya6czc4g/k2TSSXCd7aRmEJ+cTnyq1Z6elXvOtiFBZalTJYjaIUE0Da1J7ZBA6lQJolZw0JlkFBocSDld4kKpi+J7yalyLU1Mfi4zO5ejaXaiSU7/M+HYCciVjFLTs8/ZtkK5ADvJBNIurOqZRPPnTyC1goN0JVqlSpjvJSfls3JyDYknM86c4bhfVrOeW48TT2aes225ADlzVhNRqzJXN6tJrZBAagcH2WdAgdQKCSI4sKwuT6GUB9DkpBxnjCH5dFaeIgK3ogL7MtvRtAxy8hnXqVnZGtepXzWIDg2rUtvt0prrbKdaxfK6WJ9SXkSTkypRJzOyzySaBHsM50hyBvGp6fYZj5WAMrLPHdepWrEctYODqBUSSPNaNf+8rGYnnTohQdSsXF6XLlfKB2lyUhclIzuHhDMJJ+PsMx23AoPUjHPHdSqWD6BOiJV0OjSsaj92O9uxE1JQOR3XUcpfaXJSZ8nJNSSmZZwZz8lbNh1vl00n5TOuUz6gjDWOExLEZXWC6R4RemY8x0o41uPgIL3HRylVME1OfsIYw4lTWcSnpnMk2b1s2rrM5rrkdjQ1gzzDOpQRCA22kk5YtYp0bFTtzGW1Wm5jO9UqltNiAqVUsdDk5APSXOM6Z+7NObdsOiElg8ycc8d1qlUsdya5tKgTTG37Elsdt8tsNSrpuI5SqnRpcvJg6Vk5HE3Nv2zafXznZGbOOdtWDixLrZBA6oQE0Sm8+pmy6dohQdSpYt2rExqs4zpKKc+kyckB2Tm5HEvLLLBsOj41nROnss7ZtnzZMmfGcFrWC6HHZbXOnOG4klGtkCAqB+o/rVLKe+lvsGKUm2s4firTSjR2qXR+ZdPH0s4d1wkoI4Ta9+s0rFGRzo2rn1U2XdtOPFUq6LiOUsr3aXIqAmMMqRnZZyWY/G4YTUhNJyvHnLN9jUrlz1SqtaobQm23CjZX4qlROZAAvUlUKaUAL0hOItILeBcIACYaY14rzv2nZ+Wck2wSUjM4kvzn4/iUdE7lM64THFT2THLp0ri6XUgQeKaooHZIIKHBgQSW1XEdpZS6EB6dnEQkAPgAuBGIBVaLyAJjzNbCts3KyeVYmivJnD07getxfEoGyafPHdcJLFvGuj8nOIjW9UK4vsWf4zqun1rBgVTScR2llCoRnv7btTMQY4zZAyAis4F+wHmT066ENCJf+YHEkxmYfMZ1atn36zSuWYmuTWqcSTSu5Q9qBwcRUkEn/1RKKSd5enKqDxx0ex4LdClog3JlhBtb1bJnoD57AtAalXTyT6
WU8gaenpzyyyTnVByIyGhgNEDDhg159Y62JR2XUkqpEuTpt/3HAg3cnocBh/N2MsZMMMZEGmMiQ0NDSy04pZRSJcPTk9NqIEJEGotIeWAgsMDhmJRSSpUwj76sZ4zJFpFHgCVYpeSTjTFbHA5LKaVUCfPo5ARgjPkW+NbpOJRSSpUeT7+sp5RSyg9pclJKKeVxNDkppZTyOJqclFJKeRwxeef48XIikgrscDqOIqgJHHM6iCLQOIuPN8QIGmdx85Y4LzPGBDsdhIvHV+tdhB3GmEingyiMiERrnMXHG+L0hhhB4yxu3hSn0zG408t6SimlPI4mJ6WUUh7HF5PTBKcDKCKNs3h5Q5zeECNonMVN47wIPlcQoZRSyvv54pmTUkopL+dIchKRMBGZLyK7RGS3iLwrIuVFpIeIJIvIOhHZLiJvum0zXET+6/Z8sIhsFJEtIrJBRCaKSFX7tWUiEmk/3iciX7ptd5eITMkTz3wRWZGn7QUR+VsJ/RUopZQqQKknJ7HWP58HfG2MiQCaA5WB8XaX34wxHYAOQF8RuSqfffQCngB6G2NaA1cAy4Ha5zlspIi0Pk88Ve3tq4pI44t/Z8rTiUh/ETEi0sJ+Hi4ip0Vkvf0FZ7mIXObW/2oRWWV/UdpuL2qJ/SUq75eZsiISLyJ1RWSKiOy197teRJYXENNwO6Yb8onzLvv5MhHZ4ba/L/LsY4OIzMrTNkVEDolIoP28pojsu+i/PFXqPPTzWltEFtrH3yoi34pIG7dtk9z29YNbzOtEZJsd37CivH8n7nO6Hkg3xnwKYIzJEZEngL3Az65OxpjTIrIea6n2vMYBfzPGHHLtA5hcwDHfBJ4B7svntTuBb4B4rPWiXr3gd6S8xSDgd6x/5xfstt3GmPYAIjIG63MyTETqADOB240xa0WkJrBERA4Bi4EwEQk3xuyz99MT2GyMibO+f/F3Y8xZSaQAm+zYfrSfDwQ25OlznzHmnPtQRKQl1pfMa0SkkjHmpNvLOcAI4KMixqE8iyd+Xl8Clhpj3rVjaGuM2QS4YpoCLHTtS0TC7Zg72M+bAPNEpIwrB5yPE5f1WgNr3BuMMSnAAaCZq01EqgERwK/n2cfaCzjmXOAKEWmWz2uDgFn2z6AL2KfyIiJSGbgKGIn1nz0/IcBx+/HDwBRjzFoAY8wx4B/AWGNMLvA5MMBt24FYn6GL8RvQWUTK2XE2A9YXcdt7gc+A74Hb8rz2DvCEiPjizfY+zYM/r3WxVijHPs7GC9nYGLMH+CvwaGF9nUhOAuRXIuhq7y4iG4EjWBn4SIE7+/OUcreIDDhPtxzg38DTebatjfWL4HdjzE4gW0Quv7C3o7zE7cB39r9zkohcYbc3dX1+sP7TvGW3n/MlCoi228H6jz0QwL501gf40q3vv90udcwoJDYD/ADcDPQj/9WeZ7jt799u7QOAOeT/5eoA1jfvIYUcX3keT/28fgBMEpGfRWSciNS7iPe2FmhRWCcnktMW4KypPEQkBGgA7MYac2oLtAEeEpH259nHFQDGmE32ae5ioEIBx/0MuAZo6NY2AKgG7LWvx4dz/m8pyrsNAmbbj2fz5y/y3caY9saYpsDj/Hmvx/m+RBkAY8xqoLJ9zb83sNIYc9yt39/t/bY3xuR3OTmv2VifvfN9o73PbX9/BxCRTsBRY8x+rEuCV9hXHNz9C/g7WpnrbTzy82qMWQI0AT7BSjDrRCT0At+bFKWTEx/YH4GKIjIUQEQCgP8AU4BTrk72N4ZXgafy2cerwJsiEubWVlBiwhiTBbyN9Q/qMgjoZYwJN8aEAx3R5ORzRKQG1ljnRPtLyN+xvpjk/U+yAOsLDOTzJQrr87HV7XlhCaXIjDGrgMuBmvZnvygGAS3s97Qb6zLPnXn2G4N1ifCeS4lPlR5P/7waY5KMMTONMUOA1W4xFFUHYFthnUo9ORnrrt/+wN0isgvYCaRjDezl9T+sgd6zqujspdvfAxbbFSPLsS7dLS
nk8JOwi0DsgbqGwEq3/e4FUkSki930rIjEun4u6I0qT3IXMM0Y08j+ItIAqwAnLE+/q7F+yYN1+WK468zd/oXxOvCGW/9ZwGCsXyT5XYq7UE+T//+Dc4hIGeBuoK3bl6t+5D9uOh7Q2yK8h8d+XkXkehGpaD8OBppiXT4u6vbhWAVq7xfW15GBUmPMQeDWfF5aZv+4+p3mz2q9vVhnV67XpgJTz7P/Hm6Pw90eZwDu10jPqQQ0xriu7UbxZ4WM8m6DgNfytH2JlQia2lWhAmQCowDsKqbBwCf2f0IB3jHGfOPagTFmq4icAtbkqZID6xr+s27POxtjMgsK0hizuICXZ4jIafvxMeAV4JCrYtX2K9BKROrm2e8WEVmLfSlceTxP/rx2BP4rItlYJzcT7UuGBWkqIuuAICAVeL+wSj3Q6YuUUkp5IB0kVUop5XH0/gelSoGI3A88lqf5D2PMw07Eo1RBPOHzqpf1lFJKeRy9rKeUUsrjaHJSSinlcTQ5KaWU8jianJRSSnkcTU5KKaU8zv8DOlODoGpQIvEAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "compare[['MEAN','STD']].plot()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# IS JSON\n", - "\n", - "This whole sample has: \n", - "- False: 307577 rows\n", - " - 61,54% are not valid JSON\n", - " \n", - "- True: 192228 rows\n", - " - 38,46% are valid JSON" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPoAAADuCAYAAAAQqxqwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFdpJREFUeJzt3XmUXGWZx/HvU72xN5sssuQmkLDLzghhVVTGQoHjAC5wcEfCJg7IZVEvI8MpNhVGDFuGHZkTFZFcCMNiQhIIiWwBRIFgQcIuhCKYhaTzzh+3mmlip7uqu6qee+/7fM6p091Fdd9fh/r13d9XnHMYY/KtoB3AGNN8VnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFN8YDVnRjPGBFH4SI9IjIE30ewQCvDUTk6dalM6Y27doBMmCxc24X7RDGDIet0YeguuaeJiKPVR/79POaHURkVnUrYI6IjK4+f0yf568SkbbW/wbGN1b0wa3eZ7P99upzbwKfcc7tBhwNXN7P930PuKy6NbAHMF9Etqu+fmz1+R7ga83/FYzvbNN9cP1tuncAvxSR3rKO6ef7HgbOEZHNgd85554XkU8DuwOzRQRgdZI/GsY0lRV9aE4D3gB2JtkqWrLyC5xzt4rII0ARuEdEvg0IcINz7qxWhjXGij403cB859wKETkO+Kf9bBEZBbzonLu8+vkngP8F7hCRnzvn3hSR9YG1nXMvtSJ0EMZrACOAzYHOGr/tfeA14JVyqfiPZmUzzSU2P/rAROR959xaKz03GvgtsAj4I3Cyc26t6qm3Sc65HUXkLOAYYBnwOvBV59w7InI0cBbJlsAy4ETn3MxGZA3CeF0gICnziJU+HwFsOMxFLAReBeYBc4EXgOerH+eWS8V/2rIx6WBFz7AgjAPggD6PUYpxlgOPAlOBKcD0cqm4UDGP6cOKniFBGG/NR4u9pW6iAfUAj5MUfyowrVwqvqsbyV9W9BSr7lMfDhxKUuyP6yYalhXAkyRr+9vKpeIs3Th+saKnTBDG7cBnSM6vHw6sqZuoaZ4BrgduKpeKbyhnyT0rekoEYTwG+C5wLLCRcpxWWg7cDVwHTCqXisuU8+SSFV1REMadwBHA8cBBynHS4C3gFuC6cqk4RztMnljRFVT3vU8GfoBfa+96zAJ+Ui4VJ2sHyQMregsFYdwBfAc4F9hUOU5WPAicXS4VZ2gHyTIregsEYVw
guXgmAkbqpsmsu4BzyqXiE9pBssiK3mRBGB8B/BTYQTtLDjhgIvCjcqn4nHaYLLGiN0kQxgcDFwB7amfJoeXADcB55VJxnnaYLLCiN1gQxhsBVwOHaWfxwBLgPODicqnYox0mzazoDRSE8eEkJf+YdhbPzAa+US4Vn9EOklZW9AYIwngd4DLg68pRfPYBybGQUrlUXK4dJm2s6MMUhPEBJPuLI7SzGCBZu3+lXCrO1Q6SJlb0IQrCuIvkYNtpJCPHmPRYCJxULhVv1A6SFlb0IQjCeFfgJuyUWdrdCpxQLhXf0w6izYpepyCMv0pyA0atQzEZXc8B/1ouFV/UDqLJhnuuQxDGZwI3YyXPkjHAQ0EY764dRJOt0WtQvYT1cuBE7SxmyN4HjvT1Jhlbow8iCOPVgd9gJc+6tYA7gzD+pnYQDVb0AQRhvAFwH8k94yb72oEJQRj/WDtIq9mm+yoEYTySZOSTbbSzmKa4huSIvBeXzlrR+xGE8W4kt0VurJ3FNFUMHO3DxBRW9JVUz5FPAdZRjmJa44/AIeVS8QPtIM1k++h9BGE8GpiMldwnBwHXB2Gc66sbrehVQRhvBtyLjeHmo68AF2qHaCbbdAeCMF4fmAZsr53FqDq5XCr+UjtEM3hf9OqQy/cB+2lnMepWkFxU8zvtII1mm+5wLVZykygAtwRhPFY7SKN5XfQgjM8lmRnFmF6rAX8Iwnhb7SCN5O2mexDGRwG3YfeSm/6Vgb3LpeLr2kEawcuiB2G8FcnMnnmdwNA0xn3AZ8ulYuZL4t2mexDGbcCNWMnN4A4GTtIO0QjeFR34IbCPdgiTGRfmYX/dq033IIx3IZm8r0M7i8mUP5Hsr2d2dFlv1ujVwRxvwkpu6rcHycSYmeVN0YHzgR21Q5jMOicI48xOr+XFpnsQxvuT3KXk0x8203h/BXYtl4qLtYPUK/dv/CCM1yaZYCH3v6tpum3I6M0vPrz5LwYC7RAmN04Kwvgg7RD1yvWmexDG2wFPAW3aWUyuPAnsVi4VV2gHqVXe1+jnYyU3jbczGbtHIrdr9CCM9yCZcM+YZpgPjMnKgbk8r9H/UzuAybXNge9rh6hVLtfo1amMp2jnMLn3HhCUS8UF2kEGk9c1+gXaAYwX1iGZNjv1crdGD8L4UOBO7RzGGxWStfq72kEGkqs1enXI3vO1cxivdJOBtXquig4cTXLqw5hWOjUI43W1Qwwkb0U/VTuA8VI3KT+vnpuiVwcH+KR2DuOtb2gHGEhuik7K/6FN7u0ahHFqdxtzUfTqOHCp3nQyXkjtyiYXRQcOATbVDmG897UgjFM5glFeip7av6TGKxsCX9QO0Z/MFz0I4w2BL2jnMKYqlSudzBcd+BrQqR3CmKpDgjBO3W5kHoqeyr+gxlupPDCc6aJXT2ek9pSG8dZx2gFWlumiA4dqBzCmH9sHYbyFdoi+sl70g7UDGLMKB2gH6CuzRQ/CeE1sDjWTXlb0BtkfO9pu0suK3iC22W7SbHSaTrNluej7aQcwZhCpWatnsuhBGK8O7KKdw5hBWNGHaQ9s+mOTflb0YbKj7SYLtgvCeCPtEJDdou+tHcCYGu2vHQCyW/TttQMYU6MdtANABoteHdJ5S+0cxtQo0A4A0F7rC0VkDHAGMKLv9znnPtWEXAPZGOhq8TKNGaoR2gGgjqIDE4ErgWuAnubEqUkq/uGMqVEq3q/1bLovd86Nd87Ncs492vtoWrJVS8U/nDE12iIIY/Vd5HoC3Cki40RkUxFZv/fRtGSrZkU3WdJBCgYurWfTvfdm+jP6POeAUY2LUxMrusmaEcArmgFqLrpzbmQzg9TBim6yJgAe0gxQz1H3DuAE/v8CgCnAVc65ZU3INRAruska9fdsPZvu40n2N35V/frY6nPfbnSoQaj/oxlTJ/X3bD1F39M513cgxgdE5MlGBxpI9a61dVq5TGMaQP1gXD1H3XtEZKveL0RkFK0/n65+msKYIVAfCameNfo
ZwB9F5EVASDZHbEx1Ywanfkt1PUfd7xeR0cA2JEX/i3NuadOSGZMf6kWveVNYRI4EOp1zc0jmOvu1iOzWtGTG5EemNt1/5JybKCL7Ap8DLiE56v4vTUlmWmonefH5w9pmvKadI4+W0vEyFFUz1FP03gNvRWC8c+4OEYkaH8m02ucLjzx2Rcdlo0UYrZ0lp2ZpB6jnKPYrInIVcBRwl4h01fn9JoXGtd0x44qOy3YSYW3tLDmmebcnUN8a/SjgEOAS59y7IrIpH73u3WTMpR3jp3ypbdqB2jk8kP6ii8g6zrn3gNVILnuletfaUmC2iLQ559R/EVO7Ait6JnaeN2P3wvMHamfxxELtALWs0W8lmbX0UZK71aT6fO/na4nINc65s5sT8SNcC5aRa6uxdPH9Xac/tZm8nYpBCz3xtnaAQfexnXOHVj+OdM6Nqn788HNgE+AIEWn6gI3lUnERsKjZy8mr9am8Patr3NzN5O29tLN45u/aAYZ9MM051+Oc2w64uQF5avFyi5aTKyPl1Zdndp28cB1ZvKN2Fg+lf41eBxn8JQ3xUouWkxt7ybN/vr/zjNU7ZXmgncVT2V+j99Gq/Wcreh2OKEyb/T+dPx1REPcx7SweU1+j13N6LS2s6DU6rX3itFPabt9bJJP/n/PkDe0AjXwDfNDAnzUQK3oNftXxi6mfb5uVmkn+PPe8doB6bmoZKyJrVj8/RkR+JiIfjpzhnPtkMwL2w4o+gDZ6lk/qPHualTw1FhJV1O8hqGcffTywSER2Bn5IUrgbm5JqYFb0VViDJf+Y0XXKEzsWyvtpZzEf+qt2AKh/AgcHHAZc5py7DFSuj34VWK6w3FTbiAVvze4a9/ImsmAP7SzmIzJX9IUichZwDBCLSBsKN9SXS8UeYH6rl5tmY2Te32Z0nbJkTVmynXYW80/+oh0A6iv60STXt3/LOfc6sBlwcVNSDS4VfyXTYGzh6acnd4bdHdKzhXYW069UvFdrLrpz7nXn3M+cc9OqX7/snNPYRweYqbTcVPly2wOP3NxxwVYFcRpTY5naPKYdAGq7e226c25fEVnIRy+KEcA55zSGX35YYZmpclb7LQ9+ty0eK0KbdhazSm8QVeZqh4Aaiu6c27f6MU0DE8wEVuDlwBfOTei4ZOqn2x4/UDuJGdQM7QC9MlmUcqlYAf6snaPV2lm+7J7OMx+ykmeGFb0BpmsHaKW1WPTezK6TntqmMH+sdhZTMyt6A9ynHaBVNuXt12d1jXttQ3nPhtfOjsWk5EAcZLvoD5Dsp+fa9lKeO63r1J415INttLOYujxEVGn1TMOrlNmil0vFBSTDW+XWpwqPPTmp8+wN2mXFZtpZTN3u0A7QV2aLXnWvdoBmOa5t8sMTOi7ZtiCsq53FDIkVvYHu1g7QDP/Rft3UqP3GT4rQpZ3FDMnjRJVUDXmW9QEJZgB/A0ZqB2kM527puODBsW3P2C2m2fZ77QAry/QavVwqOuB67RyN0MmypQ90/vtMK3kuWNGb4AYyPt77OrxfeaTrxGdHFV7fWzuLGba5RJU52iFWlvmil0vFl0hOtWXS5vLWq7O6Tvz7evL+LtpZTEP8t3aA/mS+6FXXaQcYip3lheemdn6/sJos20o7i2mI5aT0vZiXov8OqGiHqMfnCrMe/33njzdpE7eJdhbTMHEaxofrTy6KXi4VFwO3aeeo1XfbJs24suMXO4igcYuvaZ6rtQOsSi6KXpXKTaaVXdR+5ZSz2m/dR4RO7SymoeYBk7VDrEpuil4uFR8BntXOsSrCihUTO6MHj2p/8ECRlk1fZVrnWqJKau+9yE3Rq36lHaA/q7F0ydTO02btWXjOpirOp/eBK7RDDCRvRb+alM22uh7vvfNI14nPb1l4q1UTXJjWG09UUZ9fbSC5Knq5VPwAOE87R69AXps3s+ukSrcs2kk7i2maxcCl2iEGk6uiV91ACsbS3kP++uz9naev1iXLc3IdvlmFa4gq6pMoDiZ
3Ra9O8PBjzQyHFWb8aWLneVu02VTFefcBcJF2iFrkruhVv0FpGJ9T2347/RcdV+wiwloayzctNYGo8op2iFrksujVu9rOafVy/6vj8imndfx2X5uP3AvvorzlWI9cFh2gXCpOBh5sxbIKrOi5o/PcaV9om3lgK5ZnUuEnRJW/a4eoVW6LXtX0tXp1quLHdi68aFMV++MZUnrNxqrkuujlUnE68Idm/fwNefetWV3jXtpU3tmzWcswqfR9okqmpu7OddGrTgAWNPqHjpb55Ye7Tl60lizZvtE/26Ta7USVzM0pkPuil0vFV4FxjfyZ+xSefuaezjPX7pCeEY38uSb1FgKnaYcYitwXHaBcKt5Gg25jPbJtyqxbOi4YWRC3QSN+nsmUM4gqL2mHGAovil41Dnh1OD/gzPZfP3hR+9W7i7BGgzKZ7LiXqHKVdoih8qbo1ZldvjnU77+649IpJ7Tfub/NR+6lYb130sCbogOUS8V7gPH1fE87y5fd3RlO/2zbowc2J5XJgOOJKvO1QwyHV0WvOh14vpYXrsnihQ91nTxnu8LL+zY5k0mv64gqE7VDDJd3RS+XiouAY4GegV63Ce+8Mbtr3CsbSWX31iQzKfQoDT5jo8W7osOHw06t8qq5beXlF6d3nbJsDVm6bQtjmXR5EziCqLJEO0gjeFl0gHKpeCH9DCi5f+HJOXd1huu1y4rNFWKZdFgGfImoMk87SKN4W/Sq44EpvV8c03bvzBs6LhxTENbTi2RS4BSiynTtEI0kzmV62rJhC8J4feDhn7Tf8NrX2+7ZT8T7P36+u5qocrx2iEbzvugAE849euS32ifPBDbSzmJU3Q4cSVQZ8EBtFlnRe0Xdu5Fsxq+tnMTouBc4lKjygXaQZrDN1F5R5THgCGCpdhTTcjOAw/NacrCif1RUuR/4AskQvsYPjwNFosoi7SDNZJvu/Ym6DwAmgQ3wmHPPAAcRVd7SDtJstkbvT1SZCnyWjE3FbOryELCfDyUHK/qqRZWHgYNpwug0Rt1dwGeIKt78v7WiDySq/Ak4gJTN52aG5SbgsLzvk6/Mij6YqPIUsBfwsHYUM2w/A47L2sCOjWAH42oVdXcB15Dc+WayZSlwMlHlGu0gWqzo9Yq6Q+ACQLSjmJrMI7lBZbZ2EE1W9KGIur8IXA9280vK3Q98OUszqjSL7aMPRVT5A/AJ+tz5ZlLnQuBzVvKErdGHI+ouAGcAPwU6lNOYxDzgO0SVe7SDpIkVvRGi7t2BW4Ex2lE8NwH4AVHlPe0gaWNFb5Soew2SNfspYNMmt9h8krX4ZO0gaWVFb7SoeyfgCsBmV20+R7IWP52oYpcrD8CK3ixR97HAxcDG2lFyairJZvpj2kGywIreTFF3N8nm/PFAp3KavHgB+CFR5XbtIFliRW+FqHsEcC7wdWz/fagWAOcDv8zzABHNYkVvpah7JPAjkstorfC1eQ34OXAlUWWhdpissqJriLq3IplA4ivAaspp0uoF4CLgRqKKDe81TFZ0TVH3BiSzdH4PGKWcJi1mAZcCvyGqrNAOkxdW9DSIugU4hGSer8/j36XJC4CbgWuJKnO0w+SRFT1tou4tgSOBfwP+hfzeJbcMuBu4EZhkm+fNZUVPs6h7C+BLJKXfh+yX/m1gMhADk30aykmbFT0rou6PkwxY+Sng08DHdQPVxAFPkxR7EjAzj7OgZIEVPaui7q2BfauPvYBt0L8o502Sg2mPVB+ziSrv6kYyYEXPj6i7Hdga2KHPY3tgc2DdBi5pOfASMHelxxNElXIjFiAiG5AMGgGwCdAD9A7LvJdzzi6YqZMV3QfJeHcbkVx33/tYn+Qe+vY+j7bqx8XAu9VHpc/nbwPzWjm4oohEwPvOuUtWel5I3r92Cq4GdnWWD5Ij2vOqj8wSka2B3wPTSc5IHC4iTzrn1q3+9y8DBzvnvi0iGwPjgS2BFcApzrmZStHV+Xa+1mTf9sAE59yuwCsDvO5y4CLn3B7
AUcC1rQiXVrZGN1kz1zlXy4iuBwPbJFv4AKwnIqs757ycQNOKbrLmH30+X8FHry3oe9+AYAfuPmSb7iazqgfiFojIaBEpkMxv3+s+4MTeL0Rkl1bnSxMrusm6M0mutrufZOy4XicCY0Vkjoj8GfiORri0sNNrxnjA1ujGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeMCKbowHrOjGeOD/AM0W+8UGUHW2AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "is_json_counts = df['is_json'].value_counts().compute()\n", - "is_json_counts.plot(kind='pie')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As the value_len increases the frequence of valid JSON on the columns 'value' also increases,\n", - "for the rows that have the value_len one std above the mean, we have the following:\n", - "- isJson True: 46691 rows\n", - " - 99,88% are valid JSON\n", - " \n", - "- isJson False: 54 rows\n", - " - 0,11% are not valid JSON\n", - "\n", - "\n", - "The valid json also represent 9.35% of the data because the number of non Json are too small to make a percentual difference. " - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "one std above the mean = len: 46745 (9.35%)\n" - ] - } - ], - "source": [ - "print(\"one std above the mean = len: {0} ({1:0.2f}%)\".format(A_COUNT, A_COUNT / COUNT * 100))" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_len
meanstdminmaxcount
is_json
False82460.05555613627.1180632813510465354
True271422.740185412552.29861327669449686146691
\n", - "
" - ], - "text/plain": [ - " value_len \n", - " mean std min max count\n", - "is_json \n", - "False 82460.055556 13627.118063 28135 104653 54\n", - "True 271422.740185 412552.298613 27669 4496861 46691" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "group = std_above.groupby('is_json')\n", - "group_result = group.agg({'value_len': ['mean', 'std', 'min', 'max', 'count']}).compute()\n", - "group_result" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NOT json count: 54 (0.01%)\n", - "IS json count: 46691 (9.34%)\n" - ] - } - ], - "source": [ - "a = group_result['value_len']['count']\n", - "print(\"NOT json count: {0} ({1:0.2f}%)\".format(a[0], a[0] / COUNT * 100))\n", - "print(\"IS json count: {0} ({1:0.2f}%)\".format(a[1], a[1] / COUNT * 100))" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAQIAAADuCAYAAADSvgkdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEh1JREFUeJzt3XmQnVWdxvHv6bCDvAhhSSAYdmWRyD6Cg0HQQbawODAFOkOGJYIwTlGKoIynQEUccAw4YA2jcWBAKAUxhkFBtlKjISyhhmUgxRAQIQsk8wYyMEnnnvnjvE132l7u7bv87nnf51N1qzu3b/d9oO99+rzLeY8LISAi1dZjHUBE7KkIRERFICIqAhFBRSAiqAhEBBWBiKAiEBFUBCKCikBEUBGICCoCEUFFICKoCEQEFYGIoCIQEVQEIoKKQERQEYgIKgIRQUUgIqgIRAQVgYigIhARVAQigopARFARiAgqAhFBRSAiqAhEBFjPOoB0iM82BCYWt62ADYH1gQ2Ir4MasLb42AssBxYXt2X4vGaQWjrEhRCsM0gr+GwcsCewH7Az8Q2/fXGbCIxv4qevBV6nvxgWA4uAJ4An8PnLTfxs6QIqghT5zAF7AAcUtwOBKcAmRoneoK8U4PHi40KNItKhIkiFz3YDTgCOJr75N7cNNKqVwH3AHOBufL7MOI+MQEXQreJf/YOJb/4TgA/YBmpKDZhPLIU5+HyBcR4ZREXQTXy2HvBxYBpwHLCdbaC2eYVYCjfj87nWYURF0B18Nhk4GzgTmGAbpuOeBb4P3KTNBzsqAitx6H80cAFxFFD1czpWAz8GrsPn86zDVI2KoNN8tgnwGeDvgPcbp+lW84GZwG34fK11mCpQEXRKPKHnfOASmjumXyXPApfh8zusg5SdiqDdfNYDnA5cAbzPOE2qHgUuxef3WQcpKxVBO/nsL4BvAvtaRymJB4FLtA+h9VQE7eCzA4CrgCOso5TUz4Av4vPnrYOUhYqglXy2OfBtYDrgjNOU3TvAV4FrtEOxeSqCVvHZUcTj4ZOso1TMI8B0fP60dZCUqQia5bPNgH8EZlhHqbDVwOXAVfi81zpMilQEzfDZ4cAsYCfrKALEWY9n4vMnrYOkRkUwFj7bGLgSuBDtC+g2a4B/wOfftA6SEhVBo3w2ibjX+kPWUWREPyaODlZZB0mBiqARPjsE+CnlnRVYNk8C0/D5Iusg3a7qE13q57NPAw+hEkjJvsB8fDbVOki304hgNPEU4SuBL1pHkTHrBS7C59daB+lWKoKR+Ow9wK3AsdZRpCVmAefi8zXWQbqNimA4PtuOeM29va2jSEvdDZyCz9+xDtJNVARD8dn2wAPA7tZRpC0eAI7XEYV+KoLBfLYj8YWyi3UUaau5wNH4fKV1kG6gIhgolsDDwGTjJNIZvwM+gc/ftA5iTYcP+/hsAnA/KoEq+TPgbny2qXUQayoCAJ+NB34F7GodRTruI8AcfLaRdRBLKoL41+Be4rqBUk0fBX5gHcJStYsgXlL8JjRvQOCv8NmXrUNYqXYRwGXASdYhpGtcgc8q+Xqo7lEDn00D7kTTiGVdq4DDqrY+YzWLwGd7Ew8dbWYdRbrSH4AD8fkS6yCdUr1NA59tSbyegEpAhjMJuKtYlKYSqlUEPhsH3A7sbB1Fut4hwHXWITqlWkUAlwJHWoeQZJyNz462DtEJ1dlH4LN9gMeA9a2jSFJeBfbG5yusg7RTNUYEPluPOBddJSCNmkgFNhGqUQTx6kL7W4eQZJ2Oz060DtFO5d808NlewOPABtZRJGlLiZsIy6yDtEO5RwTxKMEsVALSvG2AG6xDtEu5iwAuAg60DiGlcXJZT0Eu76aBz3YAFgKVnl4qLbcQ2LNsayyWeUTgUQlI6+0GnGMdotXKOSLw2R7A08A46yhSSkuBXfD5W9ZBWqWsI4KvoRKQ9tkG+IJ1iFYq34jAZ/sD89H0YmmvVcCu+Hy
xdZBWKOOI4EpUAtJ+mwJftQ7RKuUaEcTFLh+wjiGV0Qvshc+ftw7SrLKNCL5hHUAqZT1KsjhueUYEPjsM+LV1DKmcd4AdUz/1uEwjggusA0glbQR81jpEs8oxIoiLli4iDtVEOm0JcVSw2jrIWJVlRDADlYDY2ZbEL4uffhHEGYZ/ax1DKm+GdYBmpF8EcAwwwTqEVN7h+Oz91iHGqgxFcJZ1AJHC2dYBxirtIvDZtsAnrWOIFD5lHWCs0i4COB5NLpLuMQmf7WcdYixSL4JjrQOIDDLNOsBYpFsEPtsILVYi3UdF0GFHAJtYhxAZZB98tpN1iEalXATHWQcQGUZyo4KUi+AY6wAiwzjBOkCj0pxr4LMpwBPWMUSGsRbYFp+/YR2kXqmOCDQakG42DjjUOkQjUi2CpP4nSyUltbBOqkWQ5EkbUikqgrby2QTitE+RbnaAdYBGpFcEGg1IGrZK6XwCFYFI+ySzeaAiEGkfFUEbqQgkFcnsJ0irCHy2FbCjdQyROu1jHaBeaRUBJHspKKmkrYpZsl2vriJwzt1fz30doGsTSmq2tw5QjxEvAe6c24g41Xe8c+699C8uujkwsc3ZhrKdwXOKNGN74AXrEKMZbS2Ac4HPE9/0j9FfBCuBf25jruGoCCQ16Y8IQggzgZnOuQtCCNd1KNNIVASSmvSLoE8I4Trn3IeByQO/J4RwU5tyDUf7CCQ15SkC59zNwC7AAuJca4AAdLoINCKQ1JSnCIgnRuwZ7K9ioiKQ1FjsVG9YvecRPEV3vAm3sQ4g0qBN632gc26tc27BgNvkER472Tn3VCsCQv0jgvHAM865R4Aa8OHi/reJmwrLin8fFEJoz9LQcbFTrXgsqdmwgce+HUKY0rYkI6j3jeWHuX8q8FYI4eqBdzrnHPF6iLUmsg2mFY0kRRs0883FqOBm+kcWnwshzB30mL2AWcVz9QAnhxAWOufOAC4s7p8HnBdCWMsQ6j1q8PAwIacO+HxX4C7gN8DBwDTn3JMhhC2Kr58GHBlCOMs5ty1wA3HeQA24MITw+1FiaDQgXSsEAvG13PexBtQCbk0D5/Fv7JxbUHz+YgjhRGApcFQI4R3n3G7Aj/jTyUwzgJkhhFuccxsA45xzHwBOBQ4NIaxxzl0PnM4wO/jrPWrwZvEfCLFd1gdWAd8e9NA9gTNDCDOccyP97GuBb4UQfl803hxg71FiaETQRsULeZ0X8aAbAWrgihd432NdrXhh1AIu9H/N9X0MAULo/1gb4r5BN+j7vDbg/uJzQuj/vLbuY1yNnlDDER/fw4DH0fe1Gj3Uwrtf59376GFt/+ch4NzagV8LPcXz9bgajrX0uP7Hvnuy3Tp6GZfPqP/XMNSmwfrAd51zU4ib4bsP8X2/A77snNsBuLMYDXwM2B+YHwfobEwslSHVOyJ4z8B/O+emAQcN8dAXQgjz6/iRRwJ7FAEB3uuc2ziE8PYI39PKzQwZxDkc8cU87B+w+NsafODI4EDSkG+5rvVyk9//98ASYF/i7+adwQ8IIdzqnJtHvLr3L51zZxH/L/1bCOGSep5kTLMPQwh3EZccG2zVgM9rrPsrGzgLyxF3LE4pbtuPUgIAa8aSVcRYs6/bDHit2N/2aYYYGTvndgb+O4RwLTAb+CBwP3CKc26b4jFbOufeN9yT1LtpcNKAf/YQt1FG/FMQQqg551YU2zUvACfSf3ThV8D5wD8VP39KCGHB0D/pXSoCSVFvk99/PXCHc+5TwIOs+8e2z6nAGc65NcBi4PIQwnLn3FeAe51zPcT3z/nAS0M9SV0rHTnnZg34Zy+wCLgROI/iqEGxs/AnA7dxnHOnAt8gDo+eATYsdhZuTdxZuDuxjB4MIZw/ahCf9aJ9BZKWp/B511+gJK0lz3y2AtjCOoZIA36Nz//cOsRo6r0wyQ7OuZ8655Y655Y45+4o9lB22msGzynSjCRes/XuLJxF3AkxkTiJ4ufFfZ32R4PnFGnGq9YB6lF
vEWwdQpgVQugtbj8Etm5jruEk8T9VZIBSjQhed86d4ZwbV9zOACyWfFYRSGpKVQTTgb8kHpp4DTgFOLNdoUagIpDUJFEE9Z6/fwXw1yGEFRBPTgCuJhZEJ6kIJDVJFEG9I4IP9pUAQAhhOfCh9kQakYpAUpPEa7beIugpLmcOvDsisJgN+AeD5xQZqzfx+YrRH2av3iK4BpjrnLvCOXc5MBf4VvtiDcPnrwDLO/68ImPzhHWAetVVBMXVik8mzoJaBpwUQri5ncFG8LjR84o06jHrAPWqe3gfQniGOF/A2qPEacwi3S6ZP1qpLYIKsQhEUpDMiEBFINIebwHPWYeoV3pF4POXgNetY4iMYgE+T+aqWukVQZTMkEsqK5n9A5BuEdRzXUQRS0ltwqZaBA9ZBxAZQQDutQ7RiFSL4GHgf6xDiAzjEXy+xDpEI9IsAp/3AvdYxxAZxmzrAI1Kswiin1kHEBnGz60DNCrlIrgHXeJcus8ifP6f1iEalW4R+Hwl2mko3Se50QCkXARRcttiUnpJviZVBCKts5J4RCs5aReBz18mXhtBpBvchs+T3G+VdhFEN1oHECkk+1osQxHcjk4uEnsL8HlSpxUPlH4R+Pxt4BbrGFJ5yY4GoAxFEF1vHUAqbSVgdem+lihHEfj8GeA+6xhSWbPw+ZvWIZpRjiKIvmMdQCqpBlxnHaJZZSqCe0jo0lBSGnPw+QvWIZpVniLweQC+bh1DKiUA3jpEK5SnCKJbgOQmfEiybsfnySxiMpJyFUG8WOQl1jGkEtYAX7EO0SrlKgIAn99Noud7S1L+tQz7BvqUrwiii60DSKmtAi63DtFK5SwCn88D7rSOIaU1E58vtg7RSuUsguhSoNc6hJTOcixWAm+z8haBz58D/sU6hpTOZfg8tw7RauUtguhLwMvWIaQ0HgJusA7RDuUugnj+93TiiR8izVgFTC9OXCudchcBgM/vB75nHUOSdzE+f9E6RLuUvwiiLwCl/SVK2z1Eyae6V6MIfL4KOBNtIkjjSr1J0KcaRQDg84cpwXRR6bhSbxL0qU4RRJcA/2UdQpLxC0q+SdDHhVDqEc+f8tnuwDxgC+so0tWeBw7G55W4MG7VRgTg8+eBU4G11lGka+XA8VUpAahiEQD4/F7gIusY0pVqwGnFmamVUc0iAPD5TOD71jGk61yMz39hHaLTqlsE0XnAb6xDSNe4GZ9fbR3CQrWLwOergZOAl6yjiLl5wNnWIaxUuwgAfL4M+CSwzDqKmHkGOBaf/591ECsqAuhbIOUoYIV1FOm4hcDH8Pnr1kEsqQj6+PxJ4OPEQ0dSDS8CR5TtakNjoSIYKK5m+wm0unIVvABMxeevWAfpBiqCweL1Do8AKj1ULLnngMPxuXYSF1QEQ4mLVnwUqPyQsYSeJpbAH62DdBMVwXB8/jRwGPCsdRRpmfuAj+DzJdZBuo2KYCRxAYtDgDnWUaRpM4Gj8bmODA2herMPx8JnPcDX0HJqKVoNnIfPdTr5CFQEjfDZacAPgI2to0hdlgIn4fPfWgfpdiqCRvlsP+AuYJJ1FBnRAuAEfK7L2ddB+wga5fPHgQOAB62jyLB+CByqEqifRgRj5TNHnL14FbCpcRqJXgPOwefaudsgFUGzfLYT8boGU62jVNy/AxfqqMDYqAhaIY4OPkscHWxmnKZqFgPn4vPZ1kFSpiJoJZ9NJo4OjjBOUhW3Ahfg8+XWQVKnImi1ODr4G+ByYAfbMKX1JPClKl5SrF1UBO3is42A84FLgS2N05TFi8BlwK1lX3mo01QE7eazjLj24ufR0YWxWkY8s/N7xeXlpMVUBJ3is22Jf83OAdY3TpOKt4BrgGuKJe6lTVQEneaznYlrKnwGHWEYzhLiUvbX4/Ol1mGqQEVgJW4yTAc+B+xsnKZbPEGcJfgjbQJ0lorAWpzZeBTxUtrHU73NhreA24Ab8fkj1mGqSkXQTXy2DXGT4RTgIMDZBmqb1cS5Gj8Bbtf
2vz0VQbeKOxePAY4jjhhSP+LwBvAfwGzgl3rzdxcVQQriOQlTiaVwLOlMgV5IfOPPBn6Lz7UCdZdSEaQoTnTaH9hvwMfxppngFeCx4vYo8Jj2+KdDRVAWPptEfynsDUwEJgDbARu06Fn+F3iVON33VeIVgeMbX2/6pKkIyi7OfdiSWAgT6C+HTYH1gHHFR4BeYE1xe5s4s+9V+t78PtcqUCWlIhARXapMRFQEIoKKQERQEYgIKgIRQUUgIqgIRAQVgYigIhARVAQigopARFARiAgqAhFBRSAiqAhEBBWBiKAiEBFUBCKCikBEUBGICCoCEUFFICKoCEQEFYGIoCIQEVQEIoKKQERQEYgIKgIRQUUgIqgIRAT4fxoxDoxWW+6XAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "group_result['value_len']['count'].plot(kind='pie')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### All greater values are JSON\n", - "\n", - "There is absolute no value greater than 104653 (max value for non-json) that represents a valid JSON. \n", - "\n", - "This implies that all the greater values are JSON but they represent very low percentage of the whole data (6.76%). " - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "104653\n", - "len: 33788 (6.76%)\n" - ] - } - ], - "source": [ - "max_non_json_value = group_result['value_len']['max'][0]\n", - "allJson = df[df['value_len'] > max_non_json_value ]\n", - "length = len(allJson)\n", - "print(\"len: {0} ({1:0.2f}%)\".format(length, length / COUNT * 100))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb new file mode 100644 index 0000000..ba2df56 --- /dev/null +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb @@ -0,0 +1,664 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start dask" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n" + ] + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.diagnostics import ProgressBar\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "# from dask.distributed import Client\n", + "# #Initializing client\n", + "# client = Client()\n", + "# client\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Parquet\n", + "Used sample: sample_0_prep/full_sample_json.parquet\n", + " * This sample is the 10% sample with the \"is_json\" column added to it; this column records whether the 'value' column is valid JSON. \n", + " * This sample can be obtained by running 'isJson_dataPrep.ipynb'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_lenis_json
08False
\n", + "
" + ], + "text/plain": [ + " value_len is_json\n", + "0 8 False" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet('sample_0_prep/full_sample_json.parquet', engine=\"pyarrow\", columns=['value_len', 'is_json'])\n", + "df.head(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 1.6s\n" + ] + } + ], + "source": [ + "with ProgressBar():\n", + " df = df.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Values distribution: " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The vast majority of the values are small, as seen in the graph below." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAD8CAYAAABthzNFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAF89JREFUeJzt3XGQXWWd5vHvQydNRFjBIGMqndBhO0Y6I2uSnig7zJpVkaC07LCs0xmqRjCTjKNRUKs2YbAGUltb5UztzjCscSEu2airQMyoBIyVUgYI46YkCTpjIAZ7s7hcEkkDM4EZDUngt3/c0+Gmud333Nv95vTpfj5VXX3Pe+8553dP5faT933PPUcRgZmZ2VCnFV2AmZmNTw4IMzOrywFhZmZ1OSDMzKwuB4SZmdXlgDAzs7ocEGZmVpcDwszM6nJAmJlZXVOKLmA0zj333Ojs7Cy6DDOzUtm9e/dzEfGWRq8rdUB0dnaya9euosswMysVSb/I8zoPMZmZWV2lDAhJvZLWHz58uOhSzMwmrFIGRETcFxEr3/SmNxVdipnZhFXqOQgzm3iOHTtGpVLhyJEjRZdSetOmTaOjo4OpU6e2tL4DwszGlUqlwllnnUVnZyeSii6ntCKC559/nkqlwpw5c1raRimHmMxs4jpy5AjTp093OIySJKZPnz6qnti46UFI+h3gGqo1dUfEvy64JDMriMNhbIz2OCbtQUjaIOmQpD1D2pdK2iepX9IagIh4JCI+DtwPfCVlXWZm1ljqHsRG4IvAVwcbJLUB64BLgQqwU9KWiHgie8nvA3+YuC4613w39S6G9dQXPlTYvs3KZqw/q/785Ze0BxER24EXhjQvBvojYn9EHAXuBq4EkDQbOBwRL6asy8xsrJx55pljur1rr72WzZs3j+k2W1XEJPVM4Oma5UrWBrAc+J8jrSxppaRdknYNDAwkKtHMzIoIiHqzJgEQETdHxP8eaeWIWA+sBR5rb29PUJ6ZTWarV6/mS1/60onlW265hbVr1/K+972PhQsX8o53vIN77733des99NBDXHHFFSeWV61axcaNGwHYvXs373nPe1i0aBGXXXYZBw8ezFXLcOstWbKE1atXs3jxYt72trfxyCOPjOIdD6+IgKgAs2qWO4ADzWzA36Q2s1T6+vq45557Tixv2rSJ6667jm9/+9s89thjPPjgg3zuc58jInJt79ixY3zqU59i8+bN7N69m4997GPcdNNNo17v+PHjPProo9x6662sXbu2+TeaQxGnue4E5kqaAzwD9FGdmM5NUi/Q29XVlaA8M5vMFixYwKFDhzhw4AADAwOcc845zJgxg8985jNs376d0047jWeeeYZnn32Wt771rQ23t2/fPvbs2cOll14KwCuvvMKMGTNGvd5VV10FwKJFi3jqqadaeKeNJQ0ISXcBS4BzJVWAmyPiTkmrgG1AG7AhIh5PWYeZWTOuvvpqNm/ezC9/+Uv6+vr4+te/zsDAALt372bq1Kl0dna+7gtoU6ZM4dVXXz2xPPh8RDB//nx27NjRVA2N1jv99NMBaGtr4/jx401tO6+kARERy4Zp3wpsHcV27wPu6+npWdHqNsysHIo4LbWvr48VK1bw3HPP8fDDD7Np0ybOO+88pk6dyoMPPsgvfvH62ymcf/75PPHEE7z88sscOXKEBx54gEsuuYR58+YxMDDAjh07uPjiizl27BhPPvkk8+fPH7GGVtcbS+Pmm9TN8BCTmaU0f/58XnrpJWbOnMmMGTO45ppr6O3tpaenh3e+8528/e1vf906s2bN4iMf+QgXXXQRc+fOZcGCBQC0t7ezefNmPv3pT3P48GGOHz/ODTfc0PAPfavrjSXlnWgZj3p6eqLVO8r5i3Jm49PevXu58MILiy5jwqh3PCXtjoieRuv6Yn1mZlaXh5jMzArwyU9+kh/+8IcntV1//fVcd911BVX0eqUMCE9Sm01sETHhr+i
6bt265PsY7RSCh5jMbFyZNm0azz///Kj/uE12gzcMmjZtWsvbKGUPwkNMZhNXR0cHlUoFX2tt9AZvOdqqUgaEh5jMJq6pU6e2fItMG1seYjIzs7ocEGZmVlcpA0JSr6T1hw8fLroUM7MJq5QB4ct9m5mlV8qAMDOz9BwQZmZWlwPCzMzqKmVAeJLazCy9UgaEJ6nNzNIrZUCYmVl6DggzM6vLAWFmZnWNm4v1SToN+E/AvwB2RcRXCi7JzGxSS9qDkLRB0iFJe4a0L5W0T1K/pDVZ85XATOAYUElZl5mZNZZ6iGkjsLS2QVIbsA64HOgGlknqBuYBOyLis8AfJ67LzMwaSBoQEbEdeGFI82KgPyL2R8RR4G6qvYcK8A/Za15JWZeZmTVWxCT1TODpmuVK1vYt4DJJ/w3YPtzKklZK2iVpl+84ZWaWThGT1PXuRB4R8StgeaOVI2K9pINAb3t7+6Ixr87MzIBiehAVYFbNcgdwoJkN+JvUZmbpFREQO4G5kuZIagf6gC3NbMDXYjIzSy/1aa53ATuAeZIqkpZHxHFgFbAN2AtsiojHm9muexBmZuklnYOIiGXDtG8Ftra6XUm9QG9XV1ermzAzswZKeakN9yDMzNIrZUB4DsLMLL1SBoR7EGZm6ZUyINyDMDNLr5QB4R6EmVl6pQwIMzNLr5QB4SEmM7P0ShkQHmIyM0uvlAFhZmbpOSDMzKyuUgaE5yDMzNIrZUB4DsLMLL1SBoSZmaXngDAzs7ocEGZmVlcpA8KT1GZm6ZUyIDxJbWaWXikDwszM0nNAmJlZXQ4IMzOra9wEhKQlkh6RdLukJUXXY2Y22SUNCEkbJB2StGdI+1JJ+yT1S1qTNQfwT8A0oJKyLjMzayx1D2IjsLS2QVIbsA64HOgGlknqBh6JiMuB1cDaxHWZmVkDSQMiIrYDLwxpXgz0R8T+iDgK3A1cGRGvZs//A3B6yrrMzKyxKQXscybwdM1yBXiXpKuAy4CzgS8Ot7KklcBKgNmzZycs08xscisiIFSnLSLiW8C3Gq0cEeslHQR629vbF415dWZmBhRzFlMFmFWz3AEcaGYD/ia1mVl6RQTETmCupDmS2oE+YEszG/C1mMzM0kt9mutdwA5gnqSKpOURcRxYBWwD9gKbIuLxZrbrHoSZWXpJ5yAiYtkw7VuBra1uV1Iv0NvV1dXqJszMrIFx803qZrgHYWaWXikDwnMQZmbplTIg3IMwM0uvlAHhHoSZWXqlDAj3IMzM0itlQJiZWXqlDAgPMZmZpZcrICT9ZupCmuEhJjOz9PL2IG6X9KikT0g6O2lFZmY2LuQKiIi4BLiG6kX2dkn6hqRLk1ZmZmaFyj0HERE/Bz5P9Y5v7wFuk/Sz7D4Op5TnIMzM0ss7B3GRpL+kenG99wK9EXFh9vgvE9ZXl+cgzMzSy3uxvi8CXwb+JCJ+PdgYEQckfT5JZWZmVqi8AfFB4NcR8QqApNOAaRHxq4j4WrLqzMysMHnnIH4AvKFm+YyszczMJqi8ATEtIv5pcCF7fEaakhrzJLWZWXp5A+KfJS0cXJC0CPj1CK9PypPUZmbp5Z2DuAH4pqQD2fIM4PfSlGRmZuNBroCIiJ2S3g7MAwT8LCKOJa3MzMwK1cw9qX8L6MzWWSCJiPhqkqrMzKxwuQJC0teAfwn8BHglaw5gTANC0huB7cDNEXH/WG7bzMyak7cH0QN0R0Q0s3FJG4ArgEMR8Zs17UuBvwLagP8REV/InloNbGpmH2Zmlkbes5j2AG9tYfsbgaW1DZLagHXA5UA3sExSt6T3A08Az7awHzMzG2N5exDnAk9IehR4ebAxIj480koRsV1S55DmxUB/ROwHkHQ3cCVwJvBGqqHxa0lbI+LVnPWZmdkYyxsQt4zhPmcCT9csV4B3RcQqAEnXAs8NFw6SVgIrAWbPnj2GZZmZWa28p7k+LOl8YG5E/EDSGVTnD1qheruo2dfGBrWsl3QQ6G1vb1/UYg1
mZtZA3st9rwA2A3dkTTOB77S4zwrVGw8N6gAODPPauvxNajOz9PJOUn8S+G3gRThx86DzWtznTmCupDmS2oE+YEszG/C1mMzM0ssbEC9HxNHBBUlTqBkWGo6ku4AdwDxJFUnLI+I4sArYRvUGRJsi4vFminYPwswsvbyT1A9L+hPgDdm9qD8B3NdopYhYNkz7VmBr7iqHkNQL9HZ1dbW6CTMzayBvD2INMAD8FPgjqn/cC7uTnHsQZmbp5T2L6VWqtxz9ctpy8il7D6JzzXcL2e9TX/hQIfs1s3LKexbT/5W0f+hP6uKG4x6EmVl6zVyLadA04D8Abx77cvIpew/CzKwMcvUgIuL5mp9nIuJW4L2JaxupHvcgzMwSy3u574U1i6dR7VGclaQiMzMbF/IOMf3XmsfHgaeAj4x5NTl5iMnMLL28ZzH929SFNCMi7gPu6+npWVF0LWZmE1XeIabPjvR8RPzF2JRjZmbjRTNnMf0Wr10zqZfqrUGfHnYNMzMrtWZuGLQwIl4CkHQL8M2I+MNUhY3EcxBmZunlvdTGbOBozfJRoHPMq8nJp7mamaWXtwfxNeBRSd+mehXX3wW+mqwqMzMrXN6zmP6zpO8Bv5M1XRcRP05XlpmZFS3vEBPAGcCLEfFXQEXSnEQ1mZnZOJD3Yn03A6uBG7OmqcD/SlVUjnp8Rzkzs8Ty9iB+F/gw8M8AEXGAAi+14UlqM7P08gbE0YgIstuMSnpjupLMzGw8yBsQmyTdAZwtaQXwA8bJzYPMzCyNvGcx/ZfsXtQvAvOAP42I7yetzMzMCtUwICS1Adsi4v1AslCQdCFwPdVvbT8QEf891b7MzKyxhkNMEfEK8CtJTc8IS9og6ZCkPUPal0raJ6lf0ppsP3sj4uNULyPeU297ZmZ26uSdgzgC/FTSnZJuG/zJsd5GYGltQ9YjWQdcDnQDyyR1Z899GPhb4IGcdZmZWSJ5L7Xx3eynKRGxXVLnkObFQH9E7AeQdDdwJfBERGwBtkj6LvCNZvdnZmZjZ8SAkDQ7Iv5fRHxlDPc5k5MvE14B3iVpCXAVcDqwdYSaVgIrAWbPnj2GZU18nWuazvgx89QXPlTYvs2sNY16EN8BFgJI+uuI+PdjsE/VaYuIeAh4qNHKEbFe0kGgt729fdEY1GNmZnU0moOo/WN+wRjtswLMqlnuAA40swF/k9rMLL1GARHDPB6NncBcSXMktQN9vHanulx8LSYzs/QaBcS/kvSipJeAi7LHL0p6SdKLjTYu6S5gBzBPUkXS8og4DqwCtgF7gU0R8XgzRbsHYWaW3ohzEBHRNpqNR8SyYdq3MsJEdCO+5aiZWXrN3A9i3HAPwswsvVIGhOcgzMzSK2VAuAdhZpZeKQPCPQgzs/RKGRDuQZiZpZf3Wkxmo1LUZT58iQ+z1pWyB+EhJjOz9EoZEB5iMjNLr5QBYWZm6TkgzMysrlIGhOcgzMzSK2VAeA7CzCy9UgaEmZml54AwM7O6HBBmZlZXKQPCk9RmZumVMiA8SW1mll4pA8LMzNJzQJiZWV0OCDMzq2tcBYSkfyfpy5LulfSBousxM5vMkgeEpA2SDknaM6R9qaR9kvolrQGIiO9ExArgWuD3UtdmZmbDOxU9iI3A0toGSW3AOuByoBtYJqm75iWfz543M7OCJA+IiNgOvDCkeTHQHxH7I+IocDdwpar+DPheRDyWujYzMxteUXMQM4Gna5YrWdungPcDV0v6eL0VJa2UtEvSroGBgfSVmplNUkXdk1p12iIibgNuG2nFiFgPrAfo6emJBLWZWQuKuu84+N7jqRTVg6gAs2qWO4ADeVf2pTbMzNIrKiB2AnMlzZHUDvQBWwqqxczM6kg+xCTpLmAJcK6kCnBzRNwpaRWwDWgDNkTE43m3GRH3Aff19PSsSFGzTRxFDXt4yMMmguQBERHLhmnfCmxtZZuSeoHerq6u0ZRmZmYjGFffpM7LV3M1M0uvlAHhSWozs/RKGRD
uQZiZpVfKgHAPwswsvVIGhHsQZmbplTIgzMwsvVIGhIeYzMzSK2VAeIjJzCy9UgaEmZml54AwM7O6ShkQnoMwM0uvlAHhOQgzs/RKGRBmZpaeA8LMzOpyQJiZWV2lDAhPUpuZpVfKgPAktZlZesnvKGdmlppvLZtGKXsQZmaWngPCzMzqGjcBIekCSXdK2lx0LWZmljggJG2QdEjSniHtSyXtk9QvaQ1AROyPiOUp6zEzs/xS9yA2AktrGyS1AeuAy4FuYJmk7sR1mJlZk5IGRERsB14Y0rwY6M96DEeBu4ErU9ZhZmbNK2IOYibwdM1yBZgpabqk24EFkm4cbmVJKyXtkrRrYGAgda1mZpNWEd+DUJ22iIjngY83Wjki1gPrAXp6emKMazMzs0wRAVEBZtUsdwAHmtmApF6gt6urayzrMhszRX1xCyb+l7fs1CliiGknMFfSHEntQB+wpYA6zMxsBKlPc70L2AHMk1SRtDwijgOrgG3AXmBTRDzezHZ9LSYzs/SSDjFFxLJh2rcCW1vdroeYzMzSGzffpG6GexBmZumVMiB8Pwgzs/RKGRDuQZiZpVfKgHAPwswsvVIGhHsQZmbplTIgzMwsvVIGhIeYzMzSK2VAeIjJzCy9UgaEmZml54AwM7O6iria66j5UhtmwyvySrKTzUS/am8pexCegzAzS6+UAWFmZuk5IMzMrC4HhJmZ1VXKgPAX5czM0itlQHiS2swsvVIGhJmZpeeAMDOzuhwQZmZWlyKi6BpaJmkA+EWLq58LPDeG5ZSdj8fJfDxe42NxsolwPM6PiLc0elGpA2I0JO2KiJ6i6xgvfDxO5uPxGh+Lk02m4+EhJjMzq8sBYWZmdU3mgFhfdAHjjI/HyXw8XuNjcbJJczwm7RyEmZmNbDL3IMzMbASTMiAkLZW0T1K/pDVF19MsSRskHZK0p6btzZK+L+nn2e9zsnZJui17r38vaWHNOh/NXv9zSR+taV8k6afZOrdJUqv7OAXHYpakByXtlfS4pOsn+fGYJulRSX+XHY+1WfscST/Kar1HUnvWfnq23J8931mzrRuz9n2SLqtpr/v5aWUfp4qkNkk/lnR/q7VOpOORW0RMqh+gDfg/wAVAO/B3QHfRdTX5Hv4NsBDYU9P258Ca7PEa4M+yxx8EvgcIeDfwo6z9zcD+7Pc52eNzsuceBS7O1vkecHkr+zhFx2IGsDB7fBbwJNA9iY+HgDOzx1OBH2U1bAL6svbbgT/OHn8CuD173Afckz3uzj4bpwNzss9M20ifn2b3cYo/M58FvgHc30qtE+145D5uRRdQwD+Ui4FtNcs3AjcWXVcL76OTkwNiHzAjezwD2Jc9vgNYNvR1wDLgjpr2O7K2GcDPatpPvK7ZfRR0XO4FLvXxCIAzgMeAd1H9YteUrP3EZwDYBlycPZ6SvU5DPxeDrxvu85Ot09Q+TuFx6AAeAN4L3N9KrRPpeDTzMxmHmGYCT9csV7K2svuNiDgIkP0+L2sf7v2O1F6p097KPk6prKu+gOr/mift8ciGU34CHAK+T/V/uP8YEcfr1HOi1uz5w8B0mj9O01vYx6lyK/AfgVez5VZqnUjHI7fJGBCq0zaRT+Ua7v02297KPk4ZSWcCfw3cEBEvjvTSOm0T6nhExCsR8U6q/3NeDFw4Qj1jdTxGes+FHQ9JVwCHImJ3bfMI9Uzo49GsyRgQFWBWzXIHcKCgWsbSs5JmAGS/D2Xtw73fkdo76rS3so9TQtJUquHw9Yj4Vou1TpjjMSgi/hF4iOocxNmSptSp50St2fNvAl6g+eP0XAv7OBV+G/iwpKeAu6kOM93aQq0T5Xg0ZTIGxE5gbnaGQTvVSaItBdc0FrYAg2fefJTqWPxg+x9kZ9a8GzicDYdsAz4g6Zzs7JsPUB0jPQi8JOnd2dk6fzBkW83sI7msxjuBvRHxFzVPTdbj8RZJZ2eP3wC8H9gLPAhcPUytg+/hauBvojo4vgXoy864mQP
MpTpZX/fzk63T7D6Si4gbI6IjIjqzWv8mIq5podYJcTyaVvQkSBE/VM8yeZLq2OxNRdfTQv13AQeBY1T/N7Kc6hjmA8DPs99vzl4rYF32Xn8K9NRs52NAf/ZzXU17D7AnW+eLvPaFyqb3cQqOxSVUu+d/D/wk+/ngJD4eFwE/zo7HHuBPs/YLqP5B6we+CZyetU/Llvuz5y+o2dZN2XvYR3bm1kifn1b2cYo/N0t47SymSX888vz4m9RmZlbXZBxiMjOzHBwQZmZWlwPCzMzqckCYmVldDggzM6vLAWFmZnU5IMzMrC4HhJmZ1fX/AanMWczxw3l/AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib inline\n", + "df['value_len'].plot(kind='hist', legend=True, logy=True, bins=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Type distribution\n", + "The non-json values are found mainly within the smaller values.\n", + " - Orange bar: non-json values\n", + " - Blue bars: json values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'density')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png":
gEw4Eke4A1wO4Z+18DXAg8UFU3J7kbuG8Z6pYkzWLZg6Sq7k+ycUbzFuBYVT0KkORO4Iqq2g1cPvMYSaaBbzab3+quWknSQlbKxfZzgceHtqebtrncA/xkkg8B98/2hCQ7kxxMcvCpp54aX6WSpFOslIvtmaWt5npyVX0D2DHfAavqliRPAlvXrl17ccv6JElzWCkjkmngvKHt9cATbQ/qLVIkqXsrJUgOABck2ZRkLbAN2NP2oN60UZK6N4nlv3cADwAXJplOsqOqTgDXAfuBI8BdVXW4bV+OSCSpe5NYtbV9jvZ9wL5x9pVkK7B18+bN4zysJGnISpna6oQjEknqXq+DxGskktS9XgeJIxJJ6l6vg8QRiSR1r9dB4ohEkrrX6yCRJHWv10Hi1JYkda/XQeLUliR1r9dBIknqnkEiSWql10HiNRJJ6l6vg8RrJJLUvV4HiSSpewaJJKkVg0SS1IpBIklqpddB4qotSeper4PEVVuS1L1eB4kkqXsGiSSpFYNEktSKQSJJauWMSRewFEleC1zFoP6LquqfT7gkSVq1ln1EkuTWJMeTHJrRfmmSo0mOJdk13zGq6g+r6u3AvcBHu6xXkjS/SYxIbgNuAG4/2ZBkDXAjcAkwDRxIsgdYA+yesf81VXW8efxzwL/rumBJ0tyWPUiq6v4kG2c0bwGOVdWjAEnuBK6oqt3A5bMdJ8kG4Omq+vocP98J7ATYsGHDeIqXJH2blXKx/Vzg8aHt6aZtPjuAj8z1w6q6BXgP8NDatWtbFyhJmt1KCZLM0lbz7VBV76qq/7vAc3xnuyR1bKUEyTRw3tD2euCJtgf1XluS1L2VEiQHgAuSbEqyFtgG7Gl7UEckktS9SSz/vQN4ALgwyXSSHVV1ArgO2A8cAe6qqsNj6MsRiSR1bBKrtrbP0b4P2DfmvvYCe6empq4d53ElSc9bKVNbnXBEIknd63WQeI1EkrrX6yBxRCJJ3et1kDgikaTu9TpIJEndM0gkSa30Oki8RiJJ3TstP9hqVJN6H8nGXZ/8trbHfu2nl7MESVo2vR6RSJK61+sgcWpLkrrX6yBx+a8kda/XQSJJ6p5BIklqxSCRJLVikEiSWul1kLhqS5K61+sgcdWWJHWv1+9sP134TnhJp7Nej0gkSd0zSCRJrRgkkqRWRgqSJJcnWTGhk2RDkj1Jbk2ya9L1SNJqNmo4bAMeSfK+JD/YpsPmf/7Hkxya0X5pkqNJjo0QDt8PfLKqrgEualOPJKmdkYKkqt4CvBL4E+AjSR5IsjPJi5fQ523ApcMNSdYANwKXMQiG7UkuSvJPktw74+vlwMPAtiSfAT67hBokSWMy8nRVVX0d+F3gTuAc4E3AQ0n+/WI6rKr7gb+c0bwFOFZVj1bVN5s+rqiqL1XV5TO+jgNXA++qqp8AZl0n2wTdwSQHn3rqqcWUKElahFGvkbwxye8BnwFeAGypqsuAHwF+aQx1nAs8PrQ93bTN5X8D/yHJTcBjsz2hqm6pqqmqmlq3bt0YSpQkzWbUNyReCfxGM5p4TlV9I8k1Y6gjs7TVXE+uqkNNTfMfNNkKbN28eXOL0iRJ8xl1auvJmSGS5L0AVXXfGOqYBs4b2l4PPDGG40qSOjZqkFwyS9tlY6zjAHBBkk1J1jJYJban7UG915YkdW/eIEnyjiRfAn4gyReHvv4U+OJSOkxyB/AAcGGS6SQ7quoEcB2wHzgC3FVVh5dy/Bl9efdfSerYQtdIPg58CtgNDL+345mqmrnyaiRVtX2O9n3AvqUcc56+9gJ7p6amrh3ncSVJz1toaquq6jHgncAzQ18keWm3pbXniESSurdQkHy8+f4gcLD5/uDQ9ormNRJJ6t68U1tVdXnzfdPylCNJOt2M+obEH0vyoubxW5K8P8mGbktrz6ktSereqMt/fwv4RpIfAX4Z+Arwsc6qGhOntiSpe6MGyYmqKuAK4ANV9QFgKTdslCT1zKi3SHkmyfXAW4DXNXfrfU
F3ZY2Ht0h5np8LL6kro45Ifhb4O2BHVf05gxsq/npnVY2JU1uS1L2RRiRNeLx/aPv/Abd3VZQk6fQx6qqtNyd5JMnTSb6e5JkkX++6OEnSyjfqNZL3AVur6kiXxYxbX6+ReL1D0koy6jWSvzjdQgS8RiJJy2HUEcnBJJ8A/heDi+4AVNU9nVQlSTptjBok3w18A3jDUFsBBokkrXKjrtq6uutCJEmnp1FXbX1/kvuSHGq2fzjJf+m2tPa815YkdW/Ui+2/DVwPPAtQVV9k8HG4K5oX2yWpe6MGyXdW1edntJ0YdzGSpNPPqEHy1STfx+ACO0muBJ7srCpJ0mlj1FVb7wRuAX4gyZ8Bfwpc1VlVmjjf9ChpVPMGSZJfHNrcB3yWwSjmb4GfYej+W5Kk1WmhEcnJzxy5EPhnwO8DAf4NcH+Hdc0ryUXAu4GvAfdV1d2TqkWSVrt5r5FU1Xuq6j3Ay4BXVdUvVdV/Ai4G1i+lwyS3Jjl+cinxUPulSY4mOZZk1wKHuQz4UFW9A3jrUuqQJI3HqNdINgDfHNr+JrBxiX3eBtzA0G3omw/KuhG4BJgGDiTZA6wBds/Y/xoGH/P7riRvBM5eYh2SpDEYNUg+Bnw+ye8xWLn1JuCjS+mwqu5PsnFG8xbgWFU9CpDkTuCKqtoNXD7Hod7ZBJC3aZGkCRr1Fim/muRTwGubpqur6uEx1nEu8PjQ9jTwo3M9uQmi/wy8iDk+qTHJTmAnwIYNG8ZUpiRpplFHJFTVQ8BDHdWR2bqcp5bHaEJinufckuRJYOvatWsvbleexs3lxVJ/jBwkHZsGzhvaXg880fagVbUX2Ds1NXVt22NpfgaDtHqN+s72rh0ALkiyKclaBvfx2tP2oN60UZK6t+xBkuQO4AHgwiTTSXZU1QngOmA/cAS4q6oOt+3LmzZKUveWfWqrqrbP0b6Pwbvnx6avn9kuSSvJSpna6oQjEknq3kq52N4JRyT94gV9aWVyRCJJaqXXQeKqLUnqXq+DxBGJJHWv10EiSeper4PEqS1J6l6vg8SpLUnqXq+DRJLUPYNEktRKr4PEaySS1L1eB4nXSCSpe70OEklS9wwSSVIrBokkqRWDRJLUSq+DxFVbktS9Xn8eSVXtBfZOTU1dO+latPL5eScrg/8dTj+9HpFIkrpnkEiSWjFIJEmtGCSSpFZWfJAkOT/Jh5PcPdT2oiQfTfLbSa6aZH2StNp1GiRJbk1yPMmhGe2XJjma5FiSXfMdo6oeraodM5rfDNxdVdcCbxxz2ZKkReh6+e9twA3A7ScbkqwBbgQuAaaBA0n2AGuA3TP2v6aqjs9y3PXAl5rH3xpzzZKkReg0SKrq/iQbZzRvAY5V1aMASe4Erqiq3cDlIx56mkGYfIE5RlVJdgI7ATZs2LDo2iVJo5nENZJzgceHtqebtlklOTvJTcArk1zfNN8D/EyS3wL2zrZfVd1SVVNVNbVu3boxlS5JmmkS72zPLG0115Or6mvA22e0/S1w9YIdJVuBrZs3b15sjZKkEU0iSKaB84a21wNPTKAOST3hbVUmaxJTWweAC5JsSrIW2Abs6aIjPyFRkrrX9fLfO4AHgAuTTCfZUVUngOuA/cAR4K6qOtxR/979V5I61vWqre1ztO8D9nXZd9OPd/+VpI6t+He2t+GIRJK65+eRqLe8ACstj16PSCRJ3ev1iMT3kUgaB0e38+v1iMTlv5LUvV4HiSSpe70OEldtSVL3eh0kTm1JUvd6HSSSpO71Okic2pKk7vU6SJzakqTu9fp9JFLXZr6/wPcWaDUySKQVzDfC6XTQ66ktSVL3DBJJUiu9DhJXbUlS93odJK7akqTu9TpIJEndM0gkSa0YJJKkVgwSSVIrKz5Ikpyf5MNJ7p6vTZI0GZ0GSZJbkxxPcmhG+6VJjiY5lmTXfMeoqkerasdCbZKkyej6Fim3ATcAt59sSLIGuBG4BJgGDiTZA6wBds/Y/5qqOt5xjZKkFjoNkq
q6P8nGGc1bgGNV9ShAkjuBK6pqN3D5uPpOshPYCbBhw4ZxHVaSNMMkrpGcCzw+tD3dtM0qydlJbgJemeT6udpmqqpbqmqqqqbWrVs3xvIlScMmcfffzNJWcz25qr4GvH2htlk7SrYCWzdv3rzYGiVJI5pEkEwD5w1trweemEAdkvBW9WpvElNbB4ALkmxKshbYBuzpoiPvtSVJ3et6+e8dwAPAhUmmk+yoqhPAdcB+4AhwV1Ud7qh/7/4rSR3retXW9jna9wH7uuy76WcvsHdqaurarvuSpNVqxb+zvQ1HJJLUvV4HiddIJKl7k1i1Ja1arpDSXE7nfxu9HpE4tSVJ3et1kDi1JUnd63WQSJK61+sgcWpLkrrX6yBxakuSutfrIJEkdc8gkSS10usg8RqJJHWv129I9F5bkvpqJb2BsdcjEklS9wwSSVIrBokkqZVeB4kX2yWpe70OEt+QKEnd63WQSJK6Z5BIkloxSCRJrRgkkqRWUlWTrqFzSZ4CvrKEXV8GfHXM5ZzuPCen8nycyvNxqtP9fHxvVa1b6EmrIkiWKsnBqpqadB0riefkVJ6PU3k+TrVazodTW5KkVgwSSVIrBsn8bpl0ASuQ5+RUno9TeT5OtSrOh9dIJEmtOCKRJLVikEiSWjFI5pDk0iRHkxxLsmvS9SxFkluTHE9yaKjtpUk+neSR5vtLmvYk+WDzer+Y5FVD+7ytef4jSd421H5xki81+3wwSZbax3JIcl6SzyY5kuRwkv+4ms9JkjOTfD7JHzXn4z1N+6Ykn2tq/USStU37C5vtY83PNw4d6/qm/WiSnxxqn/X3aCl9LJcka5I8nOTepdbap/Mxkqrya8YXsAb4E+B8YC3wR8BFk65rCa/jdcCrgENDbe8DdjWPdwHvbR7/FPApIMCrgc817S8FHm2+v6R5/JLmZ58HXtPs8yngsqX0sYzn4xzgVc3jFwNfBi5areek6fO7mscvAD7X1HAXsK1pvwl4R/P454GbmsfbgE80jy9qfkdeCGxqfnfWzPd7tNg+lvnfyS8CHwfuXUqtfTsfI52zSRewEr+a/xHsH9q+Hrh+0nUt8bVs5NQgOQqc0zw+BzjaPL4Z2D7zecB24Oah9pubtnOAPx5qf+55i+1jgufm94FLPCcF8J3AQ8CPMngn9hlN+3O/C8B+4DXN4zOa52Xm78fJ5831e9Tss6g+lvE8rAfuA34CuHcptfbpfIz65dTW7M4FHh/anm7a+uAVVfUkQPP95U37XK95vvbpWdqX0seya6YIXsngr/BVe06aaZwvAMeBTzP4i/mvq+rELPU8V2vz86eBs1n8eTp7CX0sl98Efhn4+2Z7KbX26XyMxCCZXWZp6/s66ble82Lbl9LHskryXcDvAr9QVV+f76mztPXqnFTVt6rqnzL4S3wL8IPz1DOu8zHfa57Y+UhyOXC8qh4cbp6nnl6fj8UwSGY3DZw3tL0eeGJCtYzbXyQ5B6D5frxpn+s1z9e+fpb2pfSxbJK8gEGI/E5V3dM0r+pzAlBVfw38AYNrJN+T5IxZ6nmu1ubnZwF/yeLP01eX0Mdy+DHgjUkeA+5kML31m0uotS/nY2QGyewOABc0KynWMrjItWfCNY3LHuDkKqO3MbhOcLL9rc0qolcDTzdTMPuBNyR5SbPS6A0M5m+fBJ5J8upmZdJbZxxrMX0si6bODwNHqur9Qz9aleckybok39M8/gfAvwKOAJ8Frpyj1pOv4UrgMzWYvN8DbGtWGG0CLmCw6GDW36Nmn8X20bmqur6q1lfVxqbWz1TVVUuotRfnY1EmfZFmpX4xWE3zZQZzxr8y6XqW+BruAJ4EnmXwl80OBvOr9wGPNN9f2jw3wI3N6/0SMDV0nGuAY83X1UPtU8ChZp8beP5OCYvuY5nOx79gMC3wReALzddPrdZzAvww8HBzPg4B/7VpP5/B//iOAf8TeGHTfmazfaz5+flDx/qV5jUcpVmpNt/v0VL6WOZ/K6
/n+VVbq/58LPTlLVIkSa04tSVJasUgkSS1YpBIkloxSCRJrRgkkqRWDBJJUisGiTRmSf5mzMe7LcmVCz9TmgyDRJLUikEiLSDJe5P8/ND2u5O8K8l9SR7K4IOsrphlv9ef/HCkZvuGJP+2eXxxkv+T5MEk+0/eh2uEWmbdL8kfNHV+PsmXk7y29QuXRmSQSAu7E/jZoe1/DXwEeFNVvQr4ceC/N/fXWlBz48gPAVdW1cXArcCvjmG/M6pqC/ALwLtGqUUahzMWfoq0ulXVw0lenuQfAeuAv2JwD7PfSPI6Bp9dcS7wCuDPRzjkhcAPAZ9usmdNc7y2+528m/GDDD7QTFoWBok0mrsZ3H31HzIYoVzFIFQurqpnm1uPnzljnxOcOuo/+fMAh6vqNYusYaH9/q75/i383dYycmpLGs2dDG77fSWDUDmLwYcgPZvkx4HvnWWfrwAXNbcTPwv4l037UWBdktfAYMoqyT8eoYal7id1yr9apBFU1eEkLwb+rKqeTPI7wN4kBxncjv6PZ9nn8SR3MbhN+yMMbtlOVX2zWc77wSZgzmDwAUqHF6hhSftJXfM28pKkVpzakiS14tSWtEIkuZHB54YP+0BVfWQS9UijcmpLktSKU1uSpFYMEklSKwaJJKkVg0SS1Mr/B0NHHeyvZyHKAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
# Density histogram of value_len, drawn separately for JSON and non-JSON values,
# on a log-scaled y-axis.
# NOTE(review): `df`, `plt` come from earlier cells; df is assumed to expose
# `is_json` and `value_len` columns as used below.
json_lengths = df[df.is_json == True].value_len
other_lengths = df[df.is_json == False].value_len

plt.hist(
    (json_lengths, other_lengths),
    bins=25,
    density=True,
    label=['true', 'false'],
)
plt.yscale('log')
plt.xlabel('value_len')
plt.ylabel('density')
# The labels passed to hist() are only rendered once a legend is requested;
# without this call the two series cannot be told apart.
# Trailing ';' suppresses the repr of the returned Legend object.
plt.legend(title='is_json');
cxU1U7gWuCGJLP0zow2t7G7k9wE3AscAi6tqqcABtVsU14O7EjyQeCeVpthcwBvAq5Mcgh4CvjlqnpsUsdDkjS/9E4uNI7p6emamZlZ8PipLbcetv3Q6guf2+mKgwuuL0ldlGRXVU2P6uedGiRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6oSJBlKSjUn2JJlNsmXA68clubG9fmeSqb7Xtrb2PUnOGVUzybpW475Wc9WoOdrrpyb5dpL3L/4RkCSNa2KBlGQFcBVwLrABuCDJhjndLgYOVNVpwHZgWxu7AdgMnA5sBK5OsmJEzW3A9qpaDxxotYfO0Wc78JnF2WtJ0kJN8gzpTGC2qh6oqieBHcCmOX02Ade35zcDZydJa99RVU9U1YPAbKs3sGYbc1arQat53og5SHIe8ACwexH3W5K0AJMMpJOAh/u297a2gX2q6hBwEFgzz9hh7WuAx1uNuXMNnCPJDwCXAx9Y8B5KkhbNJAMpA9pqzD6L1T7fHB+g9xbftwe8/uwCk0uSzCSZ2bdv33xdJUnPw8oJ1t4LnNK3fTLwyJA+e5OsBI4HHhsxdlD7fuCEJCvbWVB//2FzvBY4P8mHgBOA7yX5u6r6WP8Cq+oa4BqA6enpuYEqSVokkzxDuhtY365+W0XvIoWdc/rsBC5qz88Hbq+qau2b2xVy64D1wF3DarYxd7QatJq3zDdHVb2xqqaqagr4HeDfzQ0jSdLSmdgZUlUdSnIZcBuwAriuqnYnuRKYqaqdwLXADUlm6Z21bG5jdye5CbgXOARcWlVPAQyq2aa8HNiR5IPAPa02w+aQJHVLeicXGsf09HTNzMwsePzUllsP235o9YXP7XTFwQXXl6QuSrKrqqZH9fNODZKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6YaxASvLWJIaXJGlixg2ZzcB9ST6U5McnuSBJ0vI0ViBV1buAnwbuBz6Z5P8kuSTJiye6OknSsjH223BV9U3gD4AdwInAO4DPJ/lXE1qbJGkZGfczpLcn+RRwO/Ai4MyqOhf4KeD9E1yfJGmZWDlmv/OB7VX1p/2NVfWdJL+0+MuSJC03475l9+jcMEqyDaCqPrfoq5IkLTvjBtKbB7Sdu5gLkSQtb/O+ZZfkV4D3Aj+a5Et9L70Y+LNJLkyStLyMOkP6PeBtwC3t59OPM9ql4PNKsjHJniSzSbYMeP24JDe21+9MMtX32tbWvifJOaNqJlnXatzXaq6ab44kZyb5Qnt8Mck7Ru2PJGlyRgVSVdVDwKXAt/oeJHnpfAOTrACuovfW3gbggiQb5nS7GDhQVacB24FtbewGen+MezqwEbg6yYoRNbfRu/BiPXCg1R46B/AVYLqqXt3m+ESScS/ykCQtsnHOkAB2ATPt566+7fmcCcxW1QNV9SS9v1/aNKfPJuD69vxm4Owkae07quqJqnoQmG31BtZsY85qNWg1z5tvjqr6TlUdau2rgRqxP5KkCZr3jKCq3tp+rltA7ZOAh/u29wKvHdanqg4lOQisae1/PmfsSe35oJprgMf7Aqa//7A59id5LXAd8ArgF/vGS5KW2Lh/GPuGJD/Qnr8ryYeTnDpq2IC2uWchw/osVvu866iqO6vqdOAfAVuTrJ7bsd0iaSbJzL59+waUkiQthnEv+/6PwHeS/BTwa8DXgRtGjNkLnNK3fTLwyLA+7fOb44HH5hk7rH0/cELfZ0D9cw2b4xlV9VXgb4GfmLsTVXVNVU1X1
fTatWtH7LIkaaHGDaRDVVX0Po/5SFV9hN6l3/O5G1jfrn5bRe8ihZ1z+uwELmrPzwdub/PsBDa3K+TWAeuBu4bVbGPuaDVoNW+Zb45WYyVAklcArwIeGvN4SJIW2bhXlX0ryVbgXcCb2tVuL5pvQPu85jLgNmAFcF1V7U5yJTBTVTuBa4EbkszSO2vZ3MbuTnITcC9wCLi0qp4CGFSzTXk5sCPJB4F7Wm2GzQH8DLAlyXeB7wHvrar9Yx4PSdIiS+/kYkSn5EeAC4G7q+p/tc+Pfraq/sukF9gl09PTNTMz6uLC4aa23HrY9kOrL3xupysOLri+JHVRkl1VNT2q31hnSFX1V8CH+7b/L7CswkiSNFnjXmX3T9sdEA4m+WaSbyX55qQXJ0laPsb9DOlDwNva1WiSJC26ca+y+2vDSJI0SeOeIc0kuRH4I+CJpxur6g8nsipJ0rIzbiD9EPAd4C19bQUYSJKkRTHuVXbvmfRCJEnL27hX2f1Yks8l+Urb/skk/2ayS5MkLSfjXtTwn4CtwHcBqupLPHvHA0mSnrdxA+n7q+quOW1+VYMkadGMG0j7k/wo7WsbkpwPPDqxVUmSlp1xr7K7FLgG+AdJ/hJ4EHjnxFYlSVp25g2kJO/r2/zv9L7i4fvofXfQL9B3fztJkp6PUWdIT3/n0avofavqLfS+gfUXgT+d4LokScvMvIFUVR8ASPI/gddU1bfa9hXA7098dZKkZWPcixpOBZ7s234SmFr01UiSlq1xL2q4AbgryafoXWn3DuD6ia1KkrTsjHvroN9K8hngja3pPVV1z+SWJUlabsY9Q6KqPg98foJrkSQtY+N+hiRJ0kQZSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROmGggJdmYZE+S2SRbBrx+XJIb2+t3Jpnqe21ra9+T5JxRNZOsazXuazVXzTdHkjcn2ZXky+3nWZM7EpKkUSYWSElWAFcB5wIbgAuSbJjT7WLgQFWdBmwHtrWxG4DNwOnARuDqJCtG1NwGbK+q9cCBVnvoHMB+4G1V9Q+Bi+jd0VySdJRM8gzpTGC2qh6oqieBHcCmOX028ezXWNwMnJ0krX1HVT1RVQ8Cs63ewJptzFmtBq3mefPNUVX3VNUjrX03sDrJcYu295KkIzLJQDoJeLhve29rG9inqg4BB4E184wd1r4GeLzVmDvXsDn6/QJwT1U9MXcnklySZCbJzL59+0bssiRpoSYZSBnQVmP2Waz2ketIcjq9t/H+5YB+VNU1VTVdVdNr164d1EWStAgmGUh7gVP6tk8GHhnWJ8lK4HjgsXnGDmvfD5zQasyda9gcJDkZ+BTw7qq6f4H7KUlaBJMMpLuB9e3qt1X0LlLYOafPTnoXFACcD9xeVdXaN7cr5NYB64G7htVsY+5oNWg1b5lvjiQnALcCW6vqzxZ1zyVJR2xigdQ+r7kMuA34KnBTVe1OcmWSt7du1wJrkswC7wO2tLG7gZuAe4H/AVxaVU8Nq9lqXQ68r9Va02oPnaPVOQ34jSRfaI+XT+RgSJJGSu/kQuOYnp6umZmZBY+f2nLrYdsPrb7wuZ2uOLjg+pLURUl2VdX0qH7eqUGS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE6YaCAl2ZhkT5LZJFsGvH5ckhvb63cmmep7bWtr35PknFE1k6xrNe5rNVfNN0eSNUnuSPLtJB+b3FGQJI1jYoGUZAVwFXAusAG4IMmGOd0uBg5U1WnAdmBbG7sB2AycDmwErk6yYkTNbcD2qloPHGi1h84B/B3wG8D7F3XHJUkLMskzpDOB2ap6o
KqeBHYAm+b02QRc357fDJydJK19R1U9UVUPArOt3sCabcxZrQat5nnzzVFVf1tV/5teMEmSjrJJBtJJwMN923tb28A+VXUIOAismWfssPY1wOOtxty5hs0hSeqQSQZSBrTVmH0Wq33cdQyV5JIkM0lm9u3bN+4wSdIRmmQg7QVO6ds+GXhkWJ8kK4HjgcfmGTusfT9wQqsxd65hc4ylqq6pqumqml67du24wyRJR2iSgXQ3sL5d/baK3kUKO+f02Qlc1J6fD9xeVdXaN7cr5NYB64G7htVsY+5oNWg1bxkxhySpQ1aO7rIwVXUoyWXAbcAK4Lqq2p3kSmCmqnYC1wI3JJmld9ayuY3dneQm4F7gEHBpVT0FMKhmm/JyYEeSDwL3tNoMm6PVegj4IWBVkvOAt1TVvZM5IpKk+cSThfFNT0/XzMzMgsdPbbn1sO2HVl/43E5XHFxwfUnqoiS7qmp6VD/v1CBJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUidMNJCSbEyyJ8lski0DXj8uyY3t9TuTTPW9trW170lyzqiaSda1Gve1mqsWOockaelNLJCSrACuAs4FNgAXJNkwp9vFwIGqOg3YDmxrYzcAm4HTgY3A1UlWjKi5DdheVeuBA632Ec+xuEdBkjSuSZ4hnQnMVtUDVfUksAPYNKfPJuD69vxm4Owkae07quqJqnoQmG31BtZsY85qNWg1z1vgHJKko2DlBGufBDzct70XeO2wPlV1KMlBYE1r//M5Y09qzwfVXAM8XlWHBvRfyBydMLXl1ue0PfTv/8mi9ZekLplkIGVAW43ZZ1j7oDO6+fovZI7DF5hcAlzSNr+dZM+AcaO8DNj/nNqDen5gYOuzY7Yd2cRH2n8JDTwmy5jH43Aej8O90I/HK8bpNMlA2guc0rd9MvDIkD57k6wEjgceGzF2UPt+4IQkK9tZUn//hczxjKq6BrhmjP0dKslMVU0/nxrHGo/J4Tweh/N4HG65HI9JfoZ0N7C+Xf22it4FBDvn9NkJXNSenw/cXlXV2je3K+TWAeuBu4bVbGPuaDVoNW9Z4BySpKNgYmdI7fOay4DbgBXAdVW1O8mVwExV7QSuBW5IMkvvrGVzG7s7yU3AvcAh4NKqegpgUM025eXAjiQfBO5ptVnIHJKkpZfeyYImKckl7a0/NR6Tw3k8DufxONxyOR4GkiSpE7x1kCSpEwykCRt1+6QXgiTXJflGkq/0tb00yWfbrZo+m+QlrT1JPtr290tJXtM35qLW/74kF/W1n5Hky23MR9sfLi9ojqWQ5JQkdyT5apLdSf71cj4mSVYnuSvJF9vx+EBrX5dFup3XsN+jhcyxVNK7u8w9ST690LUeS8djLFXlY0IPehde3A+8ElgFfBHYcLTXtYD9eBPwGuArfW0fAra051uAbe35zwOfofdnVq8D7mztLwUeaD9f0p6/pL12F/D6NuYzwLkLmWMJj8eJwGva8xcDX6N3K6tleUzanD/Ynr8IuLOt4SZgc2v/OPAr7fl7gY+355uBG9vzDe135DhgXfvdWTHf79GRzrHE/528D/g94NMLWeuxdjzGOmZHewHH8qP9g3Jb3/ZWYOvRXtcC92WKwwNpD3Bie34isKc9/wRwwdx+wAXAJ/raP9HaTgT+oq/9mX5HOsdRPDa3AG/2mBTA9wOfp3cHlf3Aytb+zO8CvatkX9+er2z9Mvf34+l+w36P2pgjmmMJj8PJwOfo3dLs0wtZ67F0PMZ9+JbdZA26fVKnbk/0PPxwVT0K0H6+vLUP2+f52vcOaF/IHEuuvfXx0/TOCpbtMWlvT30B+AbwWXr/Bz/W7byA/tt5H
clxGvuWYX1zLJXfAX4N+F7bXshaj6XjMRYDabLGuj3RMeZIb9W0kGPUieOa5AeBPwB+taq+OV/XAW3H1DGpqqeq6tX0zgzOBH58nvUs1vFYyC3DJi7JW4FvVNWu/uZ51nNMH48jYSBN1li3J3qB+uskJwK0n99o7cP2eb72kwe0L2SOJZPkRfTC6Her6g9b87I+JgBV9Tjwx/Q+Qzohvdt1zV3PM2vNeLfzGtb+zC3DjmCOpfAG4O1JHqL3jQRn0TtjWq7HY2wG0mSNc/ukF6r+WzLNvVXTu9tVX68DDra3lm4D3pLkJe3KsLfQe3/7UeBbSV7XriR7N4Nv+zTOHEuirfNa4KtV9eG+l5blMUmyNskJ7fnfA/4x8FUW73Zei3nLsImrqq1VdXJVTbW13l5V71zAWo+J43FEjvaHWMf6g97VT1+j9576rx/t9SxwH/4r8CjwXXr/p3UxvfefPwfc136+tPUNvS9RvB/4MjDdV+eX6H3v1Czwnr72aeArbczHePYPto94jiU6Hj9D7+2OLwFfaI+fX67HBPhJerfr+lJb879t7a+k9w/oLPD7wHGtfXXbnm2vv7Kv1q+3fdhDu7Jwvt+jhcyxxP+t/CzPXmW37I/HqId3apAkdYJv2UmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEhSRyX59iLX+89Jzh/dUzo6DCRJUicYSNISSbItyXv7tq9I8ptJPpfk8+l9Id+mAeN+9ukveWvbH0vyL9rzM5L8SZJdSW57+j53Y6xl4Lgkf9zWeVeSryV54/PecWlMBpK0dHYA/7xv+58BnwTeUVWvAX4O+O12/7qR2g1e/wNwflWdAVwH/NYijFtZVWcCvwr85jhrkRbDytFdJC2GqronycuT/H1gLXCA3j0Ctyd5E73vzjkJ+GHgr8Yo+SrgJ4DPtgxb0eo933FP3718F70vZpSWhIEkLa2b6d1t+UfonTG9k144nVFV321fWbB6zphDHP5uxtOvB9hdVa8/wjWMGvdE+/kU/huhJeRbdtLS2kHv6wLOpxdOx9P7MrfvJvk54BUDxnwd2NC+huB44OzWvgdYm+T10HsrLsnpY6xhoeOkifL/fqQlVFW7k7wY+MuqejTJ7wL/LckMva+x+IsBYx5OchO9r3e4j95XPVBVT7bLuD/agmolvS+C2z1iDQsaJ02aXz8hSeoE37KTJHWCb9lJx5gkVwFvmNP8kar65NFYjzQu37KTJHWCb9lJkjrBQJIkdYKBJEnqBANJktQJBpIkqRP+PzUtoPevk+qYAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
# Same density histogram of value_len (JSON vs. non-JSON) as the log-scale
# cell, but on a linear y-axis.
# NOTE(review): `df`, `plt` come from earlier cells; df is assumed to expose
# `is_json` and `value_len` columns as used below.
json_lengths = df[df.is_json == True].value_len
other_lengths = df[df.is_json == False].value_len

plt.hist(
    (json_lengths, other_lengths),
    bins=25,
    density=True,
    label=['true', 'false'],
)
plt.xlabel('value_len')
plt.ylabel('density')
# The labels passed to hist() are only rendered once a legend is requested;
# without this call the two series cannot be told apart.
# Trailing ';' suppresses the repr of the returned Legend object.
plt.legend(title='is_json');
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAExCAYAAACHweKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAF35JREFUeJzt3X2QVfWd5/H3Vx5kV0BnpZ1S0cDuEhUxIrSAIQuaTErRjIyJm2hIVlNGy80aY2XiyMQpdUztOokZJ2bXzK4mPqZ8irVRosy6W6PiQ9QFFBR0zaKDsQc3QeJzZATy3T/ubek0DX0bLn36/u77VXWr7jn31/d+r21/+N3fPed7IjORJJVlj6oLkCQ1n+EuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKtDwql543LhxOWHChKpeXpJa0vLly1/LzI7+xlUW7hMmTGDZsmVVvbwktaSIeLmRcS7LSFKBDHdJKpDhLkkFqmzNvS+bNm2iq6uLjRs3Vl3KoBo1ahTjx49nxIgRVZciqRBDKty7uroYM2YMEyZMICKqLmdQZCYbNmygq6uLiRMnVl2OpEL0uywTEddHxK8jYtV2Ho+I+H5ErImIZyJi2s4Ws3HjRvbdd9+2CXaAiGDfffdtu08rknavRtbcbwRO2MHj84BJ9ds5wN/uSkHtFOzd2vE9S9q9+g33zHwY+M0OhswHbs6aJ4B9ImL/ZhUoSRq4Zqy5Hwi80mO7q77v1d4DI+IcarN7Dj744H6feMLC+5pQ3lZr/+qkhsZ99KMf5ec//3lTX1tqV83+O+5Po3/npWvGoZB9rSn0edXtzLw2Mzszs7Ojo9+zZytjsEtqdc0I9y7goB7b44F1TXjeyowePZpXX32VOXPmMHXqVKZMmcIjjzwCwG233cYRRxzBlClTuOiii37vZy6++GKOPPJIZs2axa9+9auqypekpoT7IuDf1Y+amQW8mZnbLMm0mltvvZXjjz+eFStWsHLlSqZOncq6deu46KKLeOCBB1ixYgVLly7l7rvvBuDdd99l1qxZrFy5kjlz5nDddddV/A4ktbNGDoW8DXgcOCQiuiLirIg4NyLOrQ9ZDLwErAGuA76y26odREcffTQ33HADl112Gc8++yxjxoxh6dKlHHvssXR0dDB8+HAWLFjAww8/DMDIkSP51Kc+BcD06dNZu3ZthdVLanf9fqGamaf383gC/6FpFQ0Rc+bM4eGHH+a+++7ji1/8IhdeeCFjx47d7vgRI0Z8cEjjsGHD2Lx582CVKknbsLfMdrz88svst99+nH322Zx11lk89dRTzJw5kyVLlvDaa6+xZcsWbrvtNubOnVt1qZK0jSHVfqC3qg5piggeeughrrzySkaMGMHo0aO5+eab2X///bniiis47rjjyExOPPFE5s+fX0mNkrQjUVtVGXydnZ3Z+2Idzz//PIcddlgl9XTbsGED06ZN4+WXG+qH3zRD4b1Lu4PHuTdXRCzPzM7+xrks08O6des45phj+MY3vlF1KZK0S4b0ssxgO+CAA/jFL35RdRmStMucuUtSgQx3SSqQ4S5JBTLcJalAQ/sL1cv2bvLzvblTP/bGG29w66238pWv1DorPPTQQ3z3u9/l3nvvbWZ1ktQ0ztwb8MYbb/CDH/ygac9nawJJu5vh3oerrrqKKVOmMGXKFL73ve+xcOFCXnzxRaZOncqFF14IwDvvvMOpp57KoYceyoIFC+g+GWz58uXMnTuX6dOnc/zxx/Pqq7UGmcceeyzf/OY3mTt3LldffXVl701SexjayzIVWL58OTfccANPPvkkmcnMmTP58Y9/zKpVq1ixYgVQW5Z5+umnWb16NQcccACzZ8/mscceY+bMmXz1q1/lnnvuoaOjgzvuuIOLL76Y66+/Hqh9AliyZEmVb09SmzDce3n00Uc55ZRT2GuvvQD49Kc//cGFOnqaMWMG48ePB2Dq1Km
sXbuWffbZh1WrVvHJT34SgC1btrD//lsvJ/u5z31uEN6BJBnu22i0186ee+75wf3uFr+ZyeGHH87jjz/e5890/4MhSbuba+69zJkzh7vvvpvf/va3vPvuu/z0pz9l9uzZvP322/3+7CGHHML69es/CPdNmzaxevXq3V2yJG1jaM/cd/LQxV0xbdo0zjzzTGbMmAHAl7/8ZaZPn87s2bOZMmUK8+bN46ST+u46N3LkSO666y7OP/983nzzTTZv3swFF1zA4YcfPphvQZJs+TtUtPN7V9ls+dtctvyVpDZmuEtSgYZcuFe1TFSldnzPknavIRXuo0aNYsOGDW0VdpnJhg0bGDVqVNWlSCrIkDpaZvz48XR1dbF+/fqqSxlUo0aN+uCEKElqhiEV7iNGjGDixIlVlyFJLW9ILctIkprDcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFGlLHuRflsr0H+fUGvz2ypKGroZl7RJwQES9ExJqIWNjH4wdHxIMR8XREPBMRJza/VElSo/oN94gYBlwDzAMmA6dHxORew/4CuDMzjwJOA37Q7EIlSY1rZOY+A1iTmS9l5vvA7cD8XmMSGFu/vzewrnklSpIGqpFwPxB4pcd2V31fT5cBX4iILmAx8NW+nigizomIZRGxrN2ag0nSYGok3KOPfb178p4O3JiZ44ETgVsiYpvnzsxrM7MzMzs7OjoGXq0kqSGNhHsXcFCP7fFsu+xyFnAnQGY+DowCxjWjQEnSwDUS7kuBSRExMSJGUvvCdFGvMb8EPgEQEYdRC3fXXSSpIv2Ge2ZuBs4D7geep3ZUzOqIuDwiTq4P+1Pg7IhYCdwGnJntdDklSRpiGjqJKTMXU/uitOe+S3rcfw6Y3dzSJEk7y/YDklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoOGNDIqIE4CrgWHADzPzr/oY81ngMiCBlZn5+SbWKQ2ey/Ye5Nd7c3BfT22h33CPiGHANcAngS5gaUQsyszneoyZBPw5MDszX4+I/XZXwZKk/jWyLDMDWJOZL2Xm+8DtwPxeY84GrsnM1wEy89fNLVOSNBCNhPuBwCs9trvq+3r6MPDhiHgsIp6oL+NsIyLOiYhlEbFs/fr1O1exJKlfjYR79LEve20PByYBxwKnAz+MiH22+aHMazOzMzM7Ozo6BlqrJKlBjYR7F3BQj+3xwLo+xtyTmZsy8x+AF6iFvSSpAo2E+1JgUkRMjIiRwGnAol5j7gaOA4iIcdSWaV5qZqGSpMb1G+6ZuRk4D7gfeB64MzNXR8TlEXFyfdj9wIaIeA54ELgwMzfsrqIlSTvW0HHumbkYWNxr3yU97ifw9fpNklQxz1CVpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFaihcI+IEyLihYhYExELdzDu1IjIiOhsXomSpIHqN9wjYhhwDTAPmAycHhGT+xg3BjgfeLLZRUqSBqaRmfsMYE1mvpSZ7wO3A/P7GPct4DvAxibWJ0naCY2E+4HAKz22u+r7PhARRwEHZea9O3qiiDgnIpZFxLL169cPuFhJUmMaCffoY19+8GDEHsDfAH/a3xNl5rWZ2ZmZnR0dHY1XKUkakEbCvQs4qMf2eGBdj+0xwBTgoYhYC8wCFvmlqiRVZ3gDY5YCkyJiIvCPwGn
A57sfzMw3gXHd2xHxEPCNzFzW3FLVziYsvG/QXmvtqEF7KWm36XfmnpmbgfOA+4HngTszc3VEXB4RJ+/uAiVJA9fIzJ3MXAws7rXvku2MPXbXy5Ik7QrPUJWkAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAI1FO4RcUJEvBARayJiYR+Pfz0inouIZyLi7yPiQ80vVZLUqH7DPSKGAdcA84DJwOkRMbnXsKeBzsz8CHAX8J1mFypJalwjM/cZwJrMfCkz3wduB+b3HJCZD2bmb+ubTwDjm1umJGkgGgn3A4FXemx31fdtz1nA3/X1QEScExHLImLZ+vXrG69SkjQgjYR79LEv+xwY8QWgE7iyr8cz89rM7MzMzo6OjsarlCQNyPAGxnQBB/XYHg+s6z0oIv4IuBiYm5n/1JzyJEk7o5GZ+1JgUkRMjIiRwGnAop4DIuIo4L8BJ2fmr5tfpiRpIPqduWfm5og4D7gfGAZcn5mrI+JyYFlmLqK2DDMa+ElEAPwyM0/ejXUP2ISF9w3q660dNagvJ0m/p5FlGTJzMbC4175Letz/oybXJUnaBZ6hKkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSpQQ+EeESdExAsRsSYiFvbx+J4RcUf98ScjYkKzC5UkNa7fcI+IYcA1wDxgMnB6REzuNews4PXM/NfA3wDfbnahkqTGNTJznwGsycyXMvN94HZgfq8x84Gb6vfvAj4REdG8MiVJAzG8gTEHAq/02O4CZm5vTGZujog3gX2B13oOiohzgHPqm+9ExAs7U3QrCBhHr/e/W/2l/5Y2i7+71hbfHuTf3+D7UCODGgn3vv7Py50YQ2ZeC1zbwGu2vIhYlpmdVdehgfN319r8/dU0sizTBRzUY3s8sG57YyJiOLA38JtmFChJGrhGwn0pMCkiJkbESOA0YFGvMYuAM+r3TwUeyMxtZu6SpMHR77JMfQ39POB+YBhwfWaujojLgWWZuQj4EXBLRKyhNmM/bXcW3SLaYvmpUP7uWpu/PyCcYEtSeTxDVZIKZLhLUoEMd0kqkOEuSQUy3CWpQI2coaodiIif0cfZuN0y8+RBLEe7ICI+DFxI7fTuD/42MvPjlRWlHYqIt9nx39/YQSxnSDHcd913qy5ATfMT4L8C1wFbKq5FDcjMMQD1827+H3ALtXYoC4AxFZZWOY9zl+oiYnlmTq+6Dg1cRDyZmTP729dOXHNvkoiYFBF3RcRzEfFS963qujQgP4uIr0TE/hHxL7pvVRelhmyJiAURMSwi9oiIBbT5py9n7k0SEY8Cl1K7WMkfA1+i9t/30koLU8Mi4h/62J2Z+S8HvRgNSP3qb1cDs6mtwT8GXJCZa6urqlqGe5N0f6SPiGcz84j6vkcy899UXZtUuoiYnZmP9bevnfiFavNsjIg9gP9bb7T2j8B+FdekAYqIKdQuJzmqe19m3lxdRWrQfwamNbCvbRjuzXMB8M+B84FvAR9naxtktYCIuBQ4llq4L6Z23eBHAcN9iIqIY4CPAh0R8fUeD42l1sW2bRnuTZKZS+t336G23q7WcypwJPB0Zn4pIv4Q+GHFNWnHRgKjqWVZz0Mf36L2+2xbhnuTeAJMEd7LzN9FxOaIGAv8GvDL1CEsM5cASyLixsx8GaC+PDo6M9+qtrpqGe7N4wk
wrW9ZROxD7Xe4nNqnsP9dbUlq0BURcS61v73lwN4RcVVmXllxXZXxaJkm8QSYstQPrRubmc9UXIoaEBErMnNq/fj26cBFwPLM/EjFpVXGk5iaxxNgWlzUfCEiLqkfH/1GRMyoui41ZEREjAD+BLgnMzexg54z7cCZe5N4Akzri4i/BX4HfDwzD4uIPwD+Z2YeXXFp6kdEnE9ttr4SOAk4GPhxO59nYrhLdRHxVGZOi4inM/Oo+r6VmXlk1bVpYCIigGGZubm+fUZm3lRxWYPKL1R3UUR8PDMfiIhP9/V4Zv73wa5JO21TRAyj/nE+IjqozeTVYrI2a93cY9fXAMNdAzIXeIBaP5neEjDcW8f3gZ8C+0XEf6R2nPRfVFuSmiSqLmCwuSwj9RARhwKfoBYGf5+Zz1dckpqge8mt6joGkzP3Jul16nO3N6kdjrVisOvRTvsV8Ai1v41/FhHTMvOpimvSrmu7mbvh3jyd9dvP6tsnAUuBcyPiJ5n5ncoqU0Mi4lvAmcCLbD2MLqn1CVJra7vukC7LNElE3A98JjPfqW+PBu4CTqE2e59cZX3qX0S8AByRme9XXYsGpt4H6D8BB2TmvIiYDByTmT+quLTKeBJT8xwM9AyFTcCHMvM94J+qKUkDtArYp+oitFNuBO4HDqhv/4Jap9a25bJM89wKPBER99S3/xi4LSL2Ap6rriwNwBXA0xGxih7/IGfmydWVpAaNy8w7I+LPATJzc0S0dY8nw71JMvNbEbEY+Bi1L2/Ozcxl9YcXVFeZBuAm4NvAs3h8e6t5NyL2Zes5CrOoHdDQtlxz30URMTYz39peH5nM/M1g16SdExFLMnNu1XVo4CJiOrXzFKZQW17rAE5t58Zvhvsuioh7M/NT9d4yPf9jBvaWaSkRcRW15ZhF/P6yjIdCtoCIGA4cQu1v74V687C2Zbg3Qb2PxUGZ+cuqa9HOi4gH+9idXnBl6IuIlcAdwB2Z+WLV9QwFhnuT2M+9fO3YfKpVRMSHgM/Vb7+jFvR3tvOEy0Mhm+eJiLA1bNm+VnUB6ltmvpyZ36lPsD4PfAToqw132/BomeY5jtrZqGuBd9m65t62V4IpUNudwt5K6lfP+iy12fsW4M+qrKdqhnvzzAP+AOi+OMDDwBvVlaPdwDXMISoingRGULuW8b/NzJcqLqlyLss0z58AtwDjqB2GdQvgyS9lceY+dJ2RmdMy8wqDvcYvVJskIp6h1svi3fr2XsDjLsuUIyL+S2aeV3Ud2lZE7A1cCsyp71oCXJ6ZbXsikzP35glq63zdtuBMr6VExB9GxI8i4u/q25Mj4qzuxw32Ie164G1qa+6fBd4Cbqi0ooo5c2+Sej/3M6hdyQdqyzQ3Zub3qqtKA1EP9RuAizPzyPpJMU9n5hEVl6Z+RMSKzJza37524sy9STLzKuBLwG+A14EvGewtZ1xm3km9r0z94spt3XyqhbwXER/r3oiI2cB7FdZTOY+WaaL6aeqeqt66bD7Vuv49cFN97T2oTbLOqLakarksI9XZfKr1RcRYgMx8q+paqma4Sz3YfKo11T9xXUqt5XYCj1I7WmZDpYVVyDV3qa7efOrPgI2Zucpgbym3A+uBzwCn1u/fUWlFFXPmLtXZfKp19dW4LyKWZWZnVTVVzZm7VGfzqZb2YEScFhF71G+fBe6ruqgqOXOXeuij+dQdmfnXVdak7YuIt6mtsQewF1sPXR0GvJOZY6uqrWoeCinV2Xyq9WTmmO779UtdTgJGVVfR0OHMXaqLiEMz8/9UXYcGLiK+TK3f/nhgBTAL+HlmfqLSwirkmru01asRcVVELKvf/rp+UoyGvq8BRwMvZ+ZxwFHAa9WWVC3DXdrK5lOta2NmbgSIiD3rn8AOqbimSrnmLm31rzLzMz22/zIiVlRWjQaiKyL2Ae4G/ldEvA6sq7imShnu0lbvRcTHMvNRsPlUK8nMU+p3L4uIB4G9gf9RYUmV8wtVqS4ipgI3UQuGD5p
P2VtGrchwl3qx+ZRK4BeqUl1E7BsR3wceonbG49X1hlRSyzHcpa1sPqViuCwj1dl8SiVx5i5tZfMpFcOZu9qezadUIo9zV9uz+ZRKZLhLddtrPgW0bfMptS7X3KWtbD6lYhju0lY2n1IxXJaRtrL5lIrh0TJSHyJiLvXmU5n5ftX1SANluEtSgVxzl6QCGe6SVCDDXZIKZLhLUoH+Pyga7Mm8jS84AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
# ---- cell: JSON percentage by group ----------------------------------------
def count_json(df):
    """Return ``(json_fraction, other_fraction)`` for ``df.is_json``.

    Both fractions are taken over the number of non-null ``is_json`` entries
    (``Series.count()`` skips missing values).
    """
    trues = df.is_json[df.is_json == True].count()
    falses = df.is_json[df.is_json == False].count()
    total = df.is_json.count()
    return trues/total, falses/total


total_count = count_json(df)
total_mean = df.value_len.mean()
total_std = df.value_len.std()

# Same fractions, restricted to values longer than the mean / mean + 1 std.
above_mean_count = count_json(df[df['value_len'] > total_mean])
above_std_count = count_json(df[df['value_len'] > (total_mean + total_std)])

p1 = pd.DataFrame([total_count, above_mean_count, above_std_count],
                  columns=['json', 'other'],
                  index=['original', 'above_mean', 'above_std'])
plot = p1.plot(kind='bar')
# The bars are fractions in [0, 1]; label the axis so the figure stands alone.
plot.set_ylabel('fraction of values');


# ---- cell: JSON percentage by bins -----------------------------------------
# Helper code to split value_len into equal-width bins and compute, per bin,
# the share of JSON and non-JSON values.
import math


def percetangeData(df, nbins=10):
    """Split ``df.value_len`` into ``nbins`` equal-width bins and report the
    JSON / non-JSON share of each bin.

    NOTE(review): assumes ``df`` is an in-memory pandas DataFrame with
    ``value_len`` and ``is_json`` columns - a lazy (dask) frame would need
    ``.compute()`` first; confirm against the earlier cells.

    Parameters
    ----------
    df : DataFrame with ``value_len`` and ``is_json`` columns.
    nbins : int, default 10
        Number of bins (the original hard-coded value).

    Returns
    -------
    (bins, trues, falses) : tuple of three lists of length ``nbins``
        ``bins``   - upper edge of each bin, as a string (used as bar label);
        ``trues``  - fraction of rows in the bin with ``is_json == True``;
        ``falses`` - fraction of rows in the bin with ``is_json == False``.
        An empty bin contributes ``0`` to both lists.

    Raises
    ------
    ValueError
        If ``df`` has no rows (there is no value range to bin).
    """
    if len(df) == 0:
        raise ValueError('percetangeData() needs at least one row')

    lengths = df.value_len
    minimum_value = lengths.min()
    range_value = lengths.max() - minimum_value
    # ceil() keeps integer bin edges; the floor of 1 avoids a zero-width step
    # (all values identical), which would leave every bin empty.
    step = max(math.ceil(range_value / nbins), 1)

    # Computed once instead of re-filtering the frame three times per bin.
    # `== True` / `== False` are kept so rows with a missing is_json count
    # towards neither share, as before.
    is_true = df.is_json == True
    is_false = df.is_json == False

    bins = []
    trues = []
    falses = []

    lower = minimum_value
    for i in range(nbins):
        upper = lower + step
        if i == nbins - 1:
            # Last bin is closed on the right: when the range divides evenly
            # by nbins the maximum equals this edge and used to be dropped.
            in_bin = (lengths >= lower) & (lengths <= upper)
        else:
            in_bin = (lengths >= lower) & (lengths < upper)

        range_count = int(in_bin.sum())
        bins.append(str(upper))  # upper margin of the bin
        if range_count == 0:
            # Nothing in this bin, so both shares are 0 (avoids 0/0).
            trues.append(0)
            falses.append(0)
        else:
            trues.append(int((in_bin & is_true).sum()) / range_count)
            falses.append(int((in_bin & is_false).sum()) / range_count)
        lower = upper

    return (bins, trues, falses)


def plotPercentualComparison(df, title='Value type: Json X Other', nbins=10):
    """Draw a stacked bar chart of the JSON / non-JSON share per value_len bin.

    Parameters
    ----------
    df : DataFrame passed straight to ``percetangeData``.
    title : str
        Figure title.
    nbins : int, default 10
        Number of bins, forwarded to ``percetangeData``.

    Returns
    -------
    The ``matplotlib.pyplot`` module (kept for backward compatibility with
    callers that use the return value).
    """
    bins, trues, falses = percetangeData(df, nbins=nbins)
    width = 0.95
    # `bins` are strings, so the bars sit at categorical positions 0..n-1.
    json_bars = plt.bar(bins, trues, width=width)
    other_bars = plt.bar(bins, falses, bottom=trues, width=width)

    # The bars are per-bin fractions, not scores.
    plt.ylabel('Fraction of values in bin')
    plt.xlabel('Value_len')
    plt.title(title)
    plt.legend((json_bars[0], other_bars[0]), ('JSON', 'Other'))
    # Label only a few evenly spaced bins; derived from the actual number of
    # bins instead of a second hard-coded 10.
    n_ticks = min(4, len(bins))
    idx = np.round(np.linspace(0, len(bins) - 1, n_ticks)).astype(int)
    plt.xticks(idx, [bins[i] for i in idx])

    return plt
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8HfP9x/HXOzebLYKEkhtJLEGKWmKrfY8tqkWjlvjZWhWKWqKLraV2tVdaFLWloW2QSpUS1VJRESJCRHAFCYJGhCT38/tj5o7JyV2O68w9kbyfj8d5ZJbvzHzPnJvzPvOdme8oIjAzMwNoV+0KmJnZosOhYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCWcEkTZC0Q7XrUQmSDpf0z2rXw4rjULCySXpE0kxJnUqm/17SL5tYJiSt1cJ6D5I0VZJKpreXNF3S3rlpfSTVS7q23G1JOlvSH5orn763OZJm5V735sr+RNKr6fQ6SXc1957yIuLrEfFIueUXFZJ6p/uofbXrYm3HoWBlkdQb2BYIYGCFV/8noCuwfcn0Aen2HshNOwyYCQwqDacKGBIRy+Ze+wBIGgwcCuwSEcsC/YGHKrztJY7DZtHkULByHQY8AfweGFzJFUfEHGB4uo3Sbd4WEfNKpv0MmAvsU8l6NGMzYHREvAIQEW9HxLByF06PgnZJhzeXNFbSR5LekXRZrtzAtKnpg/TIZb2SdZwiabykDyXdJalzE9u7TtKI3PiFkh4qPRJL57WT9DNJr6VHZbdIWj6dPSb994P0CGmr3HKXpEeNr0raIzd9eUk3SHpL0puSfimpJp13uKTHJV0u6X3g7HL3obUdh4KV6zDgtvS1u6RVKrz+m4H9JS0FyZcLyZf+LQ0FJG0L1AJ30niIFOUJ4DBJp0rq3/All6vXUEn3lbmuK4ArIqILsCbJ+0BSX+AO4ESgOzAKuFdSx9yyB5IcPfUBNgQOb2IbPwY2TL+EtwWOBAZH433aHJ6+dgTWAJYFrk7nbZf+2zU9cvp3Or4FMAnoBlwE3JALnJuBecBawMbAbsBRue1tAUwBVgbOa6L+VkUOBWuRpG2AXsDwiHgaeAX4XiW3ERGPA+8A+6WTDgReiohxuWKDgb9GxEzgdmAPSStXsBpXpr/SG16/SOv2B+B4YHfgUWC6pKG5ul8QEXs3vsqFzAXWktQtImZFxBPp9O8C90fEgxExF7gEWAr4Zr5+ETEtIt4H7gU2amwDETEbOAS4DPgDcHxE1DVRn4OByyJiSkTMAs4gaZprrmnntYj4bUTMJwmBVYFV0h8KewAnRsTHETEduBwYlFt2WkRcFRHzIuKTZrZhVeJQsHIMBv4WEe+m47dT4Sak1C18/uv/UJIvHADSI4gDSI5USH+1vk554TQP6JCfIKlhfG5u8gkR0TX3+nnDjIi4LSJ2ITn38QPgXEm7f5E3lzoS6Au8KOmp3En01YDXcturB94AeuSWfTs3PJvkV32jIuI/JL/IRXo00oQFtpsOtweaOxLM6pEGEGldepHs57caghW4nuSooMEbzazXFgEOBWtW+mV8ILC9pLclvQ2cBHxD0jcqvLlbgJ3TtustScKnwX5AF+DaXD16UF4T0utA75JpfYD5wJtfpIIRMTci/giMB9b/Isumy78cEQeRfFFeCIyQtAwwjeRLFYC0OabnF61fbvnjgE7pek9rpugC2wVWJwnRd0hO8n8RbwCfAt1ywdolIr6eK+NumRdxDgVrybdIvjz7kTRXbASsBzzGgl/INZI65175tvCOJfMWaJNvEBGvAf8kaVt/MCLyv4wHAzcCG+TqsTWwkaQNWtjWA8A6kg6V1EHSisD5wIiSk9iNStvm95K0XHpidg/g68CTLS3byLoOkdQ9PRL4IJ08n+TX/F6Sdk6PYn5M8gX7r1Zsoy/wS5ImpEOB0yQ12tREsq9PUnKp77Ik++WudL/MAOpJzjW0KCL
eAv4GXCqpS7qv1pRUelWZLcIcCtaSwcBNEfF6etXN2+mX9dXAwbm256HAJ7nXw7l1TCiZ93/NbO9mkl+u+RPMPYCdgV/n65Ce33iABZuyFtpW2ra9J/B9YDrwPPAhcGzJtq/WgvcpPJ1O/wj4CckRxwckJ1ePjYh/pvX7iaS/NvOe8gYAEyTNIjnpPCgi5kTEJJIv8auAd0lOsu8TEZ+VuV7SurQnOY9wYUQ8GxEvp3W/VY1fwnsjcCvJlUavAnNIzp80NA2dBzyeNgdtWUYVDgM6Ai+QXDo8guScg31FyA/ZMSuWpNeBQyJiTIuFzarMRwpmBZLUneQS06lVropZWQoLBUk3pjfDPN/EfEm6UtJkJTfkbFJUXcyqQdJmwMvAVRHxerXrY1aOwpqPJG0HzAJuiYiFrtKQtCdJ2+WeJDe0XBERWxRSGTMzK0thRwpp++n7zRTZlyQwIr2Bp6skn5AyM6uianZI1YMFb2SpS6e9VVpQ0jHAMQDLLLPMpuuuu27rtjjtmdYt92WstjEAz735YZtveoMey1dlu9Xc9gY9km57lqT97fe85G27NZ5++ul3I6J7S+WqGQoLdc5FEze2pJ2PDQPo379/jB07tnVbPLv1O7TVzk7q2nvo/W2+6bEX7FWV7VZz22Mv2AtYsva33/OSt+3WkPRay6Wqe/VRHckdmw1qSe6uNDOzKqlmKIwk6XlS6U0xH6Z3RJqZWZUU1nwk6Q5gB6CbpDrgLNJOySLiNyRdA+8JTCbp3Ku5u1zNzKwNFBYKaadfzc0P4Liitm9mS64undpx/BYr0KtrB9To6cvWmzhxIgC/Hdj2F0s2bLs5nTt3pra2lg4dOrRYtjF+HJ6ZLXaO32IFNllzNdovvRxa+IFzX8p6tV0BmFv3QQslK69h202JCN577z3q6uro06dPq7bhbi7MbLHTq2uHQgJhUSeJlVZaiTlz5rR6HQ4FM1vsCC1xgdDgy75vh4KZmWV8TsHMFnsDr368ouubWsZNZFuuU8u/Jr7ORWefwX8eH4MkOnXqzEXX3UTt6r3430cfcsGZpzPuqeRZTRtttgVDz72Q5bosz5tvvM6e3/wGp597Id/7v2MAOP9npzJgh605/PDDK/peSvlIwcysIKNH3sOMd95mxIOPc/ff/8Xlv7uVLl2SnhXOPvUEalfvzf2PP8P9jz9Dj569OOe0H2XLrtitO7ff8BvmfvaFnrP0pTkUzMwKMmP6O3RbeRXatUu+aldZtQddunbl9Ven8MJz4zjmR6dmZb9/4mlMGP8Mb0x9FYAVVlyJzbfZjpEj7mjTOjsUzMwKsvs+32LM3x/gwN235ZJzf8bE58cDMOXlF1mn3wbU1Hz+uPKamhrW6bcBr7z0+b0IR/zwJG4Zdg3z589vszo7FMzMCrLKqj34yyNPccLQM2nXThwzaF+e/OejRDRxlVAyIxutXb0X62+0CaP+/Mc2q7NPNJuZFahjp05ss+OubLPjrqzUbWUeHn0/Bx/xA16cMJ76+vqsaam+vp5JE59njbXWWWD5o4aczI+/P5hNt/hmm9TXRwpmZgWZ+NyzTH876eezvr6el16cwGo9erJ6nzVY9+sbMuzKS7Kyw668hPXW/war91ljgXX0Wasva/ZdlzEPjW6TOvtIwcwWeyOHbF2xdW3YQlcTAPPmzaNjx468/+4Mzjn9R3z26acArL/Rpgw6/GgAzrn4Kn515mnsvc0mRAQbbroZZ198VaPrO+r4k/nugO0r9h6a41AwM6uwV16aSG2vPmy94y5sveMujZbp0rUrv7pyWKPzevRcnXse+nc2vk6/DRj3+vtlBdKX5VAwM6ug4bfeyB03DePUs86vdlVaxaFgZlZBBx56BAceekS1q9FqPtFsZmYZh4KZmWUcCmZmlnEomJlZxieazWyxt+HvelV2hWd/2GKRd956k/N/eipTXp5EfX092+2yOyf/9FxeeXkSM955i2132g2A6y67gKWXXobBPzi
+snVsJR8pmJlVWERw0tGHsePue3HvY08zcsxYZn/8MVdd9AsmTXiOxx5+sGLbqnRneT5SMDOrsP88PoZOnTrxre8eDCQ9oJ561nkM2GpD2rfvABGMe+oJjjjuJABeeXkSRx6wN29Nq+PgI4/l4CO+D8B999zF7TcOY97cz1h/40256+YbqKmpYdlll+Xkk09m9OjRXHrppWyzzTYVq7uPFMzMKmzySy/Sb4ONFpi27HJdWK12dY4+4RR222c/ho9+jAEDvw3A1Fde4ro/3M1t9z7E9ZdfyNy5c5ny8iRG3/snbv7TAwwf/Rg17Wq47bbbAPj4449Zf/31efLJJysaCOAjBTOzyivpAjs/vbEus7fdaTc6dupEx06dWLFbd95/dzpPPv4oE8c/y8F77wTAnDlzWG+NnkBy5PGd73ynkKo7FMzMKmzNvuvy91EjF5g2638f8fa0N6lpt3ADTceOnbLhdu3aMW/efCJgnwMG8aOhZ2XzGvo+6ty58wIP6KkkNx+ZmVXYFttsz5xPPuHeEXcCycngS3/xcwYe8D1W6r4ysz+e1fI6tt6Ov98/kvfenQHAhzNn8tprrxVab/CRgpktAcYfVbkv03J6KpXE5b+7lfN+egrDrriY+vp6ttlpV044/ed8Mns2N17zaw7cfdvsRHNj1uy7Lsed+lOOPfjb1NfX075DB24c9ht69arw5bUlHApmZgX42mq1XHXTnQtN79ipE7ff/3CTy+W7zB4w8NvZyWj4PJBmzWr5SKO13HxkZmYZh4KZmWUcCma22AmCiKh2Nariy75vh4KZLXZe+2Au82Z/tMQFQ0Tw3nvv0blz51avwyeazWyxc9WTMzke6NX1XUQjN5F9CRP/txQA78z8pKLr/SLbbk7nzp2pra1t9TYcCma22Pno03rOG/NeIeueesFeAOwx9P5C1l/Otovk5iMzM8sUGgqSBkiaJGmypKGNzF9d0j8kPSNpvKQ9i6yPmZk1r7BQkFQDXAPsAfQDDpLUr6TYz4DhEbExMAi4tqj6mJlZy4o8UtgcmBwRUyLiM+BOYN+SMgF0SYeXB6YVWB8zM2tBkaHQA3gjN16XTss7GzhEUh0wCmj0eXSSjpE0VtLYGTNmFFFXMzOj2FBo7Dqw0ouGDwJ+HxG1wJ7ArZIWqlNEDIuI/hHRv3v37gVU1czMoNhQqAN65sZrWbh56EhgOEBE/BvoDHQrsE5mZtaMIkPhKWBtSX0kdSQ5kTyypMzrwM4AktYjCQW3D5mZVUlhoRAR84AhwGhgIslVRhMknStpYFrsx8DRkp4F7gAOjyXtvnQzs0VIoXc0R8QokhPI+Wln5oZfALYusg5mZlY+39FsZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUKDQVJAyRNkjRZ0tAmyhwo6QVJEyTdXmR9zMysee2LWrGkGuAaYFegDnhK0siIeCFXZm3gDGDriJgpaeWi6mNmZi0r8khhc2ByREyJiM+AO4F9S8ocDVwTETMBImJ6gfUxM7MWFBkKPYA3cuN16bS8vkBfSY9LekLSgMZWJOkYSWMljZ0xY0ZB1TUzsyJDQY1Mi5Lx9sDawA7AQcDvJHVdaKGIYRHRPyL6d+/eveIVNTOzRJGhUAf0zI3XAtMaKfOXiJgbEa8Ck0hCwszMqqDIUHgKWFtSH0kdgUHAyJIyfwZ2BJDUjaQ5aUqBdTIzs2YUFgoRMQ8YAowGJgLDI2KCpHMlDUyLjQbek/QC8A/g1Ih4r6g6mZlZ8wq7JBUgIkYBo0qmnZkbDuDk9GVmZlXmO5r
NzCzjUDAzs4xDwczMMmWFgqQDJC2XDv9M0j2SNim2amZm1tbKPVL4eUT8T9I2wO7AzcB1xVXLzMyqodxQmJ/+uxdwXUT8BehYTJXMzKxayg2FNyVdDxwIjJLU6Qssa2ZmXxHlfrEfSHKj2YCI+ABYETi1sFqZmVlVlBUKETEbmA5sk06aB7xcVKXMzKw6yr366CzgdJIH4gB0AP5QVKXMzKw6ym0+2g8YCHwMEBHTgOWKqpSZmVVHuaHwWdpPUQBIWqa4KpmZWbWUGwrD06uPuko6Gvg78NviqmVmZtVQVi+pEXGJpF2Bj4B1gDMj4sFCa2ZmZm2uxVCQVAOMjohdAAeBmdlirMXmo4iYD8yWtHwb1MfMzKqo3IfszAGek/Qg6RVIABFxQiG1MjOzqig3FO5PX2Zmthgr90TzzZI6An3TSZMiYm5x1TIzs2ooKxQk7UDSXfZUQEBPSYMjYkxxVTMzs7ZWbvPRpcBuETEJQFJf4A5g06IqZmZmba/cm9c6NAQCQES8RNL/kZmZLUbKPVIYK+kG4NZ0/GDg6WKqZGZm1VJuKBwLHAecQHJOYQxwbVGVMjOz6ig3FNoDV0TEZZDd5dypsFqZmVlVlHtO4SFgqdz4UiSd4pmZ2WKk3FDoHBGzGkbS4aWLqZKZmVVLuaHwsaRNGkYk9Qc+KaZKZmZWLeWeUzgR+KOkaSQP2lkN+G5htTIzs6po9khB0maSvhYRTwHrAncB84AHgFfboH5mZtaGWmo+uh74LB3eCvgJcA0wExhWYL3MzKwKWmo+qomI99Ph7wLDIuJu4G5J44qtmpmZtbWWjhRqJDUEx87Aw7l55Z6PMDOzr4iWvtjvAB6V9C7J1UaPAUhaC/iw4LqZmVkbazYUIuI8SQ8BqwJ/i4hIZ7UDji+6cmZm1rbKeUbzExHxp4jIP4bzpYj4b0vLShogaZKkyZKGNlNuf0mR3v9gZmZVUu7Na19Y2j/SNcAeQD/gIEn9Gim3HElHe08WVRczMytPYaEAbA5MjogpEfEZcCewbyPlfgFcBMwpsC5mZlaGIkOhB/BGbrwunZaRtDHQMyLua25Fko6RNFbS2BkzZlS+pmZmBhQbCmpkWmQzpXbA5cCPW1pRRAyLiP4R0b979+4VrKKZmeUVGQp1QM/ceC0wLTe+HLA+8IikqcCWwEifbDYzq54iQ+EpYG1JfSR1BAYBIxtmRsSHEdEtInpHRG/gCWBgRIwtsE5mZtaMwkIhIuYBQ4DRwERgeERMkHSupIFFbdfMzFqv0K4qImIUMKpk2plNlN2hyLqYmVnLimw+MjOzrxiHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZQoNBUkDJE2SNFnS0EbmnyzpBUnjJT0kqVeR9TEzs+YVFgqSaoBrgD2AfsBBkvqVFHsG6B8RGwIjgIuKqo+ZmbWsyCOFzYHJETElIj4D7gT2zReIiH9ExOx09AmgtsD6mJlZC4oMhR7AG7nxunRaU44E/trYDEnHSBoraeyMGTMqWEUzM8srMhTUyLRotKB0CNAfuLix+RExLCL6R0T/7t27V7CKZmaW177AddcBPXPjtcC00kKSdgF+CmwfEZ8WWB8zM2tBkUcKTwFrS+ojqSMwCBiZLyBpY+B6YGBETC+wLmZmVobCQiEi5gFDgNHARGB4REyQdK6kgWmxi4FlgT9KGidpZBOrMzOzNlBk8xERMQoYVTLtzNzwLkVuv1TvObe35eYAmNrmWzQzaz3f0WxmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSi
YmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWKTQUJA2QNEnSZElDG5nfSdJd6fwnJfUusj5mZta8wkJBUg1wDbAH0A84SFK/kmJHAjMjYi3gcuDCoupjZmYtK/JIYXNgckRMiYjPgDuBfUvK7AvcnA6PAHaWpALrZGZmzVBEFLNiaX9gQEQclY4fCmwREUNyZZ5Py9Sl46+kZd4tWdcxwDHp6DrApEIq3bxuwLstlrJK8f5uO97Xbata+7tXRHRvqVD7AivQ2C/+0gQqpwwRMQwYVolKtZaksRHRv5p1WJJ4f7cd7+u2tajv7yKbj+qAnrnxWmBaU2UktQeWB94vsE5mZtaMIkPhKWBtSX0kdQQGASNLyowEBqfD+wMPR1HtWWZm1qLCmo8iYp6kIcBooAa4MSImSDoXGBsRI4EbgFslTSY5QhhUVH0qoKrNV0sg7++2433dthbp/V3YiWYzM/vq8R3NZmaWcSiYmVlmiQgFSTWSnpF0X8n0qyTNyo33kvSQpPGSHpFUm5u3uqS/SZoo6YWGLjkk7Szpv5LGSfqnpLXS6Zen08ZJeknSB23zbtuepBslTU/vO2mYdlfu/U+VNC6d3lHSTZKek/SspB1yyxyUTh8v6QFJ3ZpbV2651SXNknRKbtpJkiZIel7SHZI6F74j2oCkzpL+k+67CZLOSacPSbuLiYb9VrLcZpLmp/cPIWnH3D4dJ2mOpG+l85r6m260W5rmPtPFxaL2HSJpsKSX09dgKikiFvsXcDJwO3Bfblp/4FZgVm7aH4HB6fBOwK25eY8Au6bDywJLp8MvAeulwz8Eft/I9o8nOdFe9X1R0P7dDtgEeL6J+ZcCZ6bDxwE3pcMrA0+T/DhpD0wHuqXzLgLObm5duWl3p5/dKel4D+BVYKl0fDhweLX3U4X2tYBl0+EOwJPAlsDGQG9gasM+zC1TAzwMjAL2b2SdK5Jc6NHs33Q6/Jt0eBBwV3OfabX3VYX3+yLzHZJ+XlPSf1dIh1eo1Htd7I8U0qTeC/hdbloNcDFwWknxfsBD6fA/SLvlUNJnU/uIeBAgImZFxOy0XABd0uHlWfheDICDgDu+9JtZREXEGJq4v0SSgAP5/P1n+zgipgMfkPznUvpaJl2mCyX7spF1kf66nQJMKNl0e2ApJfe/LF26rq+qSDT8Mu2QviIinomIqU0sdjxJcE5vYv7+wF/L+Jtuqluapj7TxcIi+B2yO/BgRLwfETOBB4EBrX6DJRb7UAB+TfLB1eemDQFGRsRbJWWfBb6TDu8HLCdpJaAv8IGke9JDyIvTPwqAo4BRkuqAQ4EL8iuU1AvoQ/JLbUm0LfBORLycjj8L7CupvaQ+wKZAz4iYCxwLPEfyn6IfySXLTa5L0jLA6cA5+UIR8SZwCfA68BbwYUT8rYg3Vw1pU8Y4ki/5ByPiyWbK9iD5W/5NM6scxII/Wpr6m+4BvAHJJefAh8BKNPGZtua9LaIWte+Q7HNI1aXTKmKxDgVJewPTI+Lp3LTVgAOAqxpZ5BRge0nPANsDbwLzSH51bpvO3wxYAzg8XeYkYM+IqAVuAi4rWecgYEREzK/Q2/qqKT1KupHkj3gsyX+2fwHzJHUgCYWNgdWA8cAZLazrHODy3C9nACStQPILrU+6rmUkHVKpN1RtETE/IjYi6SVgc0nrN1P818DpTf39SVoV2IDkfqIGTf1NN9UtTaOfafnvaNG1iH6HlNU9UKtVu62uyBfwK5I/1qnA28BsYGY6PDV91ZP05lq67LJAXTq8JfBIbt6hJN2CdwdeyU1fHXihZD3PAN+s9r5og33dm5JzCiT/Ed4BaptZ7l8kRwWbAQ/
lpm8HjGpuXcBjuc/xA5ImrCEk/2FvyJU7DLi22vuooP1+Fum5lHR8KrlzCiTnVhr20SySo4tv5eb/CBiWG2/yb5okOLbKfR7vkt7r1NhnWu19U6H9u8h9h5D8OLo+N349cFCl3vNifaQQEWdERG1E9CZJ24cjYoWI+FpE9E6nz47keQ5I6iapYZ+cQfILCJIuO1aQ1NDD4E7ACyR/HMtL6ptO3xWY2LB9SeuQnAj6d2FvctG2C/BipL3gAkhaOm32QdKuwLyIeIHkF1W/3D5eYF82tq6I2Db3Of4aOD8iriZpNtoy3ZaAnUvW9ZUlqbukrunwUqT7panyEdEnt49GAD+MiD/nipQefTX3N91otzTNfKZfeYvod8hoYDdJK6RHxbux4JHel1JkL6lfRTsAv5IUwBiSqyqIiPlKLnd8KP2SeRr4bSRdeRwN3C2pnuQDPiK3voOAOyON88WVpDtI9l23tF30rIi4gYXbqiG5OmV0ur/eJPnFRERMU3J55RhJc4HX+PzwmibW1aiIeFLSCOC/JIfuz7CIdy3wBawK3Jy2R7cDhkfEfZJOIGn3/howXtKoSLutb0p6SWRP4NGGaS38TTfVLU2jn+kSagcK/g6JiPcl/YIkaADOjYiKdSTqbi7MzCyzWDcfmZnZF+NQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBlhhpV8a7l0w7UdK1zSwzq6l5razD75V2X222KHIo2JLkDhZ+DnjZN8WZLQkcCrYkGQHsLakTZHf0rgaMU/JglP8qeVDMvqULStpBuQesSLpa0uHp8KaSHpX0tKTRaSdzLWpqufSI5kIlD9N5SdK2X/aNm5XLoWBLjIh4D/gPn/c9Pwi4C/gE2C8iNgF2BC5NuyJoUdq761UkD6/ZlKSvm/MqsFz7iNgcOJGk0zuzNuG+j2xJ09CE9Jf03yNIuiI+X9J2JD1e9gBWIekJsyXrAOsDD6Y5UkPyDIcvu9w96b9Pk/RAa9YmHAq2pPkzcJmkTUge1/nftBmoO7BpRMyVNBUofabzPBY8sm6YL2BCRGz1BevR0nKfpv/Ox/9PrQ25+ciWKJE8kOcRkuaahhPMy5M8SGWupB2BXo0s+hqITUiwAAAAnElEQVRJ196dJC1P0h03wCSgu6StIGkWkvT1MqrS2uXMCuVfILYkuoOkeabhSqTbgHsljQXG0cjzCSLiDUnDSZ4I9zJJd9xExGfpJaZXpmHRnuTZDqXPjC5dX6uWMyuau842M7OMm4/MzCzj5iOzAki6Bti6ZPIVEXFTNepjVi43H5mZWcbNR2ZmlnEomJlZxqFgZmYZh4KZmWX+Hw+3LHZZYCN9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotPercentualComparison(df, title='ALL VALUES: json x other')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This graph shows that all the largest values are JSON and that the non-JSON types only appear among the smaller values." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conclusion\n", + "\n", + "There is absolutely no value with value_len greater than 104653 (the maximum value_len among non-JSON values) that is not valid JSON in this 10% sample. \n", + "\n", + "This implies that all the larger values are JSON, but they represent a very low percentage of the data: 0.30% of the whole sample and 6.76% of the values above the mean. " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The top (0.30% - whole sample) / (6.76% - values above the mean) is gurantee to be a valid JSON\n" + ] + } + ], + "source": [ + "max_non_json_value_len = df[df.is_json == False].value_len.max()\n", + "allJson = df[df['value_len'] > max_non_json_value_len ]\n", + "length = allJson.is_json.count()\n", + "print(\"The top ({0:0.2f}% - whole sample) / ({1:0.2f}% - values above the mean) is gurantee to be a valid JSON\".format(\n", + " length / df.is_json.count() * 100, length / df[df.value_len > df.value_len.mean()].is_json.count() * 100))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "--- \n", + "\n", + "# Out of Curiosity: small values\n", + "This is not exactly relevant to issue 22 ('What's in the really large values?'), but I was curious to know how the smaller values are distributed." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distribution of non-json values\n", + "That concentration of the non-json values made me curious: how is the distribution of NON-JSON values among the smaller values? 
\n", + "To answer this I filtered the data to only values bellow the bigger non-json and ploted the same graph.\n", + "- What I got is, again, the absolute majority of these non-json are on the first bin, so they really tend to be small values. \n", + " \n", + " *TODO: what is that really small portion of non-json values present on the 9th bin? Are they any different from the others thre are smaller?" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bigger non json value_len: 104653\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu8FWW9x/HPl3veQIUsuecdzUzwllneKiTFU8cMj5l4N0OzTKM0RczUvOQlU6lMs0RJxVA5oWJe4qSBSigSioSwQwUUNUDk9jt/zOxxsVhr7wWs2Qv2/r5fr/Xaa555ZuY3s2bPb80zM89SRGBmZgbQqtYBmJnZhsNJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYBlJvSSFpDZVmNeBkuoaGH+zpB+v73IsH5IWSfpEreOoFkmDJf211nFsDJwUmlB6wN2+qGyYpN+v4/wel3TyesQzS9Kh6zr9+oiI0yPiklosu1i6HZdK6l5QdqikWUX1Bkt6QdISSW9IuklSp4Lxw9LP+GsFZW3Ssl5llr1aIpbUTdK9khZIejdd3uCC+u0lXSZptqT3Jb0i6VxJWtv1aUhEbBYRMyutv74kXS1pXFHZtZIeXId5Ve3LTUvkpGAtiqTWZUYtBsqeuUg6B7gCOBfoCOwL9AQekdSuoOrbwPAGltOYO4A56by3Br4JvFkw/o/AIcAAYHPgOOBU4Lq1WZ8N0I+B7SSdACBpP+B44PSaRlVCs082EeFXE72AALYvKhsG/D59fyBQB/wIWADMAo4tM69LgZXAUmAR8Iu0/DPARODd9O9nykx/B7AKeD+d/jygVxrj8cDsNIbzC6ZpBQwFXgXeAkYBW5WZf4PrAtwG/KRg+DzgdWAucHLhtiI5OD4AvJeu00+AvxZMuzPwCMkBeTpwdNFybgLGkhwoDy0R6+PARcB/CpZ5KDArfb9Fuo2OLppuM2AecGLBZ/kH4B/A8WlZm3RdepXZTvXbvE06vAjYo0zdQ9LPu3tR+T7pvrB9JeuztvsqSQJ6KZ3fv4HvF9Q7BZiRbvsxwLZF8zgdeAVYCNwIqIFlHpjuV72AacBpDdRtBVwAvJZ+Br8DOqbjZqfLXpS+9gMGA38Frkpj+RdwWMH8OgK/IdkH/53uY63TcYOBCcDP0/X8SaXbcWN81TyAlvSisqSwArgGaA98nuRAtlOZ+T0OnFw
wvFW6wx+XHoyOSYe3LjP9LAoOkgUHqF8BHwE+BXwA7JKOPxt4GuiWxncLMLLMvBtcFwqSAtAfeAPYFdiEJGEVHpTuSl+bAH1Ivkn/NR23aTp8QrrOe5IkoV0LlvMusH96IOlQbjumsdZ/FoVJoX+6Lm1KTHt7/Tao/yyBgcBMoC1rnxQeTQ9Ag4AeRXUvB54oM5/XSA+ija1POvxL4JeV7KskB8oD0vdbAnum7w9Ot/We6Wd8A/Bk0TweBDoBPYD5QP9G/kduSef5OA0nkBNJktEnSJLzfcAdpbZpWjYYWE6SxFoD3yL5AqJ0/P3psjcFPgr8vWB7Dk4//zPTz/MjtT6W5Ply89GG6ccR8UFEPAE8BBxd4XRfBl6JiDsiYkVEjAT+CRyxlsu/OCLej4h/kHzr/VRafhrJmUNdRHxAchA8qpHT6UrW5WjgtxExNSKWABfXj0ibYf4buCgilkTESyQH4nqHkxzsfpuu83PAvcBRBXX+FBETImJVRCxtINbLgCMk7VpU3hlYEBErSkzzejo+ExFjSA6A63K952vAUyTNKf+SNFnSXgVxvF5mujXioPz6EBFnRMQZFca0HOgjaYuIWJhuY4BjgVsj4rl0f/ghsF/R9ZPLI+KdiJgN/AXYo5FlPUVyZviHSI/IZRwLXBMRMyNiUbrsQY3si69FxK8iYiXJPvRxYBtJ2wCHAWdHxOKImEdyVjCoYNq5EXFDuo+938g6bNScFJrWSpJvj4XakvzT1VsYEYsLhl8Dtq1w/tum9Qu9BnRdmyBJvrXXW0LyTQySdu7Rkt6R9A7JKf5KYJsy86l0XbYl+bZfr/B9F5JvZ+XG9wT2qY8pjetY4GNl6pcVEfOBXwDDi0YtADqXOeB8PB1f7ALgfKBDfYGkHuldPYskLSoTw8KIGBoRu5Js18nA/emF5AXp8kpZI44G1mdt/TdJE9Jrkp5I2/uhaH9LD85vsfr+Vm5fWoOkrUmad64luS7TqVzd4mWn79tQfl9cLZb0ywdpPD1J/g9fL9iHbiE5Y6hX0T7UHDgpNK3ZJKe2hXqz+s69paRNC4Z7kJzmllL8TWouyQ5eqAdJG2kl0zdmDkk7bKeCV4eIKDf/StfldZImqXrdC97PJzl1Lzd+DkmTSmFMm0XEtwrqrM16XgkcBPQtKPsbSTPaVwsrput2GDC+eCYR8QhJ88YZBWWz09g2i4iyB8eC+gtIDpLbkjQNPkqSAAvXH0l7k2yTxypcn7USERMj4kiSg+T9JNeSoGh/S7fH1pTf3xpzLfDniPgu8CTJupdTvK/3INlP3mTd9usPgM4F+9AWaWKu12K6k3ZSaFp3Axektx22Sm8HPQK4p6jexZLaSTqApHnkj2Xm9yZJm2q9scCOkv4nvRXy6yRt8OVu6yuevjE3A5dK6gkgqYukIxuZppJ1GQWcIGkXSZsAF9aPSE/17wOGSdpE0s4kd+TUe5BknY+T1DZ97SVpl7VYr0xEvANcTXLhu77sXZImrRsk9U+X0StdlzqSayClnF84n0pIukLSbunntzlJ2/eMiHgrIh4lSUD3StpVUmtJ+5Jc3L4pIl6pZH3WMp52ko6V1DEilpNc7F+Zjr6T5HPbQ1J74KfAMxExax2WMwD4AvC9tOhM4L8kHVRmkpHAdyX1lrRZuuy70ya++SQ3UVS0b0fE68DDwNWStkj/N7eT9Pm1XY/mwEmhaQ0H/o/kLoiFwM9I7sh5saDOG+m4uST/7KdHxD/LzO86kjb9hZKuj4i3SA6855Ccxp8HHJ5+4yzlMpIk9Y6k71cQ/3Ukd5g8LOk/JBed92mgfkXrEhH/C1xP0uY8g+SbOSTf3gCGkNwd8gbJAXhk/biI+A/wRZL237lpnStILnyuq+v48MBXH+PPSO6kuorkwPgMyTfMQ9L29DVExASSC5aVqP8mugkwGniH5GJ1T5IL1/X+m2Q7/Znkzprfk9w1c+barI+ShwdvrjC244BZkt4juZvoGwARMZ7
k2se9JGd727F6O3xF0uR3M3BWRLydznseyX78K0kfKTHZrST7wpMkdxItJd0GadPQpcCEdN/et4Iwvgm0I7nLaiHJF7VyTXXNWv2Vd9sASDqQ5G6Rbo3Vbc7Sb/kvAu1LXdyVdAXwsYg4vsmDqzJJu5PcsdNQ+3mTktSKJIn0TC8QWwviMwXbIEj6StpUsSXJN/0H6hOCpJ0l7a7E3sBJJN+mN2rpwfdoYFKtYymyG8k37zcaq2jNT25JQdKtkuZJerHMeEm6XtIMSVMk7ZlXLLZROI2kLfhVkm+phReKNye5rrCY5PrD1cCfmjrAHMwmeQbiu7UOpJ6k+uapH0TEslrHY00vt+YjSZ8jafP8XUTsVmL8AJI2wAEk7dLXRURD7dNmZpaz3M4UIuJJkkfCyzmSJGFERDwNdJLUIi/smJltKGrZsVNXVn8gpC4tW+OJTUmnknT6xaabbtp35513Xrclzn1+3aZbH9t+urbLrsVya7nsdHu/8O93m3zRn+zasWbLBWq2vVviOtd82evg2WefXRARXRqrV8ukoBJlJduyImIEMAKgX79+MWnSOl6XG9Zx3aZbH8OSWHsNfajJFz3r8i/XZLm1XPasy78M1GZ7T6rROk+q4TrX6nOuX+eW+D+9riQV93ZQUi3vPqpj9SdTu1H+yV0zM2sCtUwKY4Bvpnch7Qu8mz5ZaGZmNZJb85GkkSTdJ3dW8rOMF5F2BhcRN5N0yTCA5AnWJSRdH5uZWQ3llhQi4phGxgfw7Wosa/ny5dTV1bF0aUO9IgNfGtXw+DxMmwbArwZW/8aqIHjtneXc8MxC3vtgVdXnb2YtT7P4Wbm6ujo233xzevXqhVTq+nVqbiNJIw/bJv2yLa97p+qzjgi23vo9zgQuffKtqs/fzFqeZtHNxdKlS9l6660bTgjNkCTabLIFPTsV/0SDmdm6aRZJAWhxCaGeJFTy7l4zs7XXLJqPzKxl6rX0ziZf5qwmX2LTapZJodoPlcw6q7Jfw9xsh/15b/pTnH3RVTw2YSKS6LBZJ0aNGgVtt+Q/773L5Rf+gMkTnwFgj732YejwK9h8i478e85sBnzmU/xg+BX8zwmnAvDTC85l190/zZFH/09V18fMrJxm03y0obh7zMPMfWM+Ux69mxfGj2L06NF06pR0lT/s3LPo1qMXD014nocmPE/X7j25+LzvZNNu1bkLd/7mZpYvc+eUZlYbTgpV9vqbC/j4Np1p1SrZtN26dWPLLbdk9r9m8tILkzn1O+dmdU87+zymTnmeObP+BcCWW23N3p/9HGPuGVmT2M3MnBSq7OgjvsADjzzJHl8YxDkXX8PzzyedZs185Z/s1OeTtG7dOqvbunVrdurzSV59eVpWduIZ3+V3I25k5cqVa8zbzCxvTgpV1m3bbZj+5Ggu++GZtGolDjnkEMaPH09EmTukkhEfTt+jJ7vtsSdj7y/1+/ZmZvlqlheaa619+3YcdvD+HHbw/myz3e7cf//9fOnoE/jn1CmsWrUqa1patWoV06e9yCe232m16U8e8j3OOe14+u7zmVqEb2YtmM8Uquy5F6Yx9435QHLQnzJlCj179qRH70+w8667M+L6q7K6I66/il12+xQ9en9itXn03n5HtttxZ54cP65JYzcza5ZnCmX7HM/xRzFWrFhB+3btmLfgbU459xI+WLYcgL0/8zmGDBnCywuWcvGVN3DZhedx+Gf3JCLYve9eDLvyhpLzO/nM7/H1/p/PLV4zs1KaZVKohanTZ7Jdr270P2h/+h+0/4cjsl9KWsoWnTpx2fUjSk7ftXsP7hv/t2x4pz6fZPLshn7N1Mys+pwUquDm393D9beO5NqLv1/rUMzM1ouTQhWc/s2jOP2bR9U6DDOz9eYLzWZmlnFSMDOzjJOCmZllnBTMzCzTPC80D+tY3fmd+nhF1ermvsm3z7+cl16eyaoIDj/0AK785W289NJLTJjyMgcc/EUAbrrmcjbZZFOOP/3M6sZ
pZraefKZQJRHBV0/5Pv/V/0BemfAnXn5qNIsWL+H8889n8uTJPPXYI1VbljvLM7O8NM8zhRp47K9/p0P7dpzw9SOBpAfUnw87h577DqRt27asWLmKyROf5sRvfxeAV1+ZzklfO5zX59Zx7Enf4tgTTwPgwfvu5s5bR7Bi+TJ2+3Rfzr/0alq3bs2+O3XjuFPO4P+eeIxzfnwJe+69X83W1cyaL58pVMnUl2fS95O7rFa2xeab0atXLy644AK+eMRXGDXuKfoP/CoAs159mZt+fy9/eGA8t/z8CpYvX87MV6Yz7oHR3D76z4wa9xStW7Vm7Oikt9T3lyxm+5124Q8PPOqEYGa58ZlClUREya6xy5UfcPAXade+Pe3at2erzl14e8E8npnwBNOm/INjDz8YgKVLl7JV5y5AcuZx6ICB+a6EmbV4TgpVsuuO23Hv2PGrlb33n0XMmTNntR/WqdeuXfvsfatWrVixYiURcMTXBvGdoRetWb99h5LzMTOrJjcfVckhB+zNkveX8rs/PggkF4PPGf5zBg8ezDbbbMOSxYsancc++3+ORx8aw1sLkq633124kLl1s3ON28ysUPM8Uxj2bunyHLvOlsToX1/NGT+6jEuu/RWrIhhw8P789Kc/ZfHixVw0/FKO/tIB2YXmUrbbcWe+fe75fOvYr7Jq1SratG3Lj35yJdt265Fb3GZmhZpnUqiR7l0/xgO3X7d6Yfv2tG/fnjsfeqzsdIVdZvcf+NXsYnShp6fXVS1OM7Ny3HxkZmYZJwUzM8s0m6QQEbUOoSYigqBlrruZVV+zSAodOnTgrbfeanGJISJYseQ9Xntnea1DMbNmollcaO7WrRt1dXXMnz+/4YrvzGuagAq9Ow2ANxe+X/VZB8Fr7yznhmcWVn3eZtYyNYuk0LZtW3r37t14xWH75h/MGstMbo89bOhDTb9sM7O11Cyaj8zMrDpyTQqS+kuaLmmGpKElxveQ9BdJz0uaImlAnvGYmVnDcksKkloDNwKHAX2AYyT1Kap2ATAqIj4NDAJ+mVc8ZmbWuDzPFPYGZkTEzIhYBtwFHFlUJ4At0vcdgbk5xmNmZo3IMyl0BeYUDNelZYWGAd+QVAeMBUr+PqWkUyVNkjSp0TuMzMxsneWZFNb8EQHWeMrqGOC2iOgGDADukLRGTBExIiL6RUS/Ll265BCqmZlBvkmhDuheMNyNNZuHTgJGAUTE34AOQOccYzIzswbkmRQmAjtI6i2pHcmF5DFFdWYDhwBI2oUkKbh9yMysRnJLChGxAhgCjAOmkdxlNFXScEn1vyt5DnCKpH8AI4HB0dL6qjAz24Dk+kRzRIwluYBcWHZhwfuXgP3zjMHMzCrnJ5rNzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmaZXJOCpP6SpkuaIWlomTpHS3pJ0lRJd+YZj5mZNaxNXjOW1Bq4EfgCUAdMlDQmIl4qqLMD8ENg/4hYKOmjecVjZmaNy/NMYW9gRkTMjIhlwF3AkUV1TgFujIiFABExL8d4zMysEXkmha7AnILhurSs0I7AjpImSHpaUv9SM5J0qqRJkibNnz8/p3DNzCzPpKASZVE03AbYATgQOAb4taROa0wUMSIi+kVEvy5dulQ9UDMzS+SZFOqA7gXD3YC5Jer8KSKWR8S/gOkkScLMzGogz6QwEdhBUm9J7YBBwJiiOvcDBwFI6kzSnDQzx5jMzKwBuSWFiFgBDAHGAdOAURExVdJwSQPTauOAtyS9BPwFODci3sorJjMza1hut6QCRMRYYGxR2YU
F7wP4XvoyM7Ma8xPNZmaWcVIwM7OMk4KZmWUqSgqSviZp8/T9BZLuk7RnvqGZmVlTq/RM4ccR8R9JnwW+BNwO3JRfWGZmVguVJoWV6d8vAzdFxJ+AdvmEZGZmtVJpUvi3pFuAo4GxktqvxbRmZraRqPTAfjTJg2b9I+IdYCvg3NyiMjOzmqgoKUTEEmAe8Nm0aAXwSl5BmZlZbVR699FFwA9IfhAHoC3w+7yCMjOz2qi0+egrwEBgMUBEzAU2zysoMzOrjUqTwrK0n6IAkLRpfiGZmVmtVJoURqV3H3WSdArwKPCr/MIyM7NaqKiX1Ii4StIXgPeAnYALI+KRXCMzM7Mm12hSkNQaGBcRhwJOBGZmzVijzUcRsRJYIqljE8RjZmY1VOmP7CwFXpD0COkdSAARcVYuUZmZWU1UmhQeSl9mZtaMVXqh+XZJ7YAd06LpEbE8v7DMzKwWKkoKkg4k6S57FiCgu6TjI+LJ/EIzM7OmVmnz0dXAFyNiOoCkHYGRQN+8AjMzs6ZX6cNrbesTAkBEvEzS/5GZmTUjlZ4pTJL0G+COdPhY4Nl8QjIzs1qpNCl8C/g2cBbJNYUngV/mFZSZmdVGpUmhDXBdRFwD2VPO7XOLyszMaqLSawrjgY8UDH+EpFM8MzNrRipNCh0iYlH9QPp+k3xCMjOzWqk0KSyWtGf9gKR+wPv5hGRmZrVS6TWFs4E/SppL8kM72wJfzy0qMzOriQbPFCTtJeljETER2Bm4G1gB/Bn4VxPEZ2ZmTaix5qNbgGXp+/2AHwE3AguBETnGZWZmNdBY81HriHg7ff91YERE3AvcK2lyvqGZmVlTa+xMobWk+sRxCPBYwbhKr0eYmdlGorED+0jgCUkLSO42egpA0vbAuznHZmZmTazBpBARl0oaD3wceDgiIh3VCjgz7+DMzKxpVfIbzU9HxOiIKPwZzpcj4rnGppXUX9J0STMkDW2g3lGSIn3+wczMaqTSh9fWWto/0o3AYUAf4BhJfUrU25yko71n8orFzMwqk1tSAPYGZkTEzIhYBtwFHFmi3iXAz4ClOcZiZmYVyDMpdAXmFAzXpWUZSZ8GukfEgw3NSNKpkiZJmjR//vzqR2pmZkC+SUElyiIbKbUCfg6c09iMImJERPSLiH5dunSpYohmZlYoz6RQB3QvGO4GzC0Y3hzYDXhc0ixgX2CMLzabmdVOnklhIrCDpN6S2gGDgDH1IyPi3YjoHBG9IqIX8DQwMCIm5RiTmZk1ILekEBErgCHAOGAaMCoipkoaLmlgXss1M7N1l2tXFRExFhhbVHZhmboH5hmLmZk1Ls/mIzMz28g4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVkm16Qgqb+k6ZJmSBpaYvz3JL0kaYqk8ZJ65hmPmZk1LLekIKk1cCNwGNAHOEZSn6JqzwP9ImJ34B7gZ3nFY2ZmjcvzTGFvYEZEzIyIZcBdwJGFFSLiLxGxJB18GuiWYzxmZtaIPJNCV2BOwXBdWlbOScD/lhoh6VRJkyRNmj9/fhVDNDOzQnkmBZUoi5IVpW8A/YArS42PiBER0S8i+nXp0qWKIZqZWaE2Oc67DuheMNwNmFtcSdKhwPnA5yPigxzjMTOzRuR5pjAR2EFSb0ntgEHAmMIKkj4N3AIMjIh5OcZiZmYVyC0pRMQKYAgwDpgGjIqIqZKGSxqYVrsS2Az4o6TJksaUmZ2ZmTWBPJuPiIixwNiisgsL3h+a5/LNzGzt5JoUNjS9lt7Z5Muc1eRLNDNbd+7mwszMMk4KZmaWcVIwM7O
Mk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlsk1KUjqL2m6pBmShpYY317S3en4ZyT1yjMeMzNrWG5JQVJr4EbgMKAPcIykPkXVTgIWRsT2wM+BK/KKx8zMGpfnmcLewIyImBkRy4C7gCOL6hwJ3J6+vwc4RJJyjMnMzBqgiMhnxtJRQP+IODkdPg7YJyKGFNR5Ma1Tlw6/mtZZUDSvU4FT08GdgOm5BN2wzsCCRmtZtXh7Nx1v66ZVq+3dMyK6NFapTY4BlPrGX5yBKqlDRIwARlQjqHUlaVJE9KtlDC2Jt3fT8bZuWhv69s6z+agO6F4w3A2YW66OpDZAR+DtHGMyM7MG5JkUJgI7SOotqR0wCBhTVGcMcHz6/ijgscirPcvMzBqVW/NRRKyQNAQYB7QGbo2IqZKGA5MiYgzwG+AOSTNIzhAG5RVPFdS0+aoF8vZuOt7WTWuD3t65XWg2M7ONj59oNjOzjJOCmZllWlRSkHSrpHnp8xH1ZVtJekTSK+nfLYum2UvSyvS5i8LyLST9W9IvSixnTOEyWjJJrSU9L+nBdHhI2q1JSOpcUO9cSZPT14vpNt9K0k4F5ZMlvSfp7ILpzky7Upkq6We1WMcNQbntJOkSSVPSsoclbZvW31nS3yR9IOn7RfMq2T2NpEMkPZfO66+Stm/q9aylWh0/GltGtbWopADcBvQvKhsKjI+IHYDx6TCQddVxBcnF8mKXAE8UF0r6KrCoSvE2B98BphUMTwAOBV4rrBQRV0bEHhGxB/BD4ImIeDsipheU9wWWAKMBJB1E8lT87hGxK3BV/quzYWpgO10ZEbun5Q8CF6aTvA2cRdE2a6R7mpuAY9N53QlckPNqbWhuozbHj7LLyEOLSgoR8SRrPgdR2NXG7cB/FYw7E7gXmFc4gaS+wDbAw0XlmwHfA35Svag3XpK6AV8Gfl1fFhHPR8SsRiY9BhhZovwQ4NWIqE8o3wIuj4gP0nnPKzFNS5Rtp4h4r6B8U9KHQyNiXkRMBJYXTdtQ9zQBbJG+78iazx01azU8fjS0jKprUUmhjG0i4nWA9O9HASR1Bb4C3FxYWVIr4Grg3BLzuiQdtyTPgDci1wLnAasqnUDSJiTfxu4tMXoQqyeLHYED0h52n5C01/oE24ystp0kXSppDnAsH54plNMVmFMwXJeWAZwMjJVUBxwHXF61iDdeTXH8KLmMvDgplHct8IOIWFlUfgYwNiIK/3GQtAewfUSMbqoAN2SSDgfmRcSzaznpEcCEiFjtG1n6AORA4I8FxW2ALYF9Sf7JRkktu0PFUtspIs6PiO7AH4Ah5aatn0WJsvr71r8LDIiIbsBvgWvWP+Jma6M9fuTZ99HG4k1JH4+I1yV9nA9P9foBd6XHmM7AAEkrgP1Ivp2eAWwGtJO0iKSNvK+kWSTb9aOSHo+IA5t2dTYY+wMDJQ0AOgBbSPp9RHyjkemKzwbqHQY8FxFvFpTVAfelT8H/XdIqks9q/vqHv9EqtZ3q3Qk8BFzUwPQlu6eR1AX4VEQ8k5bfDfy5CvFu7Jri+FFuGfmIiBb1AnoBLxYMXwkMTd8PBX5WYprbgKNKlA8GftHYMlr6CzgQeLCobBbQuaisvu+rTUvM4y7ghKKy04Hh6fsdSZo9VOv1rfG2Xm07ATsUvD8TuKeo/jDg+wXDbYCZQG+gHfAPYNe0fAGwY1rvJODeWq+i7vn/AAACi0lEQVRvDbZvkx8/KllGNV8t6kxB0kiSA1TntF30IpJ20VGSTgJmA1+rXYTNn6SzSK4zfAyYImlspN2rk7TBPhwRi4um2QT4AnB
a0exuBW5Nb99bBhwf6X9OS1RmO10uaSeS6zqvkSRSJH0MmERy4XhVeptvn4h4TyW6p0mnOQW4Nz0jWwic2DRrtmGo4fGjSY9R7ubCzMwyvtBsZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4K1GJIel/SlorKzJf2ygWmq2uOtpNuKu1E225A4KVhLMpI1fwe8XLcaZi2Sk4K1JPcAh0tqDyCpF7AtMFnS+PQHZF6QdGTxhJIOVPpDQenwLyQNTt/3TXtpfVbSuLR/mkaVmy49o7lC0t8lvSzpgPVdcbNKOSlYixERbwF/58MfShlE0rHb+8BXImJP4CDg6kp7W5XUFriBpG+bviRdb1xahenaRMTewNk03IGdWVW1qL6PzPiwCelP6d8TSbqL/qmkz5H0EdSV5EdQ3qhgfjsBuwGPpHmkNfB6Faa7L/37LEkHaWZNwknBWpr7gWsk7Ql8JCKeS5uBugB9I2J52n1xh6LpVrD6mXX9eAFTI2K/tYyjsek+SP+uxP+n1oTcfGQtSkQsAh4naa6pv8DckeQHgZanv/vcs8SkrwF9JLWX1JHkJy8BpgNdJO0HSbOQpF0rCGVdpzPLlb+BWEs0kqR5pv5OpD8AD0iaBEwG/lk8QUTMkTQKmAK8Ajyfli9LbzG9Pk0WbUh+dWtqQwGs63RmeXPX2WZmlnHzkZmZZdx8ZJYDSTeS/E51oesi4re1iMesUm4+MjOzjJuPzMws46RgZmYZJwUzM8s4KZiZWeb/AVRzNc6YsQUIAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Bigger non json value_len: \", max_non_json_value_len)\n", + "plotPercentualComparison(df[df.value_len < (max_non_json_value_len)], title=\"Up to the bigger NON-JSON: json X other\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Still, what about that first bin (the fist 1/10th)?" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "first 1/10th: 10465.3\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHzVJREFUeJzt3XmUFOW9xvHvw7C5ISpohGEzCkqMK2pUTFwjbphVMcQrccviGlfu1ajBY+ISYxLjEqLGJYoSYwxGIu5LNBJwwyCiiCgjKoi4ICLL/O4fVVM2Q89MA13TA/N8zplDV9VbVb/uGfrpeqvqbUUEZmZmAG0qXYCZmbUcDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FGyVSOopab6kqkrXUm6S9pRUswrr/1PSUeWsqZIk9ZYUktpWuhbLj0PBSiJphqRP0wCo++kWEW9GxLoRsXQltjlM0r+aaHOYpKckLZD0aANtvifptvTxSElTJdVKGlak7U8lvSPpQ0k3SOpQsCwkbb6iz6MhEXFARNxUru01RdIh6XPbsGDeoZLekrT+SmxvhqR9y1ultXQOBVsRh6QBUPczq7HGSqzq39j7wG+AixtpcyAwNn38AvAT4Nki9ewPDAf2AXoDmwE/X8X6WoyIuAd4GLgCQFJn4BrgxxHxYSVrq89HGy2XQ8FWSf0uBUmPSrpI0pPAAmCz9IhguqSPJb0uaaikrYBrgV3To44Pim0/Ih6MiNFA0QBKQ2c/4L60/VUR8RCwsEjzo4DrI2JyRMwDLgSGpdt5PG3zQlrP4QX7OF3SbElvS/rBCrw2j0o6Nn28uaTH0iOU9yTdUdBuN0kT0mUTJO1WbxsXSnoyff3ul9Slkd2eDByQBuAVwGMRMaaRGgdLmizpg3RfW6XzbwF6Avekr8dZBasNlfRm+jzOKdhWG0nDJb0maa6k0XVHLQV/J8dIepMkvKwligj/+KfJH2AGsG+R+b2BANqm048CbwJfAtoC6wMfAf3S5ZsCX0ofDwP+VeL+jwUeLTL/K8C/i8z/FzCs3rwXgMMLpruktW+UTgewecHyPYElwAigHckRyQJgg3T594BJjdT8KHBs+ngUcA7JB7GOwMB0/obAPODI9PU6Ip3eqGAbrwF9gbXS6YubeK2OAN4D5gBdG2nXF/iEJFTbAWcB04D2
xX7nBb/rP6a1bAt8BmyVLj8VeBqoBjoAfwBG1Vv3ZmAdYK1K/037p/iPjxRsRdydfqL8QNLdjbS7MZJP40tI3lRrga0lrRURb0fE5DLWdBCfdx01ZV2gsBul7vF6jayzGBgREYsjYiwwH+gHEBG3RcQ2Je57MdAL6BYRCyOi7lzKQcCrEXFLRCyJiFHAy8AhBev+KSJeiYhPgdHAdk3s62mSML4/IuY00u5w4N6IeCAiFgO/Inmz362RdQB+HhGfRsQLJEG7bTr/h8A5EVETEZ8BFwDfqddVdEFEfJI+F2uBHAq2Ir4REZ3Tn2800m5m3YOI+ITkzedHwNuS7pW0ZRlrKjyf0JT5QKeC6brHHzeyztw03OosIAmXFXUWIOA/aXfN0en8bsAb9dq+AXQvmH5nBfc/kuQT+YGFXVFFLLPviKgl+d11b3CNxuvpBfyt7oMDMAVYCmxS0H4m1qI5FCwPywy9GxHjImI/kq6jl0m6H5Zrt6IkfSHd5nInlRswmc8/1ZI+fjci5q5KHaWIiHci4riI6Ebyifrq9EqnWSRvpoV6Am+tzH4kHQP0IDnZ/n/AHyW1b6D5MvuWpHTdun2v6O9nJnBAwQeHzhHRMSIKn4uHZW7hHAqWK0mbpCcz1yHpf55P8ukR4F2gupE3LSRVSepI0t/eRlJHSe3SxQcC90VEFLRvn7YX0C5tX/d3fjNwjKT+kjYAzgVuLNjduyRXJJWdpO9Kqk4n55G8OS4lOcrpm15W2zY9wd0f+MdK7KMbcBlwXNp9cy0wl+RcRjGjgYMk7ZO+pqeT/I6eSpev6OtxLXCRpF5pPV0lHbqiz8Mqy6FgeWtD8mYzi+Ty0q+RfIqF5AqUycA7kt5rYP0jgU9JLq3cI31cd6RRrOvo/rTNbiTdKJ8CXwWIiPuAS4FHSLpN3gDOL1j3AuCmtPvjsKaeWHoVVannR3YCxkuaD4wBTomI19OjlINJXqO5JN1MB0dEQ69HY64Gbo+IJwDSsDwOOFXSl+o3joipwPeBK0lOTB9CctnxorTJL4Fz09fjjBL2/9v0ud0v6WOScxu7rMTzsApSwYcss9VGevLyHeCL0cKuwa+TXuZ6XUTcXOlazErlIwVbXW0I/KwFB8LaJF0vr1e6FrMVkVsoKBlCYLak/zawXJJ+J2mapEmSdsirFlvzRMTsiLim0nUUI2ljkqOYx0julzBbbeR5pHAjMKiR5QcAW6Q/x5P0GZut9tLA6hQRQ8P9s7aayS0UIuJxkhOLDTkUuDkSTwOdJW2aVz1mZta0Sg5K1Z1lb2SpSee9Xb+hpONJjiZYZ511dtxyy5W892nWcyu33qrotn1l912J/VZy363x9a7wc37xreY/rfPl7umgr63w9V5ZzzzzzHsR0bWpdpUMBRWZV/RQOyJGklxeyIABA2LixIkrt8cLVnj04FV3wcTK7rsS+63kvlvj650+597D7232Xc+4+KCK7HfixQcBlXvOld73ypBU/875oip59VENyd2TdappYCRMMzNrHpUMhTHA/6RXIX0F+DAilus6MjOz5pNb95GkUSRDD3dR8pWG55MMz0tEXEtyJ+qBJEP1LgBKHqfezMzykVsoRMQRTSwP4IS89m9WCb0X3tbs+5zR7Hts+Tp1aMNJu2xAr87tUNHTlytvypQpAPxxcPNfLFm378Z07NiR6upq2rVr12TbYvyVeGa2xjlplw3Y4YvdaLv2eiSDv5bPVtWdAVhcU/TLAnNVt++GRARz586lpqaGPn36rNQ+PMyFma1xenVul0sgtHSS2GijjVi4sNi30ZbGoWBmaxyhVhcIdVb1eTsUzMws43MKZrbGG/z7J8u6vVJuIvtKv2qemvIml17wv/znyceRRIcOHbn0mj9R3bMXH3/0IRefdzbPTxgPwHY77cLwEZewXqf1eWvmmxy427acPeISvveD4wH4xblnMmjP3Rk2bFhZn0t9PlIwM8vJuDF3Mefdd7jzgSf564NPccV1
t9CpU3LX+wVnnkx1z97c++Rz3Pvkc3Tv0Yufn3VKtu6GXbpy2/XXsnjRooY2nwuHgplZTubMfpcuG29CmzbJW+0mm3anU+fOvPn6dF568XmOP+XMrO0PTz2LyZOeY+aM5Cs4NthwI3Ye+FXG3DmqWWt2KJiZ5WT/Q77B4w/ex2H778GvRpzLlP9OAmD6qy/Tr/+XqaqqytpWVVXRr/+Xee2Vz+9FOPonP+XmkVexdOnS5badF4eCmVlONtm0O39/dAInDz+PNm3E8UMOZfy/HiOigauEkgXZZHXPXmy93Q6MvfsvzVazTzSbmeWofYcODNxrPwbutR8bddmYh8fdy9Cjf8TLkydRW1ubdS3V1tYydcp/2Wzzfsusf+yJp3H6D49ix112a5Z6faRgZpaTKS++wOx3knE+a2treeXlyXTr3oOefTZjyy9tw8jf/SprO/J3v2KrrbelZ5/NltlGn8378sW+W/L4Q+OapWYfKZjZGm/MibuXbVvbNDHUBMCSJUto37497783h5+ffQqLPvsMgK2325Ehw44D4OeXXckvzzuLgwfuQESwzY47ccFlVxbd3rEnncbhg75WtufQGIeCmVmZvfbKFKp79WH3vfZl9732LdqmU+fO/PJ3I4su696jJ3c99O9sul//L/P8m++XFEiryqFgZlZGo2+5gVF/GsmZ5/+i0qWsFIeCmVkZHXbk0Rx25NGVLmOl+USzmZllHApmZpZxKJiZWcahYGZmGZ9oNrM13jbX9SrvBi/4sMkm7779Fr8450ymvzqV2tpavrrv/px2zghee3Uqc959mz32/joA1/z6YtZeex2O+tFJ5a1xJflIwcyszCKCnx73P+y1/0Hc88QzjHl8Igs++YQrL72QqZNf5ImHHyjbvso9WJ6PFMzMyuw/Tz5Ohw4d+MbhQ4FkBNQzz7+IQbtuQ9u27SCC5yc8zdEn/BSA116dyjHfPZi3Z9Uw9JgfM/ToHwLwj7vu4LYbRrJk8SK23n5H7rjpeqqqqlh33XU57bTTGDduHJdffjkDBw4sW+0+UjAzK7Npr7xM/y9vt8y8ddfrRLfqnhx38hl8/ZBvMnrcEwwa/C0AZrz2Ctf8+a/ces9D/OGKS1i8eDHTX53KuHv+xk1/u4/R456gqk0Vt956KwCffPIJW2+9NePHjy9rIICPFMzMyq/eENiF84sNmb3H3l+nfYcOtO/QgQ27dOX992Yz/snHmDLpBYYevDcACxcuZKvNegDJkce3v/3tXEp3KJiZldkX+27Jg2PHLDNv/scf8c6st6hqs3wHTfv2HbLHbdq0YcmSpUTAId8dwinDz8+W1Y191LFjx2W+oKec3H1kZlZmuwz8Ggs//ZR77rwdSE4GX37hzxj83e+xUdeNWfDJ/Ka3sftXefDeMcx9bw4AH86bxxtvvJFr3eAjBTNrBSYdW74301JGKpXEFdfdwkXnnMHI315GbW0tA/fej5PP/hmfLljADVf9hsP23yM70VzMF/tuyQlnnsOPh36L2tpa2rZrxw0jr6VXrzJfXluPQ8HMLAdf6FbNlX+6fbn57Tt04LZ7H25wvcIhswcN/lZ2Mho+D6T585s+0lhZ7j4yM7OMQ8HMzDIOBTNb4wRBRFS6jIpY1eftUDCzNc4bHyxmyYKPWl0wRARz586lY8eOK70Nn2g2szXOlePncRLQq/N7iCI3ka2CKR+vBcC78z4t63ZXZN+N6dixI9XV1Su9D4eCma1xPvqslosen5vLtmdcfBAABwy/N5ftl7LvPLn7yMzMMrmGgqRBkqZKmiZpeJHlPSU9Iuk5SZMkHZhnPWZm1rjcQkFSFXAVcADQHzhCUv96zc4FRkfE9sAQ4Oq86jEzs6bleaSwMzAtIqZHxCLgduDQem0C6JQ+Xh+YlWM9ZmbWhDxDoTsws2C6Jp1X6ALg+5JqgLFA0e+jk3S8pImSJs6ZMyePWs3MjHxDodh1YPUvGj4CuDEiqoEDgVskLVdTRIyM
iAERMaBr1645lGpmZpBvKNQAPQqmq1m+e+gYYDRARPwb6Ah0ybEmMzNrRJ6hMAHYQlIfSe1JTiSPqdfmTWAfAElbkYSC+4fMzCokt1CIiCXAicA4YArJVUaTJY2QNDhtdjpwnKQXgFHAsGht96WbmbUgud7RHBFjSU4gF847r+DxS8DuedZgZmal8x3NZmaWcSiYmVnGoWBmZhmPkmprpN4Lb2v2fc5o9j2alZ+PFMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzTK6hIGmQpKmSpkka3kCbwyS9JGmypNvyrMfMzBrXNq8NS6oCrgL2A2qACZLGRMRLBW22AP4X2D0i5knaOK96zMysaXkeKewMTIuI6RGxCLgdOLRem+OAqyJiHkBEzM6xHjMza0KeodAdmFkwXZPOK9QX6CvpSUlPSxpUbEOSjpc0UdLEOXPm5FSumZnlGQoqMi/qTbcFtgD2BI4ArpPUebmVIkZGxICIGNC1a9eyF2pmZok8Q6EG6FEwXQ3MKtLm7xGxOCJeB6aShISZmVVAnqEwAdhCUh9J7YEhwJh6be4G9gKQ1IWkO2l6jjWZmVkjcguFiFgCnAiMA6YAoyNisqQRkganzcYBcyW9BDwCnBkRc/OqyczMGpfbJakAETEWGFtv3nkFjwM4Lf0xM7MK8x3NZmaWcSiYmVnGoWBmZpmSQkHSdyWtlz4+V9JdknbItzQzM2tupR4p/CwiPpY0ENgfuAm4Jr+yzMysEkoNhaXpvwcB10TE34H2+ZRkZmaVUmoovCXpD8BhwFhJHVZgXTMzW02U+sZ+GMmNZoMi4gNgQ+DM3KoyM7OKKCkUImIBMBsYmM5aAryaV1FmZlYZpV59dD5wNskX4gC0A/6cV1FmZlYZpXYffRMYDHwCEBGzgPXyKsrMzCqj1FBYlI5TFACS1smvJDMzq5RSQ2F0evVRZ0nHAQ8Cf8yvLDMzq4SSRkmNiF9J2g/4COgHnBcRD+RamZmZNbsmQ0FSFTAuIvYFHARmZmuwJruPImIpsEDS+s1Qj5mZVVCpX7KzEHhR0gOkVyABRMTJuVRlZmYVUWoo3Jv+mJnZGqzUE803SWoP9E1nTY2IxfmVZWZmlVBSKEjak2S47BmAgB6SjoqIx/MrzczMmlup3UeXA1+PiKkAkvoCo4Ad8yrMzMyaX6k3r7WrCwSAiHiFZPwjMzNbg5R6pDBR0vXALen0UOCZfEoyM7NKKTUUfgycAJxMck7hceDqvIoyM7PKKDUU2gK/jYhfQ3aXc4fcqjIzs4ooNRQeAvYF5qfTawH3A7vlUZSVT++Ft1VkvzMqslczW1WlnmjuGBF1gUD6eO18SjIzs0opNRQ+kbRD3YSkAcCn+ZRkZmaVUmr30anAXyTNIvminW7A4blVZWZmFdHokYKknSR9ISImAFsCdwBLgPuA15uhPjMza0ZNdR/9AViUPt4V+D/gKmAeMDLHuszMrAKa6j6qioj308eHAyMj4q/AXyU9n29pZmbW3Jo6UqiSVBcc+wAPFywr9XyEmZmtJpp6Yx8FPCbpPZKrjZ4AkLQ58GHOtZmZWTNrNBQi4iJJDwGbAvdHRKSL2gAn5V2cmZk1r1K+o/npiPhbRBR+DecrEfFsU+tKGiRpqqRpkoY30u47kiK9/8HMzCqk1JvXVlg6PtJVwAFAf+AISf2LtFuPZKC98XnVYmZmpcktFICdgWkRMT0iFgG3A4cWaXchcCmwMMdazMysBHmGQndgZsF0TTovI2l7oEdE/KOxDUk6XtJESRPnzJlT/krNzAzINxRUZF5kC6U2wBXA6U1tKCJGRsSAiBjQ
tWvXMpZoZmaF8gyFGqBHwXQ1MKtgej1ga+BRSTOArwBjfLLZzKxy8gyFCcAWkvpIag8MAcbULYyIDyOiS0T0jojewNPA4IiYmGNNZmbWiNxCISKWACcC44ApwOiImCxphKTBee3XzMxWXq5DVUTEWGBsvXnnNdB2zzxrMTOzpuXZfWRmZqsZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmaZXL95zVq33gtva/Z9zmj2PZqtWXykYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWSbXL9mRNAj4LVAFXBcRF9dbfhpwLLAEmAMcHRFv5FlTpfgLZ8xsdZDbkYKkKuAq4ACgP3CEpP71mj0HDIiIbYA7gUvzqsfMzJqWZ/fRzsC0iJgeEYuA24FDCxtExCMRsSCdfBqozrEeMzNrQp6h0B2YWTBdk85ryDHAP4stkHS8pImSJs6ZM6eMJZqZWaE8Q0FF5kXRhtL3gQHAZcWWR8TIiBgQEQO6du1axhLNzKxQnieaa4AeBdPVwKz6jSTtC5wDfC0iPsuxHjMza0KeRwoTgC0k9ZHUHhgCjClsIGl74A/A4IiYnWMtZmZWgtxCISKWACcC44ApwOiImCxphKTBabPLgHWBv0h6XtKYBjZnZmbNINf7FCJiLDC23rzzCh7vm+f+zcxsxfiOZjMzy+R6pNDS+K5iM7PG+UjBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyuYaCpEGSpkqaJml4keUdJN2RLh8vqXee9ZiZWeNyCwVJVcBVwAFAf+AISf3rNTsGmBcRmwNXAJfkVY+ZmTUtzyOFnYFpETE9IhYBtwOH1mtzKHBT+vhOYB9JyrEmMzNrhCIinw1L3wEGRcSx6fSRwC4RcWJBm/+mbWrS6dfSNu/V29bxwPHpZD9gai5FN64L8F6Traxc/Ho3H7/WzatSr3eviOjaVKO2ORZQ7BN//QQqpQ0RMRIYWY6iVpakiRExoJI1tCZ+vZuPX+vm1dJf7zy7j2qAHgXT1cCshtpIagusD7yfY01mZtaIPENhArCFpD6S2gNDgDH12owBjkoffwd4OPLqzzIzsybl1n0UEUsknQiMA6qAGyJisqQRwMSIGANcD9wiaRrJEcKQvOopg4p2X7VCfr2bj1/r5tWiX+/cTjSbmdnqx3c0m5lZxqFgZmaZVhsKkm6QNDu9V6Ju3oaSHpD0avrvBvXW2UnS0vQejLp5PSXdL2mKpJc8VEfDJFVJek7SP9LpE9MhTkJSl4J260u6R9ILkiZL+kG97XSS9Jak3zf3c1gdSOon6fmCn48knSrpQkmT0nn3S+qWth+azp8k6SlJ26bze0h6JP3bnizplMo+s5Yl7/eQ9CKd8em27kgv2Mldqw0F4EZgUL15w4GHImIL4KF0GsiG7biE5MR5oZuByyJiK5K7uGfnVfAa4BRgSsH0k8C+wBv12p0AvBQR2wJ7ApfX+w9xIfBYjnWu1iJiakRsFxHbATsCC4C/kfydbpPO/wdwXrrK68DXImIbkte27kToEuD09G/7K8AJRYaqac1uJN/3kEuAK9JtzSMZFih3rTYUIuJxlr8nonDYjZuAbxQsOwn4KwVv+ul/kLYR8UC6zfkRsSC3oldjkqqBg4Dr6uZFxHMRMaNI
8wDWS4c8WZfk97Qk3c6OwCbA/XnXvIbYB3gtIt6IiI8K5q9DeqNoRDwVEfPS+U+T3FNERLwdEc+mjz8mCfTuzVZ5C5fne0j6t783yfA/xbaVm1YbCg3YJCLehuQ/BLAxgKTuwDeBa+u17wt8IOmutFvksvTTgC3vN8BZQG0JbX8PbEVys+OLwCkRUSupDXA5cGZuVa55hgCj6iYkXSRpJjCUz48UCh0D/LP+zLRLY3tgfC5VrjnK9R6yEfBBRCxJ29XQTIHsUCjNb4CzI2JpvfltgT2AM4CdgM2AYc1bWssn6WBgdkQ8U+Iq+wPPA92A7YDfS+oE/AQYGxEz86l0zZJ2uQ0G/lI3LyLOiYgewK3AifXa70USCmfXm78uySfcU+sdbVjpVvQ9pKQhgPKQ59hHq6N3JW0aEW9L2pTPD/MGALenA7h2AQ6UtIQkvZ+LiOkAku4m6Xu9vvlLb9F2BwZLOhDoCHSS9OeI+H4D7X8AXJze3T5N0uvAlsCuwB6SfkLSrdRe0vyIWO67OgxIhq1/NiLeLbLsNuBe4HwASduQdO0dEBFz6xpJakcSCLdGxF35l7zaK9d7yA1AZ0lt06OFYsME5cJHCssqHHbjKODvABHRJyJ6R0Rvkj6+n0TE3SRDeWwgqW7kwb2Bl5q35JYvIv43IqrT128IyXAmDQUCwJskfeFI2oRkZNzpETE0Inqm2zkDuNmB0KgjWLbraIuCZYOBl9P5PYG7gCMj4pWC9iL5gDMlIn7dLBWv/sryHpJ+IHqEZPifZbaVt1YbCpJGAf8G+kmqkXQMcDGwn6RXgf3S6Qalh4JnAA9JepHkkO+P+Va+5pB0sqQakk9BkyTVnYS+ENgtfU0fIjns9tDOK0DS2iR/w4Wf7i+W9F9Jk4Cvk1wNBsm5hY2Aq9PLVSem83cHjgT2Lri89cBmegotXjO8h5wNnKZkGKCNaKYeCA9zYWZmmVZ7pGBmZstzKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYK2GpEcl7V9v3qmSrm5knfllruHGwmGTzVoah4K1JqNY/nvAlxkwzqy1cyhYa3IncLCkDpCN/NkNeF7SQ5KelfSipEPrryhpT6VfDpRO/17SsPTxjpIek/SMpHHpmDdNami99IjmEkn/kfSKpD1W9YmblcqhYK1GOtDbf/j8i1GGAHcAnwLfjIgdgL1IvtSn2CiVy0kHjLsS+E5E7EgykNlFZVivbUTsDJxKOmidWXPwKKnW2tR1If09/fdokvFmfiHpqyTf99Cd5It83ilhe/2ArYEH0hypAt4uw3p1YxY9A/QuYXtmZeFQsNbmbuDXknYA1oqIZ9NuoK7AjhGxWNIMkiG+Cy1h2SPruuUCJkfEritYR1PrfZb+uxT/P7Vm5O4ja1UiYj7wKEl3Td0J5vVJvgRocfpFM72KrPoG0F9SB0nrkw7tDUwFukraFZJuIUlfKqGUlV3PLFf+BGKt0SiS7pm6K5FuBe5Jh4x+nvR7BgpFxExJo4FJwKvAc+n8Reklpr9Lw6ItybdsTW6sgJVdzyxvHjrbzMwy7j4yM7OMu4/MciDpKpJvLiv024j4UyXqMSuVu4/MzCzj7iMzM8s4FMzMLONQMDOzjEPBzMwy/w/fmB3hK55bmAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"first 1/10th: \", max_non_json_value_len/10)\n", + "plotPercentualComparison(df[df.value_len < (max_non_json_value_len/10)], title=\"First 1/10th: json X other\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Distribution for the values below the mean\n", + "This is where about 95% of the rows are. They do include JSON values, but as we can see, most of this data is of some other type (not JSON), and it is evenly distributed over this value range. Why is that?" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The values smaller than the mean represent 95.57% of the whole sample\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHoFJREFUeJzt3XmclXXd//HXm2FzSw1GEwYcVDBRH7kQ1g11U7ggJrRYapZZKm1g/NzSXNO8y8rcbjdcyi0UNRWVxC2X7FYBdyAUEWUk2VQUUNk+vz+uay6PwyyH4VxznJn38/E4j7n28znXzFzvc32vc76XIgIzMzOADuUuwMzMPjkcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEoWJMkDZFUUzA+V9LeLVzDw5KOasnnLDdJf5f0g3LXUSqSqiWFpI7lrsUa5lBoJ9ID+fuSlkl6W9I9knqVu676SDpT0g3lrqPcImL/iLi2pZ5P0oGS3pT06YJpIyW9IWnzZmyvxd882IZzKLQvB0bEpsA2wALg4jLXY58gEXEX8BBwPoCkLYDLgJ9GxNJy1laXzzby41BohyLiA+BWoH/tNEldJP1R0uuSFki6XNJGTW0rXe8CSfPTxwWSuqTzHpH0rXR4cNp0MDwd31vSs/VsbxjwK+Dg9KzmuYLZ20p6XNJ7ku6T1L1gvS9I+pekdyQ9J2lIIzXPlXSCpOclLZd0taSt0+aa9yQ9IGnLYrYt6YeSZqbrzZH044J5QyTVSDpO0kJJ/5H0w6b2acH6WZOZpB3S/blU0mJJNxcs91+SpqTzpkj6rzrbOLuh/VaPY4D9Je1HEg6PRMTERmocIWl6um8elrRTOv16oDdwV/p7PLFgtcPSv7PFkk4p2FYHSSdJekXSEkkTas9aCpqejpT0Okl4WR4iwo928ADmAnunwxsD1wLXFcy/AJgIfBrYDLgL+G06bwhQ08C2zgKeALYCKoF/AWcXzLs4Hf4V8ApwbsG8Cxuo9UzghjrTHk7X7wdslI7/Lp3XE1gCDCd5o7NPOl7ZyL54Atg6XXch8DSwO9CF5IBzRjHbBg4AtgcE/DewAtijYL+tTl9rp3QbK4At0/nfBZ5v5Hf2MHBUOjweOCWtoSswOJ3+aeBt4PtAR+DQdLxbU/utkec9FFgMLGpoH6bL9QOWp/ukE3AiMBvoXPfvJB2vBgK4Mq3lc8CHwE7p/LHp76Uq/T1cAYyvs+51wCbARuX+n2qrj7IX4EcL/aKTf9BlwDvpgWo+sGs6T+k/9/YFy38ReDUdHkLDofAKMLxg3n7A3HR4aO1BD7gXOAp4Ih1/BPhmA7WeSf2hcGrB+M+Ae9PhXwLX11l+MvCDRvbFYQXjtwGXFYyPAe5o5rbvAH5RsN/eBzoWzF8IfKHI39nDfBQK1wHjgKo6y3wfeKrOtP8DjmhqvzXyvH2AVcCNTSx3GjChYLwD8AYwpO7fSTpee2CvKpj2FHBIOjwTGFowb5u0jo4F625X7v+ltv5w81H78vWI2ILkXdho4BFJnyF5h78xMC1tBniH5CBeWcQ2ewCvFYy/lk6D5ODUT9LWwG4kB7ZeafPFQODR9az/zYLhFcCm6fC2wLdra0/rH0xyUGnIgoLh9+sZL2rbkvaX9ISkt9J5w4HC5pklEbG6gbrXx4kk4f1U2lzzo3R63f1POt6zYLyh/daQcSS/q+GFTVH1+NhzR8RaYF6d565PY7/H2wv280xgDckZXa15TWzbNpAv1rRDEbEG+JukK0gOcH8jORDuHBFvrOfm5pP8M09Px3un04iIFZKmAb8AXoyIlZL+BRwLvBIRixsqcT1rmEfybv7o9Vxvg7adXju5DTgcuDMiVkm6g+TgXVIR8SZwdPq8g4EHJD3KR/u/UG+SUF9vko4EegFfA6YCV0raPSJW1rP4fGDXgnWVrlv7N9Sc3+OPIuLxeuqqbuY2bT35TKEdUmIksCUwM32HdyV
wvqSt0mV6phcbmzIeOFVSZXoGcDpQ+HHSR0jPStLxh+uM12cBUC2p2L/PG4ADJe0nqUJS1/Qib1WR6zd3251JzroWAasl7Q/sW4LnXIekbxe8nrdJDo5rgEkkZ2PfldRR0sEkHyC4uxnP0QP4A3B0RHwIXE5y/eSUBlaZABwgaaikTsBxJNcI/pXOXwBstx4lXA6cI2nbtJ7K9O/UWpBDoX25S9Iy4F3gHJJ28dp3+L8kuUj4hKR3gQeAHYvY5m9I3lE+D7xAcsH2NwXzHyG5cP1oA+P1uSX9uUTS000VEBHzgJEkF7MXkbzjPIES/H03tu2IeI/k0zoTSA7U3yW5WF8USYdJmt70kgB8Hngy/f1NJLlu8WpELCF5V38cyQH8ROBrjZyFNeZS4KaIeAwgkob9o4Gxknauu3BEzAK+R/LR5sXAgSQfe649q/gtyRuGdyQdX8TzX5i+tvskvUdy0XmvZrwO2wBKL+iY2SdM2jx0VURcV+5arP3wmYLZJ5CkjUmaXl4tdy3WvjgUzD5h0us6b5I0tf2zzOVYO+PmIzMzy/hMwczMMq3uewrdu3eP6urqcpdhZtaqTJs2bXFENPmF1FYXCtXV1UydOrXcZZiZtSqS6n7zvV5uPjIzs4xDwczMMg4FMzPLtLprCmZmTVm1ahU1NTV88MEH5S6lxXXt2pWqqio6derUrPUdCmbW5tTU1LDZZptRXV1N0nlr+xARLFmyhJqaGvr06dOsbbj5yMzanA8++IBu3bq1q0AAkES3bt026Awpt1CQdE16X9oXG5gvSRdJmq3kXrl75FWLmbU/7S0Qam3o687zTOEvwLBG5u8P9E0fo4DLcqzFzMyKkNs1hYh4tOBuSfUZSXLj+CDpw38LSdtExH/yqsnM2qfqk+4p6fbm/u6AJpfZdNNNeffddxk7diwPPfQQkujatSsTJkygT58+LF26lDFjxvD448mN5gYNGsTFF1/M5ptvzty5c+nTpw8XXXQRY8aMAWD06NEMGDCAI444oqSvpa5yXmjuycfvt1qTTlsnFCSNIjmboHfv3s1/xjM3b/66zX7OpeV97nI8bzmfuz3ub7/mde03Aebn/Mmj+c80Pj/WcvPl5zL/lRk8f++1dOjQgZr5C9jk/ddh/jscefQJ7PLZ7bnusVsBOOOPl3HUYQdxy7jfw4L5bNX901z4p9/z4wP3onPnTrB8Ub6vJ1XOC831NXzV22VrRIyLiAERMaCysph7yZuZld9/Fixmm62706FDcqit6rE1W27xKWa/+jrTXpjJaWM/uvX36f9vFFOfn8Erc5P3ypXdtmTooIFce8tdLVpzOUOhhuQm37WqSG/4bmbWFnznwH246/5H2W2fQzju13/imRf/DcCMl19lt513pKKiIlu2oqKC3XbekekvzcmmnTT6h5x3xQ2sWbOmxWouZyhMBA5PP4X0BWCpryeYWVtS1WNrZj16O789eQwdOoihB/+EBx97koigvg8JRcTHmlD69O7JwN125q+3/73Fas7tmoKk8cAQoLukGuAMoBNARFwOTAKGk9wsfgXww7xqMTMrly5dOrP/Vwex/1cHsXVlN+6Y/DC/OPJQnnlxFmvXrs2altauXctzM15ip74f/9LZr475EQeNOpEv79Uyn9rP7UwhIg6NiG0iolNEVEXE1RFxeRoIROLnEbF9ROwaEe4P28zalKdfmMn8N5MLxGvXruX5GS+zbdU27NCnN7vvsiO/ufCqbNnfXHgVe+z6WXbo8/EP03x2hz7077sddz/wWIvU7G4uzKzNm3tMj9JtrMfuTS6yevVqunTuzMLFb3H0CWfz4cpVAAzcbWdGH3EwAFf/8QzGnHYuOwwaQQR8cc9dufqPZ9S7vVOOOZLd9zu0dK+hEQ4FM7MSmz5rDttXVzHsK4MY9pVB9S6z5Raf4oaLz6l3XnWvHrz40C3Z+Od27sfammlFBdKGciiYmZXQ5dfdykXXjOeCXx9f7lKaxaFgZlZ
CPzn8IH5y+EHlLqPZ3EuqmZllHApmZpZxKJiZWcahYGZmGV9oNrO2b9yQ0m6vtpfWRtTMX8DPT/kdM16aw9oIvrb3l/jDqWOZ8dIc5i9YxPChg5NNnXc5m26yMcf/5PDS1thMPlMwMyuxiOCbRx/P14cN4eXH7+Slx25n2fIVnHLu//Ls9FlMeuifJXuuUneW5zMFM7MSe+ifT9G1S2d+ePBIIOkB9fwzj2PbvQ6gU8eORAT/fOpZTh6ddPk246U5DDnoaF5/403GHvVdjjky+fbyDbfdw0XX3MTKlavYa/dduPTaCVRUVLDpppty7LHHMnnyZM477zwGDx5cstp9pmBmVmLTX5rDnrvu9LFpn9psU6qrenDqL47i4BH78uz9N3HwyP0A+PfsuUy+8RKeuuc6fv2ncaxatYqZL8/h5on38fgd1/Ds/TdRUVHBjTfeCMDy5cvZZZddePLJJ0saCOAzBTOzkku6xl63b+yGph8wdDBdunSmS5fObNV9SxYseosH//kU016YyeeHfx+A9z/4kK369AeSM49vfetbudTuUDAzK7Gd+23PbZMe/Ni0d99bxrz5C6josG4DTZcunbPhiooKVq9ZQwT84NsH8tuTx3y0YNr3UdeuXT92g55ScvORmVmJDf3SQFa8/wHX3XI3kFwMPu6s8zniOweydWU33lu2oultDB7IrXc/wMLFbwHw1ttLee2113KtG3ymYGbtwaiHS7etInoqlcTtV53Hz371W86+4ErWRjD8q4P4n5NGs3zF+/zukj+z2z6HZBea69O/33b85sSfse+hP2NtrKVTx45cMu7PbLvttqV7LfVwKJiZ5aBXz89w17UXrjO9S5fOTJl0Q4PrFXaZffDI/bKL0UAWSMuWLStdoXW4+cjMzDIOBTMzyzgUzKwNCiKi3EWUxYa+boeCmbU5XZfOYcny1e0uGCKCJUuW0LVr12ZvwxeazazNqXr6XGr4JYs23w5Y98tiG2TpzOTnOwtLu931ee5GdO3alaqqqmY/hUPBzNqcTivfoc8TJ+ez8doeUs/8Qj7bL+a5c+TmIzMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzy+QaCpKGSZolabakk+qZ31vSPyQ9I+l5ScPzrMfMzBqXWyhIqgAuAfYH+gOHSupfZ7FTgQkRsTtwCHBpXvWYmVnT8jxTGAjMjog5EbESuAkYWWeZAD6VDm8OzM+xHjMza0KeodATmFcwXpNOK3Qm8D1JNcAkYEx9G5I0StJUSVMXLVqUR61mZka+oVBff7V1Ozc/FPhLRFQBw4HrJa1TU0SMi4gBETGgsrIyh1LNzAzyDYUaoFfBeBXrNg8dCUwAiIj/A7oC3XOsyczMGpFnKEwB+krqI6kzyYXkiXWWeR0YCiBpJ5JQcPuQmVmZ5BYKEbEaGA1MBmaSfMpouqSzJI1IFzsOOFrSc8B44Ihob/fPMzP7BMn1zmsRMYnkAnLhtNMLhmcAg/KswczMiudvNJuZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpbJNRQkDZM0S9JsSSc1sMx3JM2QNF3SX/Osx8zMGtcxrw1LqgAuAfYBaoApkiZGxIyCZfoCJwODIuJtSVvlVY+ZmTUtzzOFgcDsiJgTESuBm4CRdZY5GrgkIt4GiIiFOdZjZmZNyDMUegLzCsZr0mmF+gH9JD0u6QlJw3Ksx8zMmpBb8xGgeqZFPc/fFxgCVAGPSdolIt752IakUcAogN69e5e+UjMzA/I9U6gBehWMVwHz61nmzohYFRG
vArNIQuJjImJcRAyIiAGVlZW5FWxm1t4VFQqSvi1ps3T4VEl/k7RHE6tNAfpK6iOpM3AIMLHOMncAX0m3252kOWnO+rwAMzMrnWLPFE6LiPckDQb2A64FLmtshYhYDYwGJgMzgQkRMV3SWZJGpItNBpZImgH8AzghIpY054WYmdmGK/aawpr05wHAZRFxp6Qzm1opIiYBk+pMO71gOIBj04eZmZVZsWcKb0i6AvgOMElSl/VY18zMWoliD+zfIWnqGZZ+MujTwAm5VWVmZmVRVChExApgITA4nbQaeDmvoszMrDyK/fTRGcAvSbqkAOgE3JBXUWZmVh7FNh99AxgBLAeIiPnAZnkVZWZm5VFsKKxMPykUAJI2ya8kMzMrl2JDYUL66aMtJB0NPABcmV9ZZmZWDkV9TyEi/ihpH+BdYEfg9Ii4P9fKzMysxTUZCul9ESZHxN6Ag8DMrA1rsvkoItYAKyRt3gL1mJlZGRXbzcUHwAuS7if9BBJARByTS1VmZlYWxYbCPenDzMzasGIvNF+bdn/dL500KyJW5VeWmZmVQ1GhIGkISXfZc0nuqNZL0g8i4tH8SjMzs5ZWbPPRecC+ETELQFI/YDywZ16FmZlZyyv2y2udagMBICJeIun/yMzM2pBizxSmSroauD4dPwyYlk9JZmZWLsWGwk+BnwPHkFxTeBS4NK+izMysPIoNhY7AhRHxJ8i+5dwlt6rMzKwsir2m8CCwUcH4RiSd4pmZWRtSbCh0jYhltSPp8Mb5lGRmZuVSbCgsl7RH7YikAcD7+ZRkZmblUuw1hbHALZLmk9xopwdwcG5VmZlZWTR6piDp85I+ExFTgM8CNwOrgXuBV1ugPjMza0FNNR9dAaxMh78I/Aq4BHgbGJdjXWZmVgZNNR9VRMRb6fDBwLiIuA24TdKz+ZZmZmYtrakzhQpJtcExFHioYF6x1yPMzKyVaOrAPh54RNJikk8bPQYgaQdgac61mZlZC2s0FCLiHEkPAtsA90VEpLM6AGPyLs7MzFpWk01AEfFEPdNeyqccMzMrp2K/vGZmZu2AQ8HMzDIOBTMzy+QaCpKGSZolabakkxpZ7iBJkfapZGZmZZJbKKT3XLgE2B/oDxwqqX89y21GcvOeJ/OqxczMipPnmcJAYHZEzImIlcBNwMh6ljsb+D3wQY61mJlZEfIMhZ7AvILxmnRaRtLuQK+IuLuxDUkaJWmqpKmLFi0qfaVmZgbkGwqqZ1pkM6UOwPnAcU1tKCLGRcSAiBhQWVlZwhLNzKxQnqFQA/QqGK8C5heMbwbsAjwsaS7wBWCiLzabmZVPnqEwBegrqY+kzsAhwMTamRGxNCK6R0R1RFQDTwAjImJqjjWZmVkjcguFiFgNjAYmAzOBCRExXdJZkkbk9bxmZtZ8uXZ/HRGTgEl1pp3ewLJD8qzFzMya5m80m5lZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWVyvUezmVmeqj/4a4s/59wWf8aW5TMFMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4z7Pmoh7qPFzFqDXENB0jDgQqACuCoifldn/rHAUcBqYBHwo4h4Lc+azMxKoa2+0cstFCRVAJcA+wA1wBRJEyNiRsFizwADImKFpJ8CvwcOzqsmMyu9tnpwbK/yvKYwEJgdEXMiYiVwEzCycIGI+EdErEhHnwCqcqzHzMyakGfzUU9gXsF4DbBXI8sfCfy9vhmSRgGjAHr37l2q+tqFcryLg/K/k/O7V7PmyfNMQfVMi3oXlL4HDAD+UN/8iBgXEQMiYkBlZWUJSzQzs0J5ninUAL0KxquA+XUXkrQ3cArw3xHxYY71mJlZE/I8U5gC9JXUR1J
n4BBgYuECknYHrgBGRMTCHGsxM7Mi5HamEBGrJY0GJpN8JPWaiJgu6SxgakRMJGku2hS4RRLA6xExIq+azNoyX0exUsj1ewoRMQmYVGfa6QXDe+f5/GZmtn7czYWZmWXczYXlxs0ZZq2PQ8GshByE1tq1q1DwP6yZWeN8TcHMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDK5hoKkYZJmSZot6aR65neRdHM6/0lJ1XnWY2ZmjcstFCRVAJcA+wP9gUMl9a+z2JHA2xGxA3A+cG5e9ZiZWdPyPFMYCMyOiDkRsRK4CRhZZ5mRwLXp8K3AUEnKsSYzM2uEIiKfDUsHAcMi4qh0/PvAXhExumCZF9NlatLxV9JlFtfZ1ihgVDq6IzArl6Ib1x1Y3ORSVire3y3H+7pllWt/bxsRlU0t1DHHAup7x183gYpZhogYB4wrRVHNJWlqRAwoZw3tifd3y/G+blmf9P2dZ/NRDdCrYLwKmN/QMpI6ApsDb+VYk5mZNSLPUJgC9JXUR1Jn4BBgYp1lJgI/SIcPAh6KvNqzzMysSbk1H0XEakmjgclABXBNREyXdBYwNSImAlcD10uaTXKGcEhe9ZRAWZuv2iHv75bjfd2yPtH7O7cLzWZm1vr4G81mZpZxKJiZWcahAEi6RtLC9HsTtdPOlvS8pGcl3SepR8G8Ien06ZIeKU/VrZukuZJeSPfj1DrzjpcUkrrXmf55SWvS78BYkST9QtKL6d/r2HTamZLeSPf/s5KGp9P3kTQt/d1Mk/TV8lbfOpTqGNJU10AtIiLa/QP4MrAH8GLBtE8VDB8DXJ4ObwHMAHqn41uVu/7W+ADmAt3rmd6L5MMJrxXOJ/mwwkPAJOCgctffWh7ALsCLwMYkHyx5AOgLnAkcX8/yuwM9CtZ9o9yvoTU8SnEMSf/GXwG2AzoDzwH9W/q1+EwBiIhHqfP9iIh4t2B0Ez76Ut13gb9FxOvpcgtbpMj243zgRNb9EuMY4DbA+3v97AQ8ERErImI18AjwjYYWjohnIqL2+0TTga6SurRAna1aiY4hxXQNlDuHQiMknSNpHnAYcHo6uR+wpaSH09Prw8tXYasWwH3pPhwFIGkEyTvT5woXlNST5EB2ecuX2eq9CHxZUjdJGwPD+ehLpaPT5o1rJG1Zz7rfAp6JiA9bqti2Zj2PIT2BeQWr16TTWpRDoRERcUpE9AJuBGr7bOoI7AkcAOwHnCapX5lKbM0GRcQeJL3o/lzSl4FT+Ogfp9AFwC8jYk1LFtgWRMRMkt6H7wfuJWmSWA1cBmwP7Ab8BzivcD1JO6fr/bgl621r1vMYUlS3P3lzKBTnryTvmiBJ73sjYnkkHfc9CnyubJW1UrVNFOmp8+3AfwN9gOckzSXpFuVpSZ8BBgA3pdMPAi6V9PVy1N0aRcTVEbFHRHyZpInj5YhYEBFrImItcCVJ0wUAkqpIfieHR8Qr5am6zSnmGFJM10C5cyg0QFLfgtERwL/T4TuBL0nqmJ6O7wXMbOn6WjNJm0jarHYY2BeYEhFbRUR1RFST/IPsERFvRkSfgum3Aj+LiDvKVX9rI2mr9Gdv4JvAeEnbFCzyDZJmJiRtAdwDnBwRj7d0rW1JM44hxXQNlLs8e0ltNSSNB4YA3SXVAGcAwyXtCKwl+STMTyA5HZd0L/B8Ou+qiHix3g1bQ7YGbk9vndER+GtE3Fvektq02yR1A1YBP4+ItyVdL2k3kuaJuXzUTDQa2IGkSeO0dNq+/kBF40p1DKmva6AWfy3pR6HMzMzcfGRmZh9xKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYO1G2tf
MfnWmjZV0aSPrLCtxDX9x19/2SeZQsPZkPOveB/yQdLqZ4VCw9uVW4Gu1XUFLqgZ6AM9KelDS0+nNZdbprji9KcrdBeP/K+mIdHhPSY+kPV5OrtOFRIMaWi89ozlX0lOSXpL0pQ194WbFcihYuxERS4CngGHppEOAm4H3gW+kvbZ+BThPaR8cTZHUCbiY5MY/ewLXAOeUYL2OETEQGEvSZYJZi3DfR9be1DYh3Zn+/BFJl8X/k3bfvZakD/utgTeL2N6OJHcouz/NkQqSrqg3dL2/pT+nAdVFbM+sJBwK1t7cAfxJ0h7ARhHxdNoMVAnsGRGr0i66u9ZZbzUfP7OunS9gekR8cT3raGq92hvbrMH/p9aC3Hxk7UpELAMeJmmuqb3AvDmwMA2ErwDb1rPqa0B/SV0kbQ4MTafPAiolfRGSZqH0BjVNae56ZrnyOxBrj8aTNM/UfhLpRuAuSVOBZ/mo3/tMRMyTNIGku+OXgWfS6SvTj5helIZFR5I7xTXa5XFz1zPLm7vONjOzjJuPzMws4+YjsxxIugQYVGfyhRHx53LUY1YsNx+ZmVnGzUdmZpZxKJiZWcahYGZmGYeCmZll/j+Z5Ya0yysJKQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "m = df.value_len.mean()\n", + "bellow_mean = df[df.value_len <= (m)]\n", + "print('The values smaller than the mean represent {0:0.2f}% of the whole sample'.format(bellow_mean.is_json.count()/df.is_json.count()*100))\n", + "plotPercentualComparison(bellow_mean, title='Bellow the mean: json X other')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There is more JSON values bellow the mean than above the mean, but thats not surprising since the data bellow the mean is 95% of everthing." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Jsons bellow the mean are 90.48% of all jsons\n" + ] + } + ], + "source": [ + "bellow_mean_count = bellow_mean[bellow_mean.is_json == True].is_json.count()\n", + "above_mean_count = df[(df.value_len > m) & (df.is_json == True)].is_json.count()\n", + "total = bellow_mean_count + above_mean_count\n", + "print(\"Jsons bellow the mean are {0:.2f}% of all jsons\".format(bellow_mean_count/total * 100))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 699b06677c45dc7d0b7c115649c846c89caa77f3 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 8 Apr 2019 15:31:20 -0300 Subject: [PATCH 16/23] Fix typo --- .../isJson_Value_Distribution.ipynb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb 
b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb index ba2df56..77766f7 100644 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb @@ -142,7 +142,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -379,7 +379,7 @@ "metadata": {}, "source": [ "## All values\n", - "If all data is divided in 10 bins and the percentual of NON-JSON values in each bin is painted orange, we have the following graph:" + "If all data is divided in 10 bins and the percentage of NON-JSON values in each bin is painted orange, we have the following graph:" ] }, { @@ -441,7 +441,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "The top (0.30% - whole sample) / (6.76% - values above the mean) is gurantee to be a valid JSON\n" + "The top (0.30% - whole sample) / (6.76% - values above the mean) is guarantee to be a valid JSON\n" ] } ], @@ -449,7 +449,7 @@ "max_non_json_value_len = df[df.is_json == False].value_len.max()\n", "allJson = df[df['value_len'] > max_non_json_value_len ]\n", "length = allJson.is_json.count()\n", - "print(\"The top ({0:0.2f}% - whole sample) / ({1:0.2f}% - values above the mean) is gurantee to be a valid JSON\".format(\n", + "print(\"The top ({0:0.2f}% - whole sample) / ({1:0.2f}% - values above the mean) is guarantee to be a valid JSON\".format(\n", " length / df.is_json.count() * 100, length / df[df.value_len > df.value_len.mean()].is_json.count() * 100))" ] }, @@ -460,7 +460,7 @@ "--- \n", "\n", "# Out of Curiosity: small values\n", - "This is not exacly relevant to the issue 22 ('What's in the really large values?') but I was courisous to know how was the distribution of the smaller values" + "This is not exactly relevant to the issue 22 ('What's in the really large values?') but I was curious to know how was the distribution of the smaller values" ] }, { @@ -616,7 +616,7 @@ "cell_type": "markdown", 
"metadata": {}, "source": [ - "There is more JSON values bellow the mean than above the mean, but thats not surprising since the data bellow the mean is 95% of everthing." + "There is more JSON values below the mean than above the mean, but that's not surprising since the data below the mean is 95% of everything." ] }, { @@ -628,7 +628,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Jsons bellow the mean are 90.48% of all jsons\n" + "Jsons below the mean are 90.48% of all jsons\n" ] } ], @@ -636,7 +636,7 @@ "bellow_mean_count = bellow_mean[bellow_mean.is_json == True].is_json.count()\n", "above_mean_count = df[(df.value_len > m) & (df.is_json == True)].is_json.count()\n", "total = bellow_mean_count + above_mean_count\n", - "print(\"Jsons bellow the mean are {0:.2f}% of all jsons\".format(bellow_mean_count/total * 100))" + "print(\"Jsons below the mean are {0:.2f}% of all jsons\".format(bellow_mean_count/total * 100))" ] } ], From 46c31d04a56d6783137853d0c8deaf8e500a3cda Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 8 Apr 2019 17:46:01 -0300 Subject: [PATCH 17/23] Removed fixed names, session organization, removed false positives for valid jsons --- .../isJson_dataPrep.ipynb | 613 ++++++++++-------- 1 file changed, 325 insertions(+), 288 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb index d14915f..f0dbbc4 100644 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb @@ -17,16 +17,18 @@ "output_type": "stream", "text": [ "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", - " data = yaml.load(f.read()) or {}\n", - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", - " defaults = yaml.load(f)\n" + " data = yaml.load(f.read()) or {}\n" ] } ], "source": [ "import dask.dataframe as dd\n", - "from dask.distributed import Client\n", - "from dask.diagnostics import ProgressBar" + "from dask.diagnostics import ProgressBar\n", + "import json\n", + "import pandas as pd\n", + "import os\n", + "import tldextract\n", + "import hashlib\n" ] }, { @@ -34,7 +36,9 @@ "metadata": {}, "source": [ "All sub samples and new samples with new columns/data will be saved under the \"DIR\" directory to keep things organized. \n", - "As such, the function \"save_parquet\" and \"read_parquet\" adds this directory to every parquet name, and I'm using this functions instead of dd.read_parquet/dd.to_parquet direct to ensure the same read and write settings across the notebook. " + "As such, the function \"save_parquet\" and \"read_parquet\" adds this directory to every parquet name, and I'm using this functions instead of dd.read_parquet/dd.to_parquet direct to ensure the same read and write settings across the notebook. \n", + "\n", + "NOTE: each section adds its name to the 'FILE_NAME' and saves the new parquet with this name. Because of it, you can run the sections at any order you desire to have the output you need. 
" ] }, { @@ -48,8 +52,9 @@ "# client\n", "\n", "#Create folder to save/read new data\n", - "DIR = 'sample_0_prep/'\n", - "import os\n", + "DIR = 'sample0_prep/'\n", + "FILE_NAME = 's0'\n", + "\n", "if not os.path.exists(DIR):\n", " os.makedirs(DIR)" ] @@ -74,13 +79,13 @@ " if recalculate_partition:\n", " n = 1+df.memory_usage(deep=True).sum().compute() // (1000 * 1000 * 100)\n", " print(\"Npartition: \", n)\n", - " df.repartition(npartitions= n).to_parquet(DIR + name, engine=\"pyarrow\")\n", + " df.repartition(npartitions= n).to_parquet(DIR + name + '.parquet', engine=\"pyarrow\")\n", " else:\n", - " df.to_parquet(DIR + name, engine=\"pyarrow\")\n", + " df.to_parquet(DIR + name + '.parquet', engine=\"pyarrow\")\n", " \n", " \n", "def read_parquet(name):\n", - " return dd.read_parquet(DIR + name, engine='pyarrow')" + " return dd.read_parquet(DIR + name + '.parquet', engine='pyarrow')" ] }, { @@ -101,7 +106,9 @@ { "data": { "text/plain": [ - "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'], dtype='object')" + "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location',\n", + " 'operation'],\n", + " dtype='object')" ] }, "execution_count": 4, @@ -110,10 +117,10 @@ } ], "source": [ - "#Original sample\n", + "#Original sample \n", "df = dd.read_parquet('sample_0.parquet', \n", " engine='pyarrow', \n", - " columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'])\n", + " columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location', 'operation'])\n", "\n", "# df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str})\n", "df.columns" @@ -140,7 +147,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 48.2s\n", + "[########################################] | 100% Completed | 58.7s\n", "1356.9776628910975 0 4496861 26310.62140481331 11292867\n" ] } @@ -160,12 +167,11 @@ 
"cell_type": "markdown", "metadata": {}, "source": [ - "### FILTER: value_len > df_mean\n", - "1356 is the value_len mean\n", - "\n", - "To filter the data into something that is more interesting to this task I decided to only work with values that are at above the mean.\n", + "# Add Column: Domains\n", + "The following code is copyed from this same project: ~/analyses/hello_world.ipynb\n", "\n", - "All values above the mean count up to 499805 rows. That is just 4,42% of the whole sample, and a lot easier to work on. " + "It uses the data saved from the last section\n", + "This section is dedicated to extract the domain of the columns \"location\" and \"script_url\" and add it as new columns \"location_domain\" and \"script_domain\"" ] }, { @@ -177,58 +183,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 58.0s\n", - "Npartition: 244\n", - "[########################################] | 100% Completed | 1min 30.9s\n" + "Notebook name: s0_domains\n" ] } ], "source": [ - "#Save\n", - "save_parquet(df= df[df['value_len'] > df_mean], name='above_mean.parquet', recalculate_partition=True)" + "FILE_NAME += '_domains'\n", + "print('Notebook name: ', FILE_NAME)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location'], dtype='object')" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Read\n", - "df = read_parquet('above_mean.parquet')\n", - "df.columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Add Column: Domains\n", - "The following code is copyed from this same project: ~/analyses/hello_world.ipynb\n", - "\n", - "It uses the data saved from the last section\n", - "This section is dedicated to extract the domain of the columns \"location\" and \"script_url\" and add 
it as new columns \"location_domain\" and \"script_domain\"" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, "outputs": [], "source": [ - "import tldextract\n", - "\n", "def extract_domain(url):\n", " \"\"\"Use tldextract to return the base domain from a url\"\"\"\n", " try:\n", @@ -240,13 +209,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "#To guarantee the usage of the correct parquet created above in case we start from this section\n", - "df = read_parquet('above_mean.parquet')\n", - "\n", "df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str, 'location': str})\n", "df['location_domain'] = df.location.apply(extract_domain, meta='O')\n", "df['script_domain'] = df.script_url.apply(extract_domain, meta='O')" @@ -254,25 +220,25 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 1min 17.3s\n" + "[########################################] | 100% Completed | 6min 23.0s\n" ] } ], "source": [ "#save\n", - "save_parquet(df=df, name='above_mean_domain.parquet')" + "save_parquet(df=df, name=FILE_NAME)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -305,67 +271,67 @@ " \n", " \n", " 0\n", - " canada.ca\n", - " https://www.canada.ca/en/services.html\n", - " adobedtm.com\n", - " https://assets.adobedtm.com/caacec67651710193d...\n", + " vk.com\n", + " https://vk.com/widget_comments.php?app=2297596...\n", + " vk.com\n", + " https://vk.com/js/api/xdm.js?1449919642\n", " \n", " \n", " 1\n", - " tmall.com\n", - " https://maniform.world.tmall.com/category-1282...\n", - " alicdn.com\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", + " vk.com\n", + " 
https://vk.com/widget_comments.php?app=2297596...\n", + " vk.com\n", + " https://vk.com/js/api/xdm.js?1449919642\n", " \n", " \n", " 2\n", - " tmall.com\n", - " https://maniform.world.tmall.com/category-1282...\n", - " alicdn.com\n", - " https://g.alicdn.com/alilog/mlog/aplus_v2.js\n", + " vk.com\n", + " https://vk.com/widget_comments.php?app=2297596...\n", + " vk.com\n", + " https://vk.com/js/al/aes_light.js?592436914\n", " \n", " \n", " 3\n", - " coches.net\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " https://www.coches.net/scripts/common.min.js?2...\n", + " baidu.com\n", + " https://pos.baidu.com/s?hei=70&wid=670&di=u313...\n", + " baidustatic.com\n", + " https://cpro.baidustatic.com/cpro/ui/noexpire/...\n", " \n", " \n", " 4\n", - " coches.net\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " https://www.coches.net/scripts/common.min.js?2...\n", + " serienjunkies.org\n", + " http://serienjunkies.org/smilf/smilf-season-1-...\n", + " google.com\n", + " https://apis.google.com/js/plusone.js?_=151338...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " location_domain location \\\n", - "0 canada.ca https://www.canada.ca/en/services.html \n", - "1 tmall.com https://maniform.world.tmall.com/category-1282... \n", - "2 tmall.com https://maniform.world.tmall.com/category-1282... \n", - "3 coches.net https://www.coches.net/fiat/segunda-mano/ \n", - "4 coches.net https://www.coches.net/fiat/segunda-mano/ \n", + " location_domain location \\\n", + "0 vk.com https://vk.com/widget_comments.php?app=2297596... \n", + "1 vk.com https://vk.com/widget_comments.php?app=2297596... \n", + "2 vk.com https://vk.com/widget_comments.php?app=2297596... \n", + "3 baidu.com https://pos.baidu.com/s?hei=70&wid=670&di=u313... \n", + "4 serienjunkies.org http://serienjunkies.org/smilf/smilf-season-1-... \n", "\n", - " script_domain script_url \n", - "0 adobedtm.com https://assets.adobedtm.com/caacec67651710193d... 
\n", - "1 alicdn.com https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "2 alicdn.com https://g.alicdn.com/alilog/mlog/aplus_v2.js \n", - "3 coches.net https://www.coches.net/scripts/common.min.js?2... \n", - "4 coches.net https://www.coches.net/scripts/common.min.js?2... " + " script_domain script_url \n", + "0 vk.com https://vk.com/js/api/xdm.js?1449919642 \n", + "1 vk.com https://vk.com/js/api/xdm.js?1449919642 \n", + "2 vk.com https://vk.com/js/al/aes_light.js?592436914 \n", + "3 baidustatic.com https://cpro.baidustatic.com/cpro/ui/noexpire/... \n", + "4 google.com https://apis.google.com/js/plusone.js?_=151338... " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read\n", - "df = read_parquet('above_mean_domain.parquet')\n", + "df = read_parquet(FILE_NAME)\n", "df[['location_domain', 'location', 'script_domain', 'script_url']].head()" ] }, @@ -380,20 +346,37 @@ "After simple validation of value is a json or not, boolean value will be saved on a new column named \"is_json\"\n" ] }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Notebook name: s0_domains_isjson\n" + ] + } + ], + "source": [ + "FILE_NAME += '_isjson'\n", + "print('Notebook name: ', FILE_NAME)" + ] + }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "import json\n", - "import pandas as pd\n", - "\n", "def is_json(myjson):\n", + " if (myjson == '{}'):\n", + " #would be counted as valid, but its an empty json\n", + " return False\n", " try:\n", - " json.loads(myjson)\n", - " return True\n", - "\n", + " #Eliminate false positives\n", + " return (type(json.loads(myjson)) == dict)\n", " except ValueError as e:\n", " return False" ] @@ -404,8 +387,6 @@ "metadata": {}, "outputs": [], "source": [ - "#To guarantee the usage of the correct parquet created above in case we start from this 
section\n", - "df = read_parquet('above_mean_domain.parquet')\n", "df['is_json'] = df['value'].apply(is_json, meta=False)" ] }, @@ -418,13 +399,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 2min 25.1s\n" + "[########################################] | 100% Completed | 4min 21.6s\n" ] } ], "source": [ "#save\n", - "save_parquet(df=df, name='above_mean_domain_json.parquet')" + "save_parquet(df=df, name=FILE_NAME)" ] }, { @@ -460,27 +441,27 @@ " \n", " \n", " 0\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " True\n", + " fXDcab74\n", + " False\n", " \n", " \n", " 1\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " True\n", + " fXDcab74\n", + " False\n", " \n", " \n", " 2\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " True\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", + " False\n", " \n", " \n", " 3\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", " False\n", " \n", " \n", " 4\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", + " _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17...\n", " False\n", " \n", " \n", @@ -489,11 +470,11 @@ ], "text/plain": [ " value_1000 is_json\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... True\n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... False\n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... False" + "0 fXDcab74 False\n", + "1 fXDcab74 False\n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... False\n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... False\n", + "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... 
False" ] }, "execution_count": 15, @@ -503,7 +484,7 @@ ], "source": [ "#read\n", - "df = read_parquet('above_mean_domain_json.parquet')\n", + "df = read_parquet(FILE_NAME)\n", "df[['value_1000', 'is_json']].head()" ] }, @@ -519,47 +500,60 @@ "cell_type": "code", "execution_count": 16, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Notebook name: s0_domains_isjson_md5\n" + ] + } + ], + "source": [ + "FILE_NAME += '_md5'\n", + "print('Notebook name: ', FILE_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, "outputs": [], "source": [ - "import hashlib\n", - "\n", "def md5(value):\n", " return hashlib.md5(value.encode('utf-8')).hexdigest()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ - "#To guarantee the usage of the correct parquet created above in case we start from this section\n", - "df = read_parquet('above_mean_domain_json.parquet') \n", - "\n", - "df['value_md5'] = df['value'].apply(md5, meta=' ')" + "df['value_md5'] = df['value'].apply(md5, meta='O')" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 1min 26.8s\n" + "[########################################] | 100% Completed | 2min 45.9s\n" ] } ], "source": [ "#save\n", - "save_parquet(df=df, name='above_mean_domain_json_md5.parquet')" + "save_parquet(df=df, name=FILE_NAME)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -590,28 +584,28 @@ " \n", " \n", " 0\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " cff77029e3ae45dd439a62987b1d8340\n", + " fXDcab74\n", + " 7df64196939a8b6ff11482ed6df4b25a\n", " \n", " \n", " 1\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 
9ac0a0a0afb677c8fd985a7c2f4ddbc5\n", + " fXDcab74\n", + " 7df64196939a8b6ff11482ed6df4b25a\n", " \n", " \n", " 2\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " 9ac0a0a0afb677c8fd985a7c2f4ddbc5\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", + " bc0aac3569031babbd73e069947a4b12\n", " \n", " \n", " 3\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " db64465b639e01993d9212390f057628\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", + " bc0aac3569031babbd73e069947a4b12\n", " \n", " \n", " 4\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " db64465b639e01993d9212390f057628\n", + " _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17...\n", + " 324dd29b8c6438bc700ac2d85e33f12d\n", " \n", " \n", "\n", @@ -619,28 +613,28 @@ ], "text/plain": [ " value_1000 \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", + "0 fXDcab74 \n", + "1 fXDcab74 \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", + "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... 
\n", "\n", " value_md5 \n", - "0 cff77029e3ae45dd439a62987b1d8340 \n", - "1 9ac0a0a0afb677c8fd985a7c2f4ddbc5 \n", - "2 9ac0a0a0afb677c8fd985a7c2f4ddbc5 \n", - "3 db64465b639e01993d9212390f057628 \n", - "4 db64465b639e01993d9212390f057628 " + "0 7df64196939a8b6ff11482ed6df4b25a \n", + "1 7df64196939a8b6ff11482ed6df4b25a \n", + "2 bc0aac3569031babbd73e069947a4b12 \n", + "3 bc0aac3569031babbd73e069947a4b12 \n", + "4 324dd29b8c6438bc700ac2d85e33f12d " ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read\n", - "df = read_parquet('above_mean_domain_json_md5.parquet')\n", + "df = read_parquet(FILE_NAME)\n", "df[['value_1000', 'value_md5']].head()" ] }, @@ -648,14 +642,19 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Saving other possible usefull samples to future analyses" + "# Saving other possible usefull filtered samples to future analyses" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Filter to parquet containing only JSON " + "## value_len > df_mean\n", + "1356 is the value_len mean\n", + "\n", + "To filter the data into something that is more interesting to this task I decided to only work with values that are at above the mean.\n", + "\n", + "All values above the mean count up to 499805 rows. That is just 4,42% of the whole sample, and a lot easier to work on. 
" ] }, { @@ -667,21 +666,110 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 27.4s\n", - "Npartition: 233\n", - "[########################################] | 100% Completed | 1min 3.4s\n" + "Notebook name: s0_domains_isjson_md5_above_mean\n" ] } ], "source": [ - "df = read_parquet('above_mean_domain_json_md5.parquet')\n", - "save_parquet(df=df[df['is_json'] == True], name='JSONs_only.parquet', recalculate_partition=True)" + "name = FILE_NAME + '_above_mean'\n", + "print('Notebook name: ', name)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 50.5s\n", + "Npartition: 245\n", + "[########################################] | 100% Completed | 1min 38.3s\n" + ] + } + ], + "source": [ + "#Save\n", + "save_parquet(df= df[df['value_len'] > df_mean], name= name, recalculate_partition=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location',\n", + " 'operation', 'location_domain', 'script_domain', 'is_json',\n", + " 'value_md5'],\n", + " dtype='object')" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Read\n", + "df = read_parquet(name)\n", + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter to parquet containing only JSON " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Notebook name: s0_domains_isjson_md5_JSON_ONLY\n" + ] + } + ], + "source": [ + "name = FILE_NAME + '_JSON_ONLY'\n", + "print('Notebook name: ', name)" + ] + }, + { + 
"cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 28.9s\n", + "Npartition: 233\n", + "[########################################] | 100% Completed | 1min 5.0s\n" + ] + } + ], + "source": [ + "save_parquet(df=df[df['is_json'] == True], name=name, recalculate_partition=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, "outputs": [ { "data": { @@ -747,14 +835,14 @@ "4 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... True" ] }, - "execution_count": 22, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read all_json_above_mean\n", - "df = read_parquet('JSONs_only.parquet')\n", + "df = read_parquet(name)\n", "df[['value_1000', 'is_json']].head()" ] }, @@ -775,14 +863,30 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Notebook name: s0_domains_isjson_md5_JSON_ONLY_schema_keys\n" + ] + } + ], + "source": [ + "name += '_schema_keys'\n", + "print('Notebook name: ', name)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "from json_schema_inferencer.guess_json_schema import guess_schema\n", "\n", - "df = read_parquet('JSONs_only.parquet')\n", - "\n", "def jsonSchema(myjson):\n", " try:\n", " dct = json.loads(myjson)\n", @@ -805,26 +909,26 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 3min 57.7s\n" + "[########################################] | 100% Completed | 4min 18.1s\n" ] } ], "source": [ - "df['json_keys'] = df['value'].apply(jsonKeys, meta='')\n", - "df['json_schema'] = 
df['value'].apply(jsonSchema, meta='')\n", - "save_parquet(df=df, name='JSONs_key_schema.parquet')" + "df['json_keys'] = df.value.apply(jsonKeys, meta='O')\n", + "df['json_schema'] = df.value.apply(jsonSchema, meta='O')\n", + "save_parquet(df=df, name=name)\n" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -911,14 +1015,14 @@ "4 [LastSearch, LastSearch_e, dueljs_channel_comm... " ] }, - "execution_count": 25, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read \n", - "df = read_parquet('JSONs_key_schema.parquet')\n", + "df = read_parquet(name)\n", "df[['value_1000', 'json_keys', 'json_schema']].head()" ] }, @@ -926,34 +1030,60 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### All NON json above the mean" + "## All NON json above the mean" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 26.7s\n", - "Npartition: 12\n", - "[########################################] | 100% Completed | 27.8s\n" + "Notebook name: s0_domains_isjson_md5_nonJSON_ONLY\n" ] } ], "source": [ - "df = read_parquet('above_mean_domain_json_md5.parquet')\n", - "save_parquet(df=df[df['is_json'] == False], name='NON_JSONs_only.parquet', recalculate_partition=True)" + "name = FILE_NAME + '_nonJSON_ONLY'\n", + "df = read_parquet(FILE_NAME)\n", + "print('Notebook name: ', name)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 1min 54.5s\n", + "Npartition: 116\n", + "[########################################] | 100% Completed | 1min 13.1s\n" + ] + } + ], + "source": [ + "save_parquet(df=df[df['is_json'] == False], 
name=name, recalculate_partition=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/dataframe/core.py:4494: UserWarning: Insufficient elements for `head`. 5 elements requested, only 0 elements available. Try passing larger `npartitions` to `head`.\n", + " warnings.warn(msg.format(n, len(r)))\n" + ] + }, { "data": { "text/html": [ @@ -981,127 +1111,34 @@ " symbol\n", " script_url\n", " location\n", + " operation\n", " location_domain\n", " script_domain\n", " is_json\n", " value_md5\n", + " json_keys\n", + " json_schema\n", " \n", " \n", " \n", - " \n", - " 0\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " coches.net\n", - " False\n", - " db64465b639e01993d9212390f057628\n", - " \n", - " \n", - " 1\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://www.coches.net/scripts/common.min.js?2...\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " coches.net\n", - " False\n", - " db64465b639e01993d9212390f057628\n", - " \n", - " \n", - " 2\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://tags.tiqcdn.com/utag/schibsted/coches....\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " tiqcdn.com\n", - " False\n", - " db64465b639e01993d9212390f057628\n", - " \n", - " \n", - " 3\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 
usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://tags.tiqcdn.com/utag/schibsted/coches....\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " tiqcdn.com\n", - " False\n", - " db64465b639e01993d9212390f057628\n", - " \n", - " \n", - " 4\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " usunico=17/12/2017:0-00155123:830; SessionASM=...\n", - " 1358\n", - " window.document.cookie\n", - " https://tags.tiqcdn.com/utag/schibsted/coches....\n", - " https://www.coches.net/fiat/segunda-mano/\n", - " coches.net\n", - " tiqcdn.com\n", - " False\n", - " db64465b639e01993d9212390f057628\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " value_1000 \\\n", - "0 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "1 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "2 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... \n", - "\n", - " value value_len \\\n", - "0 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "1 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "2 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "3 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "4 usunico=17/12/2017:0-00155123:830; SessionASM=... 1358 \n", - "\n", - " symbol script_url \\\n", - "0 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "1 window.document.cookie https://www.coches.net/scripts/common.min.js?2... \n", - "2 window.document.cookie https://tags.tiqcdn.com/utag/schibsted/coches.... \n", - "3 window.document.cookie https://tags.tiqcdn.com/utag/schibsted/coches.... \n", - "4 window.document.cookie https://tags.tiqcdn.com/utag/schibsted/coches.... 
\n", - "\n", - " location location_domain script_domain \\\n", - "0 https://www.coches.net/fiat/segunda-mano/ coches.net coches.net \n", - "1 https://www.coches.net/fiat/segunda-mano/ coches.net coches.net \n", - "2 https://www.coches.net/fiat/segunda-mano/ coches.net tiqcdn.com \n", - "3 https://www.coches.net/fiat/segunda-mano/ coches.net tiqcdn.com \n", - "4 https://www.coches.net/fiat/segunda-mano/ coches.net tiqcdn.com \n", - "\n", - " is_json value_md5 \n", - "0 False db64465b639e01993d9212390f057628 \n", - "1 False db64465b639e01993d9212390f057628 \n", - "2 False db64465b639e01993d9212390f057628 \n", - "3 False db64465b639e01993d9212390f057628 \n", - "4 False db64465b639e01993d9212390f057628 " + "Empty DataFrame\n", + "Columns: [value_1000, value, value_len, symbol, script_url, location, operation, location_domain, script_domain, is_json, value_md5, json_keys, json_schema]\n", + "Index: []" ] }, - "execution_count": 28, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#read \n", - "df = read_parquet('NON_JSONs_only.parquet')\n", + "df = read_parquet(name)\n", "df.head()" ] }, From df6d8433d087bf9836ef1430ce1ed9ad71f454b1 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 8 Apr 2019 17:51:50 -0300 Subject: [PATCH 18/23] Value distribution with new data that filtered json false positives --- .../isJson_Value_Distribution.ipynb | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb index 77766f7..3ff820d 100644 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb @@ -40,7 +40,7 @@ "metadata": {}, "source": [ "# Parquet\n", - "Used sample: sample_0_prep/full_sample_json.parquet\n", + "Used sample: 
sample_0_prep/s0_domains_isjson.parquet\n", " * This sample is the 10% sample with the \"is_json\" column added to it, this column is the result of the 'value' columns as a valid json or not. \n", " * This sample can be obtained by running 'jsJson_dataPrep.ipynb'" ] @@ -96,7 +96,7 @@ } ], "source": [ - "df = dd.read_parquet('sample_0_prep/full_sample_json.parquet', engine=\"pyarrow\", columns=['value_len', 'is_json'])\n", + "df = dd.read_parquet('sample0_prep/s0_domains_isjson.parquet', engine=\"pyarrow\", columns=['value_len', 'is_json'])\n", "df.head(1)" ] }, @@ -109,7 +109,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 1.6s\n" + "[########################################] | 100% Completed | 1.5s\n" ] } ], @@ -142,7 +142,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, @@ -194,7 +194,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAELCAYAAADz6wBxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFqxJREFUeJzt3X+QXWd93/H3JzLCDSEOGEFdy4rkyHHipknBWwWawpC0JnZi4YF4GimmUFu1BoLbZtJMRm46BaaTUSBTEsCe2E4xxkywcR2nkYyohjGkzkw9INkmII0irDim3tiJDEmME2aCRb794x7ZV8v+uLvnnr2rs+/XzM7e8+w95/neY60/+zznueemqpAkaam+Y9IFSJJObwaJJKkVg0SS1IpBIklqxSCRJLVikEiSWjFIJEmtGCSSpFZOyyBJ8vokf5jkpiSvn3Q9krSanbHcHSa5FbgcOF5VPzTUfinwAWAN8D+q6tfmOUwBfwOcCUwv1OfLXvay2rhxY5uyJWnVefDBB79aVesWel6W+xYpSV7HIARuPxkkSdYAXwYuYRAMB4DtDEJl94xDXAN8tar+PskrgPdX1VXz9Tk1NVUHDx4c7wuRpJ5L8mBVTS30vGUfkVTV/Uk2zmjeAhyrqkcBktwJXFFVuxmMXubyV8ALZ/tBkp3AToANGza0rFqSNJeVco3kXODxoe3ppm1WSd6c5GbgY8ANsz2nqm6pqqmqmlq3bsGRmSRpiZZ9RDKHzNI255xbVd0D3LPgQZOtwNbNmze3KE2SNJ+VMiKZBs4b2l4PPDGhWiRJi7BSguQAcEGSTUnWAtuAPW0PWlV7q2rnWWed1bpASdLslj1IktwBPABcmGQ6yY6qOgFcB+wHjgB3VdXhMfS1NcktTz/9dNtDSZLmsOzLfyfB5b+StHijLv9dKVNbnXBEIkndWymrtjpRVXuBvVNTU9e2Oc7GXZ88ZfuxM3/u25/0bsNK0urU6xGJJKl7vQ4Sp7YkqXu9DhKX/0pS93odJJKk7vU6SJzakqTu9TpInNqSpO71OkgkSd
0zSCRJrfQ6SLxGIknd63WQeI1EkrrX6yCRJHXPIJEktWKQSJJa6XWQeLFdkrrX6yDxYrskda/XQSJJ6p5BIklqxSCRJLVikEiSWjktP7M9yXcA/w34buBgVX10wiVJ0qq17COSJLcmOZ7k0Iz2S5McTXIsya4FDnMFcC7wLDDdVa2SpIVNYkRyG3ADcPvJhiRrgBuBSxgEw4Eke4A1wO4Z+18DXAg8UFU3J7kbuG8Z6pYkzWLZg6Sq7k+ycUbzFuBYVT0KkORO4Iqq2g1cPvMYSaaBbzab3+quWknSQlbKxfZzgceHtqebtrncA/xkkg8B98/2hCQ7kxxMcvCpp54aX6WSpFOslIvtmaWt5npyVX0D2DHfAavqliRPAlvXrl17ccv6JElzWCkjkmngvKHt9cATbQ/qLVIkqXsrJUgOABck2ZRkLbAN2NP2oN60UZK6N4nlv3cADwAXJplOsqOqTgDXAfuBI8BdVXW4bV+OSCSpe5NYtbV9jvZ9wL5x9pVkK7B18+bN4zysJGnISpna6oQjEknqXq+DxGskktS9XgeJIxJJ6l6vg8QRiSR1r9dB4ohEkrrX6yCRJHWv10Hi1JYkda/XQeLUliR1r9dBIknqnkEiSWql10HiNRJJ6l6vg8RrJJLUvV4HiSSpewaJJKkVg0SS1IpBIklqpddB4qotSeper4PEVVuS1L1eB4kkqXsGiSSpFYNEktSKQSJJauWMSRewFEleC1zFoP6LquqfT7gkSVq1ln1EkuTWJMeTHJrRfmmSo0mOJdk13zGq6g+r6u3AvcBHu6xXkjS/SYxIbgNuAG4/2ZBkDXAjcAkwDRxIsgdYA+yesf81VXW8efxzwL/rumBJ0tyWPUiq6v4kG2c0bwGOVdWjAEnuBK6oqt3A5bMdJ8kG4Omq+vocP98J7ATYsGHDeIqXJH2blXKx/Vzg8aHt6aZtPjuAj8z1w6q6BXgP8NDatWtbFyhJmt1KCZLM0lbz7VBV76qq/7vAc3xnuyR1bKUEyTRw3tD2euCJtgf1XluS1L2VEiQHgAuSbEqyFtgG7Gl7UEckktS9SSz/vQN4ALgwyXSSHVV1ArgO2A8cAe6qqsNj6MsRiSR1bBKrtrbP0b4P2DfmvvYCe6empq4d53ElSc9bKVNbnXBEIknd63WQeI1EkrrX6yBxRCJJ3et1kDgikaTu9TpIJEndM0gkSa30Oki8RiJJ3TstP9hqVJN6H8nGXZ/8trbHfu2nl7MESVo2vR6RSJK61+sgcWpLkrrX6yBx+a8kda/XQSJJ6p5BIklqxSCRJLVikEiSWul1kLhqS5K61+sgcdWWJHWv1+9sP134TnhJp7Nej0gkSd0zSCRJrRgkkqRWRgqSJJcnWTGhk2RDkj1Jbk2ya9L1SNJqNmo4bAMeSfK+JD/YpsPmf/7Hkxya0X5pkqNJjo0QDt8PfLKqrgEualOPJKmdkYKkqt4CvBL4E+AjSR5IsjPJi5fQ523ApcMNSdYANwKXMQiG7UkuSvJPktw74+vlwMPAtiSfAT67hBokSWMy8nRVVX0d+F3gTuAc4E3AQ0n+/WI6rKr7gb+c0bwFOFZVj1bVN5s+rqiqL1XV5TO+jgNXA++qqp8AZl0n2wTdwSQHn3rqqcWUKElahFGvkbwxye8BnwFeAGypqsuAHwF+aQx1nAs8PrQ93bTN5X8D/yHJTcBjsz2hqm6pqqmqmlq3bt0YSpQkzWbUNyReCfxGM5p4TlV9I8k1Y6gjs7TVXE+uqkNNTfMfNNkKbN28eXOL0iRJ8xl1auvJmSGS5L0AVXXfGOqYBs4b2l4PPDGG40qSOjZqkFwyS9tlY6zjAHBBkk1J1jJYJban7UG915YkdW/eIEnyjiRfAn4gyReHvv4U+OJSOkxyB/AAcGGS6SQ7quoEcB2wHzgC3FVVh5dy/Bl9efdfSerYQtdIPg58CtgNDL+345mqmrnyaiRVtX2O9n3AvqUcc56+9gJ7p6amrh3ncS
VJz1toaquq6jHgncAzQ18keWm3pbXniESSurdQkHy8+f4gcLD5/uDQ9ormNRJJ6t68U1tVdXnzfdPylCNJOt2M+obEH0vyoubxW5K8P8mGbktrz6ktSereqMt/fwv4RpIfAX4Z+Arwsc6qGhOntiSpe6MGyYmqKuAK4ANV9QFgKTdslCT1zKi3SHkmyfXAW4DXNXfrfUF3ZY2Ht0h5np8LL6kro45Ifhb4O2BHVf05gxsq/npnVY2JU1uS1L2RRiRNeLx/aPv/Abd3VZQk6fQx6qqtNyd5JMnTSb6e5JkkX++6OEnSyjfqNZL3AVur6kiXxYxbX6+ReL1D0koy6jWSvzjdQgS8RiJJy2HUEcnBJJ8A/heDi+4AVNU9nVQlSTptjBok3w18A3jDUFsBBokkrXKjrtq6uutCJEmnp1FXbX1/kvuSHGq2fzjJf+m2tPa815YkdW/Ui+2/DVwPPAtQVV9k8HG4K5oX2yWpe6MGyXdW1edntJ0YdzGSpNPPqEHy1STfx+ACO0muBJ7srCpJ0mlj1FVb7wRuAX4gyZ8Bfwpc1VlVmjjf9ChpVPMGSZJfHNrcB3yWwSjmb4GfYej+W5Kk1WmhEcnJzxy5EPhnwO8DAf4NcH+Hdc0ryUXAu4GvAfdV1d2TqkWSVrt5r5FU1Xuq6j3Ay4BXVdUvVdV/Ai4G1i+lwyS3Jjl+cinxUPulSY4mOZZk1wKHuQz4UFW9A3jrUuqQJI3HqNdINgDfHNr+JrBxiX3eBtzA0G3omw/KuhG4BJgGDiTZA6wBds/Y/xoGH/P7riRvBM5eYh2SpDEYNUg+Bnw+ye8xWLn1JuCjS+mwqu5PsnFG8xbgWFU9CpDkTuCKqtoNXD7Hod7ZBJC3aZGkCRr1Fim/muRTwGubpqur6uEx1nEu8PjQ9jTwo3M9uQmi/wy8iDk+qTHJTmAnwIYNG8ZUpiRpplFHJFTVQ8BDHdWR2bqcp5bHaEJinufckuRJYOvatWsvbleexs3lxVJ/jBwkHZsGzhvaXg880fagVbUX2Ds1NXVt22NpfgaDtHqN+s72rh0ALkiyKclaBvfx2tP2oN60UZK6t+xBkuQO4AHgwiTTSXZU1QngOmA/cAS4q6oOt+3LmzZKUveWfWqrqrbP0b6Pwbvnx6avn9kuSSvJSpna6oQjEknq3kq52N4JRyT94gV9aWVyRCJJaqXXQeKqLUnqXq+DxBGJJHWv10EiSeper4PEqS1J6l6vg8SpLUnqXq+DRJLUPYNEktRKr4PEaySS1L1eB4nXSCSpe70OEklS9wwSSVIrBokkqRWDRJLUSq+DxFVbktS9Xn8eSVXtBfZOTU1dO+latPL5eScrg/8dTj+9HpFIkrpnkEiSWjFIJEmtGCSSpFZWfJAkOT/Jh5PcPdT2oiQfTfLbSa6aZH2StNp1GiRJbk1yPMmhGe2XJjma5FiSXfMdo6oeraodM5rfDNxdVdcCbxxz2ZKkReh6+e9twA3A7ScbkqwBbgQuAaaBA0n2AGuA3TP2v6aqjs9y3PXAl5rH3xpzzZKkReg0SKrq/iQbZzRvAY5V1aMASe4Erqiq3cDlIx56mkGYfIE5RlVJdgI7ATZs2LDo2iVJo5nENZJzgceHtqebtlklOTvJTcArk1zfNN8D/EyS3wL2zrZfVd1SVVNVNbVu3boxlS5JmmkS72zPLG0115Or6mvA22e0/S1w9YIdJVuBrZs3b15sjZKkEU0iSKaB84a21wNPTKAOST3hbVUmaxJTWweAC5JsSrIW2Abs6aIjPyFRkrrX9fLfO4AHgAuTTCfZUVUngOuA/cAR4K6qOtxR/979V5I61vWqre1ztO8D9nXZd9OPd/+VpI6t+He2t+GIRJK65+eRqLe8ACstj16PSCRJ3ev1iMT3kUgaB0e38+v1iMTlv5LUvV4HiSSpe70OEldtSVL3eh0kTm1JUvd6HSSSpO71Okic2pKk7vU6SJzakqTu9fp9JFLXZr6/wPcWaD
UySKQVzDfC6XTQ66ktSVL3DBJJUiu9DhJXbUlS93odJK7akqTu9TpIJEndM0gkSa0YJJKkVgwSSVIrKz5Ikpyf5MNJ7p6vTZI0GZ0GSZJbkxxPcmhG+6VJjiY5lmTXfMeoqkerasdCbZKkyej6Fim3ATcAt59sSLIGuBG4BJgGDiTZA6wBds/Y/5qqOt5xjZKkFjoNkqq6P8nGGc1bgGNV9ShAkjuBK6pqN3D5uPpOshPYCbBhw4ZxHVaSNMMkrpGcCzw+tD3dtM0qydlJbgJemeT6udpmqqpbqmqqqqbWrVs3xvIlScMmcfffzNJWcz25qr4GvH2htlk7SrYCWzdv3rzYGiVJI5pEkEwD5w1trweemEAdkvBW9WpvElNbB4ALkmxKshbYBuzpoiPvtSVJ3et6+e8dwAPAhUmmk+yoqhPAdcB+4AhwV1Ud7qh/7/4rSR3retXW9jna9wH7uuy76WcvsHdqaurarvuSpNVqxb+zvQ1HJJLUvV4HiddIJKl7k1i1Ja1arpDSXE7nfxu9HpE4tSVJ3et1kDi1JUnd63WQSJK61+sgcWpLkrrX6yBxakuSutfrIJEkdc8gkSS10usg8RqJJHWv129I9F5bkvpqJb2BsdcjEklS9wwSSVIrBokkqZVeB4kX2yWpe70OEt+QKEnd63WQSJK6Z5BIkloxSCRJrRgkkqRWUlWTrqFzSZ4CvrKEXV8GfHXM5ZzuPCen8nycyvNxqtP9fHxvVa1b6EmrIkiWKsnBqpqadB0riefkVJ6PU3k+TrVazodTW5KkVgwSSVIrBsn8bpl0ASuQ5+RUno9TeT5OtSrOh9dIJEmtOCKRJLVikEiSWjFI5pDk0iRHkxxLsmvS9SxFkluTHE9yaKjtpUk+neSR5vtLmvYk+WDzer+Y5FVD+7ytef4jSd421H5xki81+3wwSZbax3JIcl6SzyY5kuRwkv+4ms9JkjOTfD7JHzXn4z1N+6Ykn2tq/USStU37C5vtY83PNw4d6/qm/WiSnxxqn/X3aCl9LJcka5I8nOTepdbap/Mxkqrya8YXsAb4E+B8YC3wR8BFk65rCa/jdcCrgENDbe8DdjWPdwHvbR7/FPApIMCrgc817S8FHm2+v6R5/JLmZ58HXtPs8yngsqX0sYzn4xzgVc3jFwNfBi5areek6fO7mscvAD7X1HAXsK1pvwl4R/P454GbmsfbgE80jy9qfkdeCGxqfnfWzPd7tNg+lvnfyS8CHwfuXUqtfTsfI52zSRewEr+a/xHsH9q+Hrh+0nUt8bVs5NQgOQqc0zw+BzjaPL4Z2D7zecB24Oah9pubtnOAPx5qf+55i+1jgufm94FLPCcF8J3AQ8CPMngn9hlN+3O/C8B+4DXN4zOa52Xm78fJ5831e9Tss6g+lvE8rAfuA34CuHcptfbpfIz65dTW7M4FHh/anm7a+uAVVfUkQPP95U37XK95vvbpWdqX0seya6YIXsngr/BVe06aaZwvAMeBTzP4i/mvq+rELPU8V2vz86eBs1n8eTp7CX0sl98Efhn4+2Z7KbX26XyMxCCZXWZp6/s66ble82Lbl9LHskryXcDvAr9QVV+f76mztPXqnFTVt6rqnzL4S3wL8IPz1DOu8zHfa57Y+UhyOXC8qh4cbp6nnl6fj8UwSGY3DZw3tL0eeGJCtYzbXyQ5B6D5frxpn+s1z9e+fpb2pfSxbJK8gEGI/E5V3dM0r+pzAlBVfw38AYNrJN+T5IxZ6nmu1ubnZwF/yeLP01eX0Mdy+DHgjUkeA+5kML31m0uotS/nY2QGyewOABc0KynWMrjItWfCNY3LHuDkKqO3MbhOcLL9rc0qolcDTzdTMPuBNyR5SbPS6A0M5m+fBJ5J8upmZdJbZxxrMX0si6bODwNHqur9Qz9aleckybok39M8/gfAvwKOAJ8Frpyj1pOv4UrgMzWYvN8DbGtWGG0CLmCw6GDW36Nmn8X20bmqur6q1lfVxqbWz1TVVU
uotRfnY1EmfZFmpX4xWE3zZQZzxr8y6XqW+BruAJ4EnmXwl80OBvOr9wGPNN9f2jw3wI3N6/0SMDV0nGuAY83X1UPtU8ChZp8beP5OCYvuY5nOx79gMC3wReALzddPrdZzAvww8HBzPg4B/7VpP5/B//iOAf8TeGHTfmazfaz5+flDx/qV5jUcpVmpNt/v0VL6WOZ/K6/n+VVbq/58LPTlLVIkSa04tSVJasUgkSS1YpBIkloxSCRJrRgkkqRWDBJJUisGiTRmSf5mzMe7LcmVCz9TmgyDRJLUikEiLSDJe5P8/ND2u5O8K8l9SR7K4IOsrphlv9ef/HCkZvuGJP+2eXxxkv+T5MEk+0/eh2uEWmbdL8kfNHV+PsmXk7y29QuXRmSQSAu7E/jZoe1/DXwEeFNVvQr4ceC/N/fXWlBz48gPAVdW1cXArcCvjmG/M6pqC/ALwLtGqUUahzMWfoq0ulXVw0lenuQfAeuAv2JwD7PfSPI6Bp9dcS7wCuDPRzjkhcAPAZ9usmdNc7y2+528m/GDDD7QTFoWBok0mrsZ3H31HzIYoVzFIFQurqpnm1uPnzljnxOcOuo/+fMAh6vqNYusYaH9/q75/i383dYycmpLGs2dDG77fSWDUDmLwYcgPZvkx4HvnWWfrwAXNbcTPwv4l037UWBdktfAYMoqyT8eoYal7id1yr9apBFU1eEkLwb+rKqeTPI7wN4kBxncjv6PZ9nn8SR3MbhN+yMMbtlOVX2zWc77wSZgzmDwAUqHF6hhSftJXfM28pKkVpzakiS14tSWtEIkuZHB54YP+0BVfWQS9UijcmpLktSKU1uSpFYMEklSKwaJJKkVg0SS1Mr/B0NHHeyvZyHKAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZIAAAEMCAYAAADu7jDJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFvpJREFUeJzt3X2MZXd93/H3J2sGmoSYZ+p6vdl1xnHjUlK8IwNNQSSpwSQ7WFCr8WIUirde8eCqUVRF60aqiVq0PKgJD7YCm2KMo8TGcRx1F5ZuEZAaVRbsGgjsylm8cU08eJM1eTAOSAGTb/+4Z+HuMA9359wzd+bM+yVdzT2/OQ/fe7zXn/n9fueem6pCkqSV+qFJFyBJWt8MEklSKwaJJKkVg0SS1IpBIklqxSCRJLVikEiSWjFIJEmtrMsgSfKyJJ9J8v4kL5t0PZK0ka16kCS5JcmpJEfntV+R5HiSE0n2LLObAv4OeAow11WtkqTlZbVvkZLkpQxC4Laqel7Ttgn4CnA5g2A4DOwENgF75+3iWuDrVfUPSZ4L/GZVXbPUMZ/1rGfV1q1bx/o6JKnv7rvvvq9X1bOXW++c1ShmWFXdk2TrvObLgBNV9SBAkjuAK6tqL7Bjid39DfDk5Y65detWjhw5srKCJWmDSvLVUdZb9SBZxPnAw0PLc8ALF1s5yWuAVwBPA25aZJ3dwG6ALVu2jK1QSdKZ1kqQZIG2Rcfcqupu4O6ldlhV+5KcBGanpqa2t6xPkrSItXLV1hxwwdDyZuCRtjutqgNVtfvcc89tuytJ0iLWSpAcBi5Ksi3JFHA1sL/tTpPMJtn32GOPtS5QkrSwSVz+eztwL3Bxkrkku6rqCeB64BBwP3BnVR1reyx7JJLUvUlctbVzkfaDwMFxHivJLDA7PT09zt1KkoaslaGtTtgjkaTu9TpInCORpO6tlct/O1FVB4ADMzMz1411x29doIfzVsNK0sbU6yAZl617PnbG8kNPmVAhkrQGObQlSWql10HiZLskda/XQSJJ6l6vg8ShLUnqXq+DxKEtSeper4NEktQ9g0SS1Eqvg8Q5E
knqXq+DxDkSSeper4NEktQ9g0SS1IpBIklqxSCRJLXS6yDxqi1J6l6vg8SrtiSpe70OEklS9wwSSVIrBokkqRWDRJLUyrr8zvYkPwT8V+DHgCNV9eEJlyRJG9aq90iS3JLkVJKj89qvSHI8yYkke5bZzZXA+cB3gLmuapUkLW8SPZJbgZuA2043JNkE3AxcziAYDifZD2wC9s7b/lrgYuDeqvpAkruAT65C3ZKkBax6kFTVPUm2zmu+DDhRVQ8CJLkDuLKq9gI75u8jyRzw7Wbxu91VK0lazlqZbD8feHhoea5pW8zdwCuSvA+4Z6EVkuxOciTJkUcffXR8lUqSzrBWJtuzQFsttnJVfQvYtdQOq2pfkpPA7NTU1PaW9UmSFrFWeiRzwAVDy5uBR9ru1FukSFL31kqQHAYuSrItyRRwNbC/7U69aaMkdW8Sl//eDtwLXJxkLsmuqnoCuB44BNwP3FlVx9oeyx6JJHVvEldt7Vyk/SBwcJzHSjILzE5PT49zt5KkIWtlaKsT9kgkqXu9DhLnSCSpe70OEnskktS9XgeJJKl7vQ4Sh7YkqXu9DhKHtiSpe70OEklS93odJA5tSVL3eh0kDm1JUvd6HSSSpO4ZJJKkVnodJM6RSFL3eh0kzpFIUvd6HSSSpO4ZJJKkVgwSSVIrBokkqZVeB4lXbUlS93odJF61JUnd63WQSJK6Z5BIkloxSCRJrRgkkqRWzpl0ASuR5CXANQzqv6Sq/uWES5KkDWvVeyRJbklyKsnRee1XJDme5ESSPUvto6o+U1VvBD4KfLjLeiVJS5tEj+RW4CbgttMNSTYBNwOXA3PA4ST7gU3A3nnbX1tVp5rnrwX+fdcFS5IWt+pBUlX3JNk6r/ky4ERVPQiQ5A7gyqraC+xYaD9JtgCPVdU3OixXkrSMtTLZfj7w8NDyXNO2lF3Ahxb7ZZLdSY4kOfLoo4+OoURJ0kLWymR7FmirpTaoqhuX+f2+JCeB2ampqe1tipMkLW6t9EjmgAuGljcDj7TdqbdIkaTurZUgOQxclGRbkingamB/251600ZJ6t6qD20luR14GfCsJHPAjVX1wSTXA4cYXKl1S1Uda3usqjoAHJiZmbmu7b7OxtY9H/uBtofe/ourWYIkrZpJXLW1c5H2g8DBcR4rySwwOz09Pc7dSpKGrJWhrU44RyJJ3et1kDhHIknd63WQ2CORpO71OkgkSd3rdZA4tCVJ3et1kDi0JUnd63WQSJK61+sgcWhLkrrX6yBxaEuSurdW7v67oXlLFUnrWa97JJKk7vU6SJwjkaTu9TpInCORpO71OkgkSd0zSCRJrRgkkqRWDBJJUiu9DhKv2pKk7vU6SLxqS5K6N1KQJNmRpNehI0lamVHD4WrggSTvTPJTXRYkSVpfRgqSqnod8ALgz4APJbk3ye4kT+20OknSmjfycFVVfQP4Q+AO4Dzg1cDnk/yHjmqTJK0Do86RvCrJHwGfAp4EXFZVrwR+GvhPHda3WD1bkuxPckuSPat9fEnS943aI7kK+K2qen5VvauqTgFU1beAa8/mgM3//E8lOTqv/Yokx5OcGCEcfhL4WFVdC1xyNseXJI3XqEFysqruGW5I8g6AqvrkWR7zVuCKefvaBNwMvJJBMOxMckmSf57ko/MezwG+AFyd5FPAp8/y+JKkMRo1SC5foO2VKzlgE0h/Pa/5MuBEVT1YVd9mMA9zZVV9uap2zHucAt4A3FhVPwf4DVCSNEFLfkNikjcBbwZ+IsmXhn71VOD/jrGO84GHh5bngBcusf7/At6a5LXAQwutkGQ3sBtgy5Yt46lSkvQDlvuq3d8HPg7sBYbnLR6vqvm9ijayQFsttnJVHWUwb7OoqtqX5CQwOzU1tb1lfZKkRSw3tFVV9RDwFuDxoQdJnjHGOuaAC4aWNwOPtN2pt0iRpO6N0iPZAdzHoIcw3HMo4MIx1XEYuCjJNuBrDD5J/9q2O00yC8xOT0+33dW6t3XPx
36g7aG3O70kqb0leyRVtaP5ua2qLmx+nn6sKESS3A7cC1ycZC7Jrqp6ArgeOATcD9xZVcdWsv959dsjkaSOLdcjASDJzwBfrKpvJnkdcCnw7qr687M9YFXtXKT9IHDwbPe3FHskktS9US///W3gW0l+Gvg14KvA73ZW1ZjYI5Gk7o3UIwGeqKpKciXwnqr6YJLXd1nYOPS1R+J8h6S1ZNQeyeNJbgBeB3ys+ST6k7orazzskUhS90YNkl8C/h7YVVV/weADhO/qrCpJ0rox0tBWEx6/ObT858BtXRU1Ln0d2pKktWTU28i/JskDSR5L8o0kjyf5RtfFteXQliR1b9TJ9ncCs1V1f5fFSJLWn1HnSP5yPYZIktkk+x577LFJlyJJvTVqkBxJ8pEkO5thrtckeU2nlY2BQ1uS1L1Rh7Z+DPgW8PKhtgLuHntFkqR1ZdSrtt7QdSGSpPVp1Htt/SSD26Q8t6qel+T5wKuq6r91Wl1LXv67cn56XtKoRp0j+R3gBuA7AFX1JQa3el/TnCORpO6NGiQ/XFWfm9f2xLiLkSStP6MGydeT/ATN198muQo42VlVkqR1Y9Srtt4C7AP+aZKvAf8PuKazqiRJ68aSQZLkV4cWDwKfZtCL+Sbwbxi6/5YkaWNabmjrqc1jBngT8HTgacAbgUu6La09P9kuSd1bskdSVb8BkOR/A5dW1ePN8luBP+i8upaq6gBwYGZm5rpJ16IzeXmx1B+jTrZvAb49tPxtYOvYq5EkrTujTrb/LvC5JH/E4MqtVwMf7qwqrTv2MKSNa9RbpLwtyceBlzRNb6iqL3RXliRpvRi1R0JVfR74fIe1SJLWoVHnSNaUJJckuTPJbzcfjpQkTcjIPZJxSXILsAM4VVXPG2q/AngPsAn4H1X19iV280rgfVX1mST7gbu6rFlrg/Mw0tq06kEC3ArcBNx2uiHJJuBm4HJgDjjcBMQmYO+87a9lMPl/Y5JXAc9chZolSYtY9SCpqnuSbJ3XfBlwoqoeBEhyB3BlVe1l0HtZyFuaAPLLtSRpgibRI1nI+cDDQ8tzwAsXW7kJov8M/AjwrkXW2Q3sBtiyZcuYypQkzbdWgiQLtNViK1fVQzQhscQ6+5KcBGanpqa2tytPkrSYtXLV1hxwwdDyZuCRtjv1i60kqXtrJUgOAxcl2ZZkisG3L+5vu1Nv2ihJ3Vv1IElyO3AvcHGSuSS7quoJ4HrgEHA/cGdVHWt7LHskktS9SVy1tXOR9oMMvvNkbJLMArPT09Pj3K0kachaGdrqhD0SSeper4PEORJJ6l6vg8QeiSR1r9dBIknqXq+DxKEtSeper4PEoS1J6l6vg0SS1L1eB4lDW5LUvbVy08ZOVNUB4MDMzMx1k65Fa59fnLU2+N9h/el1j0SS1D2DRJLUSq+DxDkSSeper4PEy38lqXu9DhJJUvcMEklSKwaJJKkVg0SS1Eqvg8SrtiSpe70OEq/akqTu9TpIJEnd6/W9tiRtDN6fa7LskUiSWjFIJEmtrPkgSXJhkg8muWuo7UeSfDjJ7yS5ZpL1SdJG12mQJLklyakkR+e1X5HkeJITSfYstY+qerCqds1rfg1wV1VdB7xqzGVLks5C15PttwI3AbedbkiyCbgZuByYAw4n2Q9sAvbO2/7aqjq1wH43A19unn93zDWrJ5yAlVZHp0FSVfck2Tqv+TLgRFU9CJDkDuDKqtoL7Bhx13MMwuSLrIPhOUnqs0lc/ns+8PDQ8hzwwsVWTvJM4G3AC5Lc0ATO3cBNSX4ROLDIdruB3QBbtmwZU+mSNiJ7t0ubRJBkgbZabOWq+ivgjfPavgm8YamDVNW+JCeB2ampqe0rKVSStLxJDAvNARcMLW8GHuniQN4iRZK6N4kgOQxclGRbkingamB/Fwfypo2S1L2uL/+9HbgXuDjJXJJdVfUEcD1wCLgfuLOqjnVxfHskktS9rq/a2rlI+0HgYJfHhkGPBJidnp7u+lCStGH1+
tJZeySS1L1eB4lzJJLUvV7fRr6qDgAHZmZmrpt0Leqn+Z8v8LMF2ojskUiSWrFHIq1hfqJa60GveySSpO4ZJJKkVnodJM6RSFL3eh0kfo5EkrrX6yCRJHXPIJEktdLrIHGORJK61+sgcY5EkrrX6yCRJHXPIJEktWKQSJJaMUgkSa30Oki8akuSutfrIPGqLUnqXq9vIy9ped6qXm31ukciSeqeQSJJasUgkSS1suaDJMmFST6Y5K6l2iRJk9FpkCS5JcmpJEfntV+R5HiSE0n2LLWPqnqwqnYt1yZJmoyur9q6FbgJuO10Q5JNwM3A5cAccDjJfmATsHfe9tdW1amOa5RWjVdIaTHr+d9Gp0FSVfck2Tqv+TLgRFU9CJDkDuDKqtoL7OiyHknS+E1ijuR84OGh5bmmbUFJnpnk/cALktywWNsC2+1OciTJkUcffXSM5UuShk3iA4lZoK0WW7mq/gp443JtC2y3L8lJYHZqamr7SgqVJC1vEj2SOeCCoeXNwCNdHMhbpEhS9yYRJIeBi5JsSzIFXA3s7+JA3rRRkrrX9eW/twP3AhcnmUuyq6qeAK4HDgH3A3dW1bEujm+PRJK61/VVWzsXaT8IHOzy2DDokQCz09PTXR9KkjasNf/J9jbskUhS93odJM6RSFL3ev19JFV1ADgwMzNz3aRrkaRxWkufhLdHIklqpddB4hyJJHWv10EiSeqeQSJJaqXXQeIciSR1r9dB4hyJJHWv10EiSeqeQSJJaqXXQeIciSR1L1WLfqdUbyR5FPjqCjZ9FvD1MZez3nlOzuT5OJPn40zr/Xz8eFU9e7mVNkSQrFSSI1U1M+k61hLPyZk8H2fyfJxpo5yPXg9tSZK6Z5BIkloxSJa2b9IFrEGekzN5Ps7k+TjThjgfzpFIklqxRyJJasUgWUSSK5IcT3IiyZ5J17MSSW5JcirJ0aG2ZyT5RJIHmp9Pb9qT5L3N6/1SkkuHtnl9s/4DSV4/1L49yZebbd6bJCs9xmpIckGSTye5P8mxJP9xI5+TJE9J8rkkf9Kcj99o2rcl+WxT60eSTDXtT26WTzS/3zq0rxua9uNJXjHUvuD7aCXHWC1JNiX5QpKPrrTWPp2PkVSVj3kPYBPwZ8CFwBTwJ8Alk65rBa/jpcClwNGhtncCe5rne4B3NM9/Afg4EOBFwGeb9mcADzY/n948f3rzu88BL262+TjwypUcYxXPx3nApc3zpwJfAS7ZqOekOeaPNs+fBHy2qeFO4Oqm/f3Am5rnbwbe3zy/GvhI8/yS5j3yZGBb897ZtNT76GyPscr/Tn4V+H3goyuptW/nY6RzNukC1uKj+R/BoaHlG4AbJl3XCl/LVs4MkuPAec3z84DjzfMPADvnrwfsBD4w1P6Bpu084E+H2r+33tkeY4Ln5n8Cl3tOCuCHgc8DL2TwAbpzmvbvvReAQ8CLm+fnNOtl/vvj9HqLvY+abc7qGKt4HjYDnwR+DvjoSmrt0/kY9eHQ1sLOBx4eWp5r2vrguVV1EqD5+ZymfbHXvFT73ALtKznGqmuGCF7A4K/wDXtOmmGcLwKngE8w+Iv5b6vqiQXq+V6tze8fA57J2Z+nZ67gGKvl3cCvAf/QLK+k1j6dj5EYJAvLAm19v7xtsdd8tu0rOcaqSvKjwB8Cv1JV31hq1QXaenVOquq7VfUvGPwlfhnwU0vUM67zsdRrntj5SLIDOFVV9w03L1FPr8/H2TBIFjYHXDC0vBl4ZEK1jNtfJjkPoPl5qmlf7DUv1b55gfaVHGPVJHkSgxD5vaq6u2ne0OcEoKr+FvhjBnMkT0tyzgL1fK/W5vfnAn/N2Z+nr6/gGKvhZ4BXJXkIuIPB8Na7V1BrX87HyAyShR0GLmqupJhiMMm1f8I1jct+4PRVRq9nME9wuv2Xm6uIXgQ81gzBHAJenuTpzZVGL2cwfnsSeDzJi5ork3553r7O5hiroqnzg8D9VfWbQ7/akOckybOTPK15/o+Af
w3cD3wauGqRWk+/hquAT9Vg8H4/cHVzhdE24CIGFx0s+D5qtjnbY3Suqm6oqs1VtbWp9VNVdc0Kau3F+Tgrk56kWasPBlfTfIXBmPGvT7qeFb6G24GTwHcY/GWzi8H46ieBB5qfz2jWDXBz83q/DMwM7eda4ETzeMNQ+wxwtNnmJr7/AdezPsYqnY9/xWBY4EvAF5vHL2zUcwI8H/hCcz6OAv+lab+Qwf/4TgB/ADy5aX9Ks3yi+f2FQ/v69eY1HKe5Um2p99FKjrHK/1Zexvev2trw52O5h59slyS14tCWJKkVg0SS1IpBIklqxSCRJLVikEiSWjFIJEmtGCTSmCX5uzHv79YkVy2/pjQZBokkqRWDRFpGknckefPQ8luT3Jjkk0k+n8EXWV25wHYvO/3lSM3yTUn+XfN8e5L/k+S+JIdO34drhFoW3C7JHzd1fi7JV5K8pPULl0ZkkEjLuwP4paHlfwt8CHh1VV0K/Czw35v7ay2ruXHk+4Crqmo7cAvwtjFsd05VXQb8CnDjKLVI43DO8qtIG1tVfSHJc5L8E+DZwN8wuIfZbyV5KYPvrjgfeC7wFyPs8mLgecAnmuzZ1Oyv7Xan72Z8H4MvNJNWhUEijeYuBndf/ccMeijXMAiV7VX1nebW40+Zt80TnNnrP/37AMeq6sVnWcNy2/198/O7+N7WKnJoSxrNHQxu+30Vg1A5l8GXIH0nyc8CP77ANl8FLmluJ34u8PNN+3Hg2UleDIMhqyT/bIQaVrqd1Cn/apFGUFXHkjwV+FpVnUzye8CBJEcY3I7+TxfY5uEkdzK4TfsDDG7ZTlV9u7mc971NwJzD4AuUji1Tw4q2k7rmbeQlSa04tCVJasWhLWmNSHIzg+8NH/aeqvrQJOqRRuXQliSpFYe2JEmtGCSSpFYMEklSKwaJJKkVg0SS1Mr/B63LLKd83LcbAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -235,7 +235,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAELCAYAAACWBvIOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGKdJREFUeJzt3X+wXOV93/H3J5KFmsQBW5YTyg9fOShuRJo45hbb49iTQG1Ealu4YVqBHVOHGZoYOs14PEGaNA1mnE7lmVixa6hNB1zKJBWExEE1danHkKTNNMCV8S/hyFx+uCiQWApCtuMGLPztH/sAq8vu3dXl7tVB9/2a2bl7nn2e7/OcM1x9OLvnnk1VIUnS0fZ9R3sBkiSBgSRJ6ggDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdcLKo72AF5KXvexlNTU1dbSXIUkvKLt27dpfVWtH9TOQjsDU1BQzMzNHexmS9IKS5Ovj9PMtO0lSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVIneKeGo+mK4we0HVz6dUhSB3iGJEnqBM+QltDUllsP235o9VFaiCR1kGdIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCRMNpCQbk+xJMptky4DXj0tyY3v9ziRTfa9tbe17kpwzqmaSda3Gfa3mqvnmSDKV5P8l+UJ7fHxyR0KSNMrEAinJCuAq4FxgA3BBkg1zul0MHKiq04DtwLY2dgOwGTgd2AhcnWTFiJrbgO1VtR440GoPnaO5v6pe3R6/vIi7L0k6QpM8QzoTmK2qB6rqSWAHsGlOn03A9e35zcDZSdLad1TVE1X1IDDb6g2s2cac1WrQap43Yg5JUodMMpBOAh7u297b2gb2qapDwEFgzTxjh7WvAR5vNebONWwOgHVJ7knyJ0neuLDdlCQthkl+hfmgs5Aas8+w9kEBOl//+eZ4FDi1qv4myRnAHyU5vaq+edgCk0uASwBOPfXUAaUkSYthkmdIe4FT+rZPBh4Z1ifJSuB44LF5xg5r3w+c0GrMnWvgHO3twL8BqKpdwP3Aj83diaq6pqqmq2p67dq1Y++8JOnITDKQ7gbWt6vfVtG7SGHnnD47gYva8/OB26uqWvvmdoXcOmA9cNewmm3MHa0GreYt882RZG27SIIkr2xzPLCI+y9JOgITe8uuqg4luQy4DVgBXFdVu5NcCcxU1U7gWuCGJLP0zow2t7G7k9wE3AscAi6tqqcABtVsU14O7EjyQeCeVpthcwBvAq5Mcgh4CvjlqnpsUsdDkjS/9E4uNI7p6emamZlZ8PipLbcetv3Q6guf2+mKgwuuL0ldlGRXVU2P6uedGiRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6oSJBlKSjUn2JJlNsmXA68clubG9fmeSqb7Xtrb2PUnOGVUzybpW475Wc9WoOdrrpyb5dpL3L/4RkCSNa2KBlGQFcBVwLrABuCDJhjndLgYOVNVpwHZgWxu7AdgMnA5sBK5OsmJEzW3A9qpaDxxotYfO0Wc78JnF2WtJ0kJN8gzpTGC2qh6oqieBHcCmOX02Ade35zcDZydJa99RVU9U1YPAbKs3sGYbc1arQat53og5SHIe8ACwexH3W5K0AJMMpJOAh/u297a2gX2q6hBwEFgzz9hh7WuAx1uNuXMNn
CPJDwCXAx9Y8B5KkhbNJAMpA9pqzD6L1T7fHB+g9xbftwe8/uwCk0uSzCSZ2bdv33xdJUnPw8oJ1t4LnNK3fTLwyJA+e5OsBI4HHhsxdlD7fuCEJCvbWVB//2FzvBY4P8mHgBOA7yX5u6r6WP8Cq+oa4BqA6enpuYEqSVokkzxDuhtY365+W0XvIoWdc/rsBC5qz88Hbq+qau2b2xVy64D1wF3DarYxd7QatJq3zDdHVb2xqqaqagr4HeDfzQ0jSdLSmdgZUlUdSnIZcBuwAriuqnYnuRKYqaqdwLXADUlm6Z21bG5jdye5CbgXOARcWlVPAQyq2aa8HNiR5IPAPa02w+aQJHVLeicXGsf09HTNzMwsePzUllsP235o9YXP7XTFwQXXl6QuSrKrqqZH9fNODZKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6YaxASvLWJIaXJGlixg2ZzcB9ST6U5McnuSBJ0vI0ViBV1buAnwbuBz6Z5P8kuSTJiye6OknSsjH223BV9U3gD4AdwInAO4DPJ/lXE1qbJGkZGfczpLcn+RRwO/Ai4MyqOhf4KeD9E1yfJGmZWDlmv/OB7VX1p/2NVfWdJL+0+MuSJC03475l9+jcMEqyDaCqPrfoq5IkLTvjBtKbB7Sdu5gLkSQtb/O+ZZfkV4D3Aj+a5Et9L70Y+LNJLkyStLyMOkP6PeBtwC3t59OPM9ql4PNKsjHJniSzSbYMeP24JDe21+9MMtX32tbWvifJOaNqJlnXatzXaq6ab44kZyb5Qnt8Mck7Ru2PJGlyRgVSVdVDwKXAt/oeJHnpfAOTrACuovfW3gbggiQb5nS7GDhQVacB24FtbewGen+MezqwEbg6yYoRNbfRu/BiPXCg1R46B/AVYLqqXt3m+ESScS/ykCQtsnHOkAB2ATPt566+7fmcCcxW1QNV9SS9v1/aNKfPJuD69vxm4Owkae07quqJqnoQmG31BtZsY85qNWg1z5tvjqr6TlUdau2rgRqxP5KkCZr3jKCq3tp+rltA7ZOAh/u29wKvHdanqg4lOQisae1/PmfsSe35oJprgMf7Aqa//7A59id5LXAd8ArgF/vGS5KW2Lh/GPuGJD/Qnr8ryYeTnDpq2IC2uWchw/osVvu866iqO6vqdOAfAVuTrJ7bsd0iaSbJzL59+waUkiQthnEv+/6PwHeS/BTwa8DXgRtGjNkLnNK3fTLwyLA+7fOb44HH5hk7rH0/cELfZ0D9cw2b4xlV9VXgb4GfmLsTVXVNVU1X1fTatWtH7LIkaaHGDaRDVVX0Po/5SFV9hN6l3/O5G1jfrn5bRe8ihZ1z+uwELmrPzwdub/PsBDa3K+TWAeuBu4bVbGPuaDVoNW+Zb45WYyVAklcArwIeGvN4SJIW2bhXlX0ryVbgXcCb2tVuL5pvQPu85jLgNmAFcF1V7U5yJTBTVTuBa4EbkszSO2vZ3MbuTnITcC9wCLi0qp4CGFSzTXk5sCPJB4F7Wm2GzQH8DLAlyXeB7wHvrar9Yx4PSdIiS+/kYkSn5EeAC4G7q+p/tc+Pfraq/sukF9gl09PTNTMz6uLC4aa23HrY9kOrL3xupysOLri+JHVRkl1VNT2q31hnSFX1V8CH+7b/L7CswkiSNFnjXmX3T9sdEA4m+WaSbyX55qQXJ0laPsb9DOlDwNva1WiSJC26ca+y+2vDSJI0SeOeIc0kuRH4I+CJpxur6g8nsipJ0rIzbiD9EPAd4C19bQUYSJKkRTHuVXbvmfRCJEnL27hX2f1Yks8l+Urb/skk/2ayS5MkLSfjXtTwn4CtwHcBqupLPHvHA0mSnrdxA+n7q+quOW1+VYMkadGMG0j7k/wo7WsbkpwPPDqxVUmSlp1xr7K7FLgG+AdJ/hJ4EHjnxFYlS
Vp25g2kJO/r2/zv9L7i4fvofXfQL9B3fztJkp6PUWdIT3/n0avofavqLfS+gfUXgT+d4LokScvMvIFUVR8ASPI/gddU1bfa9hXA7098dZKkZWPcixpOBZ7s234SmFr01UiSlq1xL2q4AbgryafoXWn3DuD6ia1KkrTsjHvroN9K8hngja3pPVV1z+SWJUlabsY9Q6KqPg98foJrkSQtY+N+hiRJ0kQZSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROmGggJdmYZE+S2SRbBrx+XJIb2+t3Jpnqe21ra9+T5JxRNZOsazXuazVXzTdHkjcn2ZXky+3nWZM7EpKkUSYWSElWAFcB5wIbgAuSbJjT7WLgQFWdBmwHtrWxG4DNwOnARuDqJCtG1NwGbK+q9cCBVnvoHMB+4G1V9Q+Bi+jd0VySdJRM8gzpTGC2qh6oqieBHcCmOX028ezXWNwMnJ0krX1HVT1RVQ8Cs63ewJptzFmtBq3mefPNUVX3VNUjrX03sDrJcYu295KkIzLJQDoJeLhve29rG9inqg4BB4E184wd1r4GeLzVmDvXsDn6/QJwT1U9MXcnklySZCbJzL59+0bssiRpoSYZSBnQVmP2Waz2ketIcjq9t/H+5YB+VNU1VTVdVdNr164d1EWStAgmGUh7gVP6tk8GHhnWJ8lK4HjgsXnGDmvfD5zQasyda9gcJDkZ+BTw7qq6f4H7KUlaBJMMpLuB9e3qt1X0LlLYOafPTnoXFACcD9xeVdXaN7cr5NYB64G7htVsY+5oNWg1b5lvjiQnALcCW6vqzxZ1zyVJR2xigdQ+r7kMuA34KnBTVe1OcmWSt7du1wJrkswC7wO2tLG7gZuAe4H/AVxaVU8Nq9lqXQ68r9Va02oPnaPVOQ34jSRfaI+XT+RgSJJGSu/kQuOYnp6umZmZBY+f2nLrYdsPrb7wuZ2uOLjg+pLURUl2VdX0qH7eqUGS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE6YaCAl2ZhkT5LZJFsGvH5ckhvb63cmmep7bWtr35PknFE1k6xrNe5rNVfNN0eSNUnuSPLtJB+b3FGQJI1jYoGUZAVwFXAusAG4IMmGOd0uBg5U1WnAdmBbG7sB2AycDmwErk6yYkTNbcD2qloPHGi1h84B/B3wG8D7F3XHJUkLMskzpDOB2ap6oKqeBHYAm+b02QRc357fDJydJK19R1U9UVUPArOt3sCabcxZrQat5nnzzVFVf1tV/5teMEmSjrJJBtJJwMN923tb28A+VXUIOAismWfssPY1wOOtxty5hs0hSeqQSQZSBrTVmH0Wq33cdQyV5JIkM0lm9u3bN+4wSdIRmmQg7QVO6ds+GXhkWJ8kK4HjgcfmGTusfT9wQqsxd65hc4ylqq6pqumqml67du24wyRJR2iSgXQ3sL5d/baK3kUKO+f02Qlc1J6fD9xeVdXaN7cr5NYB64G7htVsY+5oNWg1bxkxhySpQ1aO7rIwVXUoyWXAbcAK4Lqq2p3kSmCmqnYC1wI3JJmld9ayuY3dneQm4F7gEHBpVT0FMKhmm/JyYEeSDwL3tNoMm6PVegj4IWBVkvOAt1TVvZM5IpKk+cSThfFNT0/XzMzMgsdPbbn1sO2HVl/43E5XHFxwfUnqoiS7qmp6VD/v1CBJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgS
ZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUidMNJCSbEyyJ8lski0DXj8uyY3t9TuTTPW9trW170lyzqiaSda1Gve1mqsWOockaelNLJCSrACuAs4FNgAXJNkwp9vFwIGqOg3YDmxrYzcAm4HTgY3A1UlWjKi5DdheVeuBA632Ec+xuEdBkjSuSZ4hnQnMVtUDVfUksAPYNKfPJuD69vxm4Owkae07quqJqnoQmG31BtZsY85qNWg1z1vgHJKko2DlBGufBDzct70XeO2wPlV1KMlBYE1r//M5Y09qzwfVXAM8XlWHBvRfyBydMLXl1ue0PfTv/8mi9ZekLplkIGVAW43ZZ1j7oDO6+fovZI7DF5hcAlzSNr+dZM+AcaO8DNj/nNqDen5gYOuzY7Yd2cRH2n8JDTwmy5jH43Aej8O90I/HK8bpNMlA2guc0rd9MvDIkD57k6wEjgceGzF2UPt+4IQkK9tZUn//hczxjKq6BrhmjP0dKslMVU0/nxrHGo/J4Tweh/N4HG65HI9JfoZ0N7C+Xf22it4FBDvn9NkJXNSenw/cXlXV2je3K+TWAeuBu4bVbGPuaDVoNW9Z4BySpKNgYmdI7fOay4DbgBXAdVW1O8mVwExV7QSuBW5IMkvvrGVzG7s7yU3AvcAh4NKqegpgUM025eXAjiQfBO5ptVnIHJKkpZfeyYImKckl7a0/NR6Tw3k8DufxONxyOR4GkiSpE7x1kCSpEwykCRt1+6QXgiTXJflGkq/0tb00yWfbrZo+m+QlrT1JPtr290tJXtM35qLW/74kF/W1n5Hky23MR9sfLi9ojqWQ5JQkdyT5apLdSf71cj4mSVYnuSvJF9vx+EBrX5dFup3XsN+jhcyxVNK7u8w9ST690LUeS8djLFXlY0IPehde3A+8ElgFfBHYcLTXtYD9eBPwGuArfW0fAra051uAbe35zwOfofdnVq8D7mztLwUeaD9f0p6/pL12F/D6NuYzwLkLmWMJj8eJwGva8xcDX6N3K6tleUzanD/Ynr8IuLOt4SZgc2v/OPAr7fl7gY+355uBG9vzDe135DhgXfvdWTHf79GRzrHE/528D/g94NMLWeuxdjzGOmZHewHH8qP9g3Jb3/ZWYOvRXtcC92WKwwNpD3Bie34isKc9/wRwwdx+wAXAJ/raP9HaTgT+oq/9mX5HOsdRPDa3AG/2mBTA9wOfp3cHlf3Aytb+zO8CvatkX9+er2z9Mvf34+l+w36P2pgjmmMJj8PJwOfo3dLs0wtZ67F0PMZ9+JbdZA26fVKnbk/0PPxwVT0K0H6+vLUP2+f52vcOaF/IHEuuvfXx0/TOCpbtMWlvT30B+AbwWXr/Bz/W7byA/tt5HclxGvuWYX1zLJXfAX4N+F7bXshaj6XjMRYDabLGuj3RMeZIb9W0kGPUieOa5AeBPwB+taq+OV/XAW3H1DGpqqeq6tX0zgzOBH58nvUs1vFYyC3DJi7JW4FvVNWu/uZ51nNMH48jYSBN1li3J3qB+uskJwK0n99o7cP2eb72kwe0L2SOJZPkRfTC6Her6g9b87I+JgBV9Tjwx/Q+Qzohvdt1zV3PM2vNeLfzGtb+zC3DjmCOpfAG4O1JHqL3jQRn0TtjWq7HY2wG0mSNc/ukF6r+WzLNvVXTu9tVX68DDra3lm4D3pLkJe3KsLfQe3/7UeBbSV7XriR7N4Nv+zTOHEuirfNa4KtV9eG+l5blMUmyNskJ7fnfA/4x8FUW73Zei3nLsImrqq1VdXJVTbW13l5V71zAWo+J43FEjvaHWMf6g97VT1+j9576rx/t9SxwH/4r8CjwXXr/p3UxvfefPwfc136+tPUNvS9RvB/4MjDdV+eX6H3v1Czwnr72aeArbczHePYPto94jiU6Hj9D7+2OLwFfaI+fX67HBPhJerfr+lJb879t7a+k9w/oLPD7wHGtfXXbnm2vv7Kv1q+3fdhDu7Jwvt+jhcyxxP+t/CzPXmW37I/Hq
Id3apAkdYJv2UmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEhSRyX59iLX+89Jzh/dUzo6DCRJUicYSNISSbItyXv7tq9I8ptJPpfk8+l9Id+mAeN+9ukveWvbH0vyL9rzM5L8SZJdSW57+j53Y6xl4Lgkf9zWeVeSryV54/PecWlMBpK0dHYA/7xv+58BnwTeUVWvAX4O+O12/7qR2g1e/wNwflWdAVwH/NYijFtZVWcCvwr85jhrkRbDytFdJC2GqronycuT/H1gLXCA3j0Ctyd5E73vzjkJ+GHgr8Yo+SrgJ4DPtgxb0eo933FP3718F70vZpSWhIEkLa2b6d1t+UfonTG9k144nVFV321fWbB6zphDHP5uxtOvB9hdVa8/wjWMGvdE+/kU/huhJeRbdtLS2kHv6wLOpxdOx9P7MrfvJvk54BUDxnwd2NC+huB44OzWvgdYm+T10HsrLsnpY6xhoeOkifL/fqQlVFW7k7wY+MuqejTJ7wL/LckMva+x+IsBYx5OchO9r3e4j95XPVBVT7bLuD/agmolvS+C2z1iDQsaJ02aXz8hSeoE37KTJHWCb9lJx5gkVwFvmNP8kar65NFYjzQu37KTJHWCb9lJkjrBQJIkdYKBJEnqBANJktQJBpIkqRP+PzUtoPevk+qYAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAELCAYAAACWBvIOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGLpJREFUeJzt3X+w3XV95/Hnq4kh29aCxthafnhjiW5Dt7VyF3WsjoVVoKsGt5ltQCtrmWFbYWc7jlOS6XaLjN3ZOFNTXbHKDrgs026gtJasrMu6Qtvdzha4EX8FG7n8cEmhNSkhat2Cwff+cT7AyeWce04u99x8yX0+Zs7c8/2cz+f9+Xy/w82L7znf+z2pKiRJOtq+72gvQJIkMJAkSR1hIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE5YebQX8Fzyohe9qKampo72MiTpOWXXrl37q2rtqH4G0hGYmppiZmbmaC9Dkp5Tknx9nH6+ZSdJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBO/UcDRdfvyAtoNLvw5J6gDPkCRJnWAgSZI6wUCSJHWCgSRJ6gQvalhCU1tuPmz7gdVHaSGS1EGeIUmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6oSJBlKSc5LsSTKbZMuA149Lcn17/fYkU32vbW3te5KcPapmknWtxj2t5qr55kgyleT/JflCe3x8ckdCkjTKxAIpyQrgSuBcYANwfpINc7pdBByoqlOB7cC2NnYDsBk4DTgH+FiSFSNqbgO2V9V64ECrPXSO5t6qemV7/PIi7r4k6QhN8gzpDGC2qu6rqseBHcDGOX02Ate25zcCZyVJa99RVY9V1f3AbKs3sGYbc2arQat53og5JEkdMslAOhF4sG97b2sb2KeqDgEHgTXzjB3WvgZ4tNWYO9ewOQDWJbkryZ8mef3CdlOStBgm+X1Ig85Casw+w9oHBeh8/eeb42HglKr62ySnA3+c5LSq+uZhC0wuBi4GOOWUUwaUkiQthkmeIe0FTu7bPgl4aFifJCuB44FH5hk7rH0/cEKrMXeugXO0twP/FqCqdgH3Ai+fuxNVdVVVTVfV9Nq1a8feeUnSkZlkIN0JrG9Xv62id5HCzjl9dgIXtuebgFurqlr75naF3DpgPXDHsJptzG2tBq3mTfPNkWRtu0iCJC9rc9y3iPsvSToC
E3vLrqoOJbkUuAVYAVxTVbuTXAHMVNVO4GrguiSz9M6MNrexu5PcANwNHAIuqaonAAbVbFNeBuxI8gHgrlabYXMAbwCuSHIIeAL45ap6ZFLHQ5I0v/ROLjSO6enpmpmZWfD4qS03H7b9wOoLntnp8oMLri9JXZRkV1VNj+rnnRokSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeqEiQZSknOS7Ekym2TLgNePS3J9e/32JFN9r21t7XuSnD2qZpJ1rcY9reaqUXO0109J8u0k71v8IyBJGtfEAinJCuBK4FxgA3B+kg1zul0EHKiqU4HtwLY2dgOwGTgNOAf4WJIVI2puA7ZX1XrgQKs9dI4+24HPLM5eS5IWapJnSGcAs1V1X1U9DuwANs7psxG4tj2/ETgrSVr7jqp6rKruB2ZbvYE125gzWw1azfNGzEGS84D7gN2LuN+SpAWYZCCdCDzYt723tQ3sU1WHgIPAmnnGDmtfAzzaasyda+AcSX4AuAx4/4L3UJK0aCYZSBnQVmP2Waz2+eZ4P723+L494PWnF5hcnGQmycy+ffvm6ypJehZWTrD2XuDkvu2TgIeG9NmbZCVwPPDIiLGD2vcDJyRZ2c6C+vsPm+PVwKYkHwROAL6X5O+r6qP9C6yqq4CrAKanp+cGqiRpkUzyDOlOYH27+m0VvYsUds7psxO4sD3fBNxaVdXaN7cr5NYB64E7htVsY25rNWg1b5pvjqp6fVVNVdUU8DvAv5sbRpKkpTOxM6SqOpTkUuAWYAVwTVXtTnIFMFNVO4GrgeuSzNI7a9ncxu5OcgNwN3AIuKSqngAYVLNNeRmwI8kHgLtabYbNIUnqlvROLjSO6enpmpmZWfD4qS03H7b9wOoLntnp8oMLri9JXZRkV1VNj+rnnRokSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdcJYgZTkLUkML0nSxIwbMpuBe5J8MMmPT3JBkqTlaaxAqqp3Aj8N3At8Msn/SXJxkudPdHWSpGVj7LfhquqbwB8CO4CXAG8HPp/kX01obZKkZWTcz5DeluRTwK3A84Azqupc4KeA901wfZKkZWLlmP02Adur6s/6G6vqO0l+afGXJUlabsZ9y+7huWGUZBtAVX1u0VclSVp2xg2kNw1oO3cxFyJJWt7mfcsuya8A7wF+LMmX+l56PvDnk1yYJGl5GXWG9PvAW4Gb2s8nH6e3S8HnleScJHuSzCbZMuD145Jc316/PclU32tbW/ueJGePqplkXatxT6u5ar45kpyR5Avt8cUkbx+1P5KkyRkVSFVVDwCXAN/qe5DkhfMNTLICuJLeW3sbgPOTbJjT7SLgQFWdCmwHtrWxG+j9Me5pwDnAx5KsGFFzG70LL9YDB1rtoXMAXwGmq+qVbY5PJBn3Ig9J0iIb5wwJYBcw037u6tuezxnAbFXdV1WP0/v7pY1z+mwErm3PbwTOSpLWvqOqHquq+4HZVm9gzTbmzFaDVvO8+eaoqu9U1aHWvhqoEfsjSZqgec8Iquot7ee6BdQ+EXiwb3sv8OphfarqUJKDwJrW/hdzxp7Yng+quQZ4tC9g+vsPm2N/klcD1wAvBX6xb7wkaYmN+4exr0vyA+35O5N8KMkpo4YNaJt7FjKsz2K1z7uOqrq9qk4D/jGwNcnquR3bLZJmkszs27dvQClJ0mIY97Lv3wW+k+SngF8Dvg5cN2LMXuDkvu2TgIeG9Wmf3xwPPDLP2GHt+4ET+j4D6p9r
2BxPqaqvAn8H/MTcnaiqq6pquqqm165dO2KXJUkLNW4gHaqqovd5zIer6sP0Lv2ez53A+nb12yp6FynsnNNnJ3Bhe74JuLXNsxPY3K6QWwesB+4YVrONua3VoNW8ab45Wo2VAEleCrwCeGDM4yFJWmTjXlX2rSRbgXcCb2hXuz1vvgHt85pLgVuAFcA1VbU7yRXATFXtBK4GrksyS++sZXMbuzvJDcDdwCHgkqp6AmBQzTblZcCOJB8A7mq1GTYH8DPAliTfBb4HvKeq9o95PCRJiyy9k4sRnZIfAS4A7qyq/9U+P3pjVf3nSS+wS6anp2tmZtTFhcNNbbn5sO0HVl/wzE6XH1xwfUnqoiS7qmp6VL+xzpCq6q+BD/Vt/19gWYWRJGmyxr3K7p+1OyAcTPLNJN9K8s1JL06StHyM+xnSB4G3tqvRJEladONeZfc3hpEkaZLGPUOaSXI98MfAY082VtUfTWRVkqRlZ9xA+iHgO8Cb+9oKMJAkSYti3Kvs3j3phUiSlrdxr7J7eZLPJflK2/7JJP9mskuTJC0n417U8B+BrcB3AarqSzx9xwNJkp61cQPp+6vqjjltflWDJGnRjBtI+5P8GO1rG5JsAh6e2KokScvOuFfZXQJcBfzDJH8F3A+8Y2KrkiQtO/MGUpL39m3+N3pf8fB99L476Ofpu7+dJEnPxqgzpCe/8+gV9L5V9SZ638D6i8CfTXBdkqRlZt5Aqqr3AyT5H8Crqupbbfty4A8mvjpJ0rIx7kUNpwCP920/Dkwt+mokScvWuBc1XAfckeRT9K60eztw7cRWJUladsa9ddBvJfkM8PrW9O6qumtyy5IkLTfjniFRVZ8HPj/BtUiSlrFxP0OSJGmiDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJ0w0kJKck2RPktkkWwa8flyS69vrtyeZ6ntta2vfk+TsUTWTrGs17mk1V803R5I3JdmV5Mvt55mTOxKSpFEmFkhJVgBXAucCG4Dzk2yY0+0i4EBVnQpsB7a1sRuAzcBpwDnAx5KsGFFzG7C9qtYDB1rtoXMA+4G3VtU/Ai6kd0dzSdJRMskzpDOA2aq6r6oeB3YAG+f02cjTX2NxI3BWkrT2HVX1WFXdD8y2egNrtjFnthq0mufNN0dV3VVVD7X23cDqJMct2t5Lko7IJAPpRODBvu29rW1gn6o6BBwE1swzdlj7GuDRVmPuXMPm6PfzwF1V9djcnUhycZKZJDP79u0bscuSpIWaZCBlQFuN2Wex2keuI8lp9N7G+5cD+lFVV1XVdFVNr127dlAXSdIimGQg7QVO7ts+CXhoWJ8kK4HjgUfmGTusfT9wQqsxd65hc5DkJOBTwLuq6t4F7qckaRFMMpDuBNa3q99W0btIYeecPjvpXVAAsAm4taqqtW9uV8itA9YDdwyr2cbc1mrQat403xxJTgBuBrZW1Z8v6p5Lko7YxAKpfV5zKXAL8FXghqraneSKJG9r3a4G1iSZBd4LbGljdwM3AHcD/x24pKqeGFaz1boMeG+rtabVHjpHq3Mq8BtJvtAeL57IwZAkjZTeyYXGMT09XTMzMwseP7Xl5sO2H1h9wTM7XX5wwfUlqYuS7Kqq6VH9vFODJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdMNFASnJOkj1JZpNsGfD6cUmub6/fnmSq77WtrX1PkrNH1UyyrtW4p9VcNd8cSdYkuS3Jt5N8dHJHQZI0jokFUpIVwJXAucAG4PwkG+Z0uwg4UFWnAtuBbW3sBmAzcBpwDvCxJCtG1NwGbK+q9cCBVnvoHMDf
A78BvG9Rd1yStCCTPEM6A5itqvuq6nFgB7BxTp+NwLXt+Y3AWUnS2ndU1WNVdT8w2+oNrNnGnNlq0GqeN98cVfV3VfW/6QWTJOkom2QgnQg82Le9t7UN7FNVh4CDwJp5xg5rXwM82mrMnWvYHJKkDplkIGVAW43ZZ7Hax13HUEkuTjKTZGbfvn3jDpMkHaFJBtJe4OS+7ZOAh4b1SbISOB54ZJ6xw9r3Aye0GnPnGjbHWKrqqqqarqrptWvXjjtMknSEJhlIdwLr29Vvq+hdpLBzTp+dwIXt+Sbg1qqq1r65XSG3DlgP3DGsZhtzW6tBq3nTiDkkSR2ycnSXhamqQ0kuBW4BVgDXVNXuJFcAM1W1E7gauC7JLL2zls1t7O4kNwB3A4eAS6rqCYBBNduUlwE7knwAuKvVZtgcrdYDwA8Bq5KcB7y5qu6ezBGRJM0nniyMb3p6umZmZhY8fmrLzYdtP7D6gmd2uvzggutLUhcl2VVV06P6eacGSVInGEiSpE4wkCRJnWAgSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkTDCRJUicYSJKkTjCQJEmdYCBJkjrBQJIkdYKBJEnqBANJktQJBpIkqRMMJElSJxhIkqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGkiSpEwwkSVInGEiSpE4wkCRJnWAgSZI6YaKBlOScJHuSzCbZMuD145Jc316/PclU32tbW/ueJGePqplkXatxT6u5aqFzSJKW3spJFU6yArgSeBOwF7gzyc6quruv20XAgao6NclmYBvwC0k2AJuB04AfBf5nkpe3McNqbgO2V9WOJB9vtX/3SOeoqicmdUyO1NSWm5/R9sC//6dHYSWSNHmTPEM6A5itqvuq6nFgB7BxTp+NwLXt+Y3AWUnS2ndU1WNVdT8w2+oNrNnGnNlq0Gqet8A5JElHwcTOkIATgQf7tvcCrx7Wp6oOJTkIrGntfzFn7Int+aCaa4BHq+rQgP4LmeM5yTMqSc9lkwykDGirMfsMax90Rjdf/4XMcfgCk4uBi9vmt5PsGTBulBcB+59Re1DP9w9sfXrMtiOb+Ej7L6GBx2QZ83gczuNxuOf68XjpOJ0mGUh7gZP7tk8CHhrSZ2+SlcDxwCMjxg5q3w+ckGRlO0vq77+QOZ5SVVcBV42xv0Mlmamq6WdT41jjMTmcx+NwHo/DLZfjMcnPkO4E1rer31bRu4Bg55w+O4EL2/NNwK1VVa19c7tCbh2wHrhjWM025rZWg1bzpgXOIUk6CiZ2htQ+r7kUuAVYAVxTVbuTXAHMVNVO4GrguiSz9M5aNrexu5PcANwNHAIuefLqt0E125SXATuSfAC4q9VmIXNIkpZeeicLmqQkF7e3/tR4TA7n8Ticx+Nwy+V4GEiSpE7w1kGSpE4wkCZs1O2TnguSXJPkG0m+0tf2wiSfbbdq+mySF7T2JPlI298vJXlV35gLW/97klzY1356ki+3MR9pf7i8oDmWQpKTk9yW5KtJdif518v5mCRZneSOJF9sx+P9rX1dFul2XsN+jxYyx1JJsiLJXUk+vdC1HkvHYyxV5WNCD3oXXtwLvAxYBXwR2HC017WA/XgD8CrgK31tHwS2tOdbgG3t+c8Bn6H3Z1avAW5v7S8E7ms/X9Cev6C9dgfw2jbmM8C5C5ljCY/HS4BXtefPB74GbFiux6TN+YPt+fOA29sabgA2t/aPA7/Snr8H+Hh7vhm4vj3f0H5HjgPWtd+dFfP9Hh3pHEv838l7gd8HPr2QtR5rx2OsY3a0F3AsP9o/KLf0bW8Fth7tdS1wX6Y4PJD2AC9pz18C7GnPPwGcP7cfcD7wib72T7S2lwB/2df+VL8jneMoHpub6N1fcdkfE+D7gc/Tu4PKfmBla3/qd4HeVbKvbc9Xtn6Z+/vxZL9hv0dtzBHNsYTH4STgc/Ruafbphaz1WDoe4z58y26yBt0+6Tl9e6I+P1xVDwO0ny9u7cP2eb72
vQPaFzLHkmtvffw0vbOCZXtM2ttTXwC+AXyW3v/Bj3U7L6D/dl5HcpzGvmVY3xxL5XeAXwO+17YXstZj6XiMxUCarLFuT3SMOdJbNS3kGHXiuCb5QeAPgV+tqm/O13VA2zF1TKrqiap6Jb0zgzOAH59nPYt1PBZyy7CJS/IW4BtVtau/eZ71HNPH40gYSJM11u2JnqP+JslLANrPb7T2Yfs8X/tJA9oXMseSSfI8emH0e1X1R615WR8TgKp6FPgTep8hnZDe7brmrueptWa823kNa3/qlmFHMMdSeB3wtiQP0PtGgjPpnTEt1+MxNgNpssa5fdJzVf8tmebequld7aqv1wAH21tLtwBvTvKCdmXYm+m9v/0w8K0kr2lXkr2Lwbd9GmeOJdHWeTXw1ar6UN9Ly/KYJFmb5IT2/B8A/wT4Kot3O6/FvGXYxFXV1qo6qaqm2lpvrap3LGCtx8TxOCJH+0OsY/1B7+qnr9F7T/3Xj/Z6FrgP/wV4GPguvf/Tuoje+8+fA+5pP1/Y+obelyjeC3wZmO6r80v0vndqFnh3X/s08JU25qM8/QfbRzzHEh2Pn6H3dseXgC+0x88t12MC/CS923V9qa3537b2l9H7B3QW+APguNa+um3Pttdf1lfr19s+7KFdWTjf79FC5lji/1beyNNX2S374zHq4Z0aJEmd4Ft2kqROMJAkSZ1gIEmSOsFAkiR1goEkSeoEA0mS1AkGktRRSb69yPX+U5JNo3tKR4eBJEnqBANJWiJJtiV5T9/25Ul+M8nnknw+vS/k2zhg3Buf/JK3tv3RJP+iPT89yZ8m2ZXklifvczfGWgaOS/InbZ13JPlaktc/6x2XxmQgSUtnB/ALfdv/HPgk8PaqehXws8Bvt/vXjdRu8PofgE1VdTpwDfBbizBuZVWdAfwq8JvjrEVaDCtHd5G0GKrqriQvTvKjwFrgAL17BG5P8gZ6351zIvDDwF+PUfIVwE8An20ZtqLVe7bjnrx7+S56X8woLQkDSVpaN9K72/KP0Dtjege9cDq9qr7bvrJg9Zwxhzj83YwnXw+wu6pee4RrGDXusfbzCfw3QkvIt+ykpbWD3tcFbKIXTsfT+zK37yb5WeClA8Z8HdjQvobgeOCs1r4HWJvktdB7Ky7JaWOsYaHjpIny/36kJVRVu5M8H/irqno4ye8B/zXJDL2vsfjLAWMeTHIDva93uIfeVz1QVY+3y7g/0oJqJb0vgts9Yg0LGidNml8/IUnqBN+ykyR1gm/ZSceYJFcCr5vT/OGq+uTRWI80Lt+ykyR1gm/ZSZI6wUCSJHWCgSRJ6gQDSZLUCQaSJKkT/j9xvrE1jZRlEwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -279,7 +279,7 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAExCAYAAACHweKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAF35JREFUeJzt3X2QVfWd5/H3Vx5kV0BnpZ1S0cDuEhUxIrSAIQuaTErRjIyJm2hIVlNGy80aY2XiyMQpdUztOokZJ2bXzK4mPqZ8irVRosy6W6PiQ9QFFBR0zaKDsQc3QeJzZATy3T/ubek0DX0bLn36/u77VXWr7jn31/d+r21/+N3fPed7IjORJJVlj6oLkCQ1n+EuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKtDwql543LhxOWHChKpeXpJa0vLly1/LzI7+xlUW7hMmTGDZsmVVvbwktaSIeLmRcS7LSFKBDHdJKpDhLkkFqmzNvS+bNm2iq6uLjRs3Vl3KoBo1ahTjx49nxIgRVZciqRBDKty7uroYM2YMEyZMICKqLmdQZCYbNmygq6uLiRMnVl2OpEL0uywTEddHxK8jYtV2Ho+I+H5ErImIZyJi2s4Ws3HjRvbdd9+2CXaAiGDfffdtu08rknavRtbcbwRO2MHj84BJ9ds5wN/uSkHtFOzd2vE9S9q9+g33zHwY+M0OhswHbs6aJ4B9ImL/ZhUoSRq4Zqy5Hwi80mO7q77v1d4DI+IcarN7Dj744H6feMLC+5pQ3lZr/+qkhsZ99KMf5ec//3lTX1tqV83+O+5Po3/npWvGoZB9rSn0edXtzLw2Mzszs7Ojo9+zZytjsEtqdc0I9y7goB7b44F1TXjeyowePZpXX32VOXPmMHXqVKZMmcIjjzwCwG233cYRRxzBlClTuOiii37vZy6++GKOPPJIZs2axa9+9auqypekpoT7IuDf1Y+amQW8mZnbLMm0mltvvZXjjz+eFStWsHLlSqZOncq6deu46KKLeOCBB1ixYgVLly7l7rvvBuDdd99l1qxZrFy5kjlz5nDddddV/A4ktbNGDoW8DXgcOCQiuiLirIg4NyLOrQ9ZDLwErAGuA76y26odREcffTQ33HADl112Gc8++yxjxoxh6dKlHHvssXR0dDB8+HAWLFjAww8/DMDIkSP51Kc+BcD06dNZu3ZthdVLanf9fqGamaf383gC/6FpFQ0Rc+bM4eGHH+a+++7ji1/8IhdeeCFjx47d7vgRI0Z8cEjjsGHD2Lx582CVKknbsLfMdrz88svst99+nH322Zx11lk89dRTzJw5kyVLlvDaa6+xZcsWbrvtNubOnVt1qZK0jSHVfqC3qg5piggeeughrrzySkaMGMHo0aO5+eab2X///bniiis47rjjyExOPPFE5s+fX0mNkrQjUVtVGXydnZ3Z+2Idzz//PIcddlgl9XTbsGED06ZN4+WXG+qH3zRD4b1Lu4PHuTdXRCzPzM7+xrks08O6des45phj+MY3vlF1KZK0S4b0ssxgO+CAA/jFL35RdRmStMucuUtSgQx3SSqQ4S5JBTLcJalAQ/sL1cv2bvLzvblTP/bGG29w66238pWv1DorPPTQQ3z3u9/l3nvvbWZ1ktQ0ztwb8MYbb/CDH/ygac9nawJJu5vh3oerrrqKKVOmMGXKFL73ve+xcOFCXnzxRaZOncqFF14IwDvvvMOpp57KoYceyoIFC+g+GWz58uXMnTuX6dOnc/zxx/Pqq7UGmcceeyzf/OY3mTt3LldffXVl701SexjayzIVWL58OTfccANPPvkkmcnMmTP58Y9/zKpVq1ixYgVQW5Z5+umnWb16NQcccACzZ8/mscceY+bMmXz1q1/lnnvuoaOjgzvuuIOLL76Y66+/Hqh
9AliyZEmVb09SmzDce3n00Uc55ZRT2GuvvQD49Kc//cGFOnqaMWMG48ePB2Dq1KmsXbuWffbZh1WrVvHJT34SgC1btrD//lsvJ/u5z31uEN6BJBnu22i0186ee+75wf3uFr+ZyeGHH87jjz/e5890/4MhSbuba+69zJkzh7vvvpvf/va3vPvuu/z0pz9l9uzZvP322/3+7CGHHML69es/CPdNmzaxevXq3V2yJG1jaM/cd/LQxV0xbdo0zjzzTGbMmAHAl7/8ZaZPn87s2bOZMmUK8+bN46ST+u46N3LkSO666y7OP/983nzzTTZv3swFF1zA4YcfPphvQZJs+TtUtPN7V9ls+dtctvyVpDZmuEtSgYZcuFe1TFSldnzPknavIRXuo0aNYsOGDW0VdpnJhg0bGDVqVNWlSCrIkDpaZvz48XR1dbF+/fqqSxlUo0aN+uCEKElqhiEV7iNGjGDixIlVlyFJLW9ILctIkprDcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFGlLHuRflsr0H+fUGvz2ypKGroZl7RJwQES9ExJqIWNjH4wdHxIMR8XREPBMRJza/VElSo/oN94gYBlwDzAMmA6dHxORew/4CuDMzjwJOA37Q7EIlSY1rZOY+A1iTmS9l5vvA7cD8XmMSGFu/vzewrnklSpIGqpFwPxB4pcd2V31fT5cBX4iILmAx8NW+nigizomIZRGxrN2ag0nSYGok3KOPfb178p4O3JiZ44ETgVsiYpvnzsxrM7MzMzs7OjoGXq0kqSGNhHsXcFCP7fFsu+xyFnAnQGY+DowCxjWjQEnSwDUS7kuBSRExMSJGUvvCdFGvMb8EPgEQEYdRC3fXXSSpIv2Ge2ZuBs4D7geep3ZUzOqIuDwiTq4P+1Pg7IhYCdwGnJntdDklSRpiGjqJKTMXU/uitOe+S3rcfw6Y3dzSJEk7y/YDklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoOGNDIqIE4CrgWHADzPzr/oY81ngMiCBlZn5+SbWKQ2ey/Ye5Nd7c3BfT22h33CPiGHANcAngS5gaUQsyszneoyZBPw5MDszX4+I/XZXwZKk/jWyLDMDWJOZL2Xm+8DtwPxeY84GrsnM1wEy89fNLVOSNBCNhPuBwCs9trvq+3r6MPDhiHgsIp6oL+NsIyLOiYhlEbFs/fr1O1exJKlfjYR79LEve20PByYBxwKnAz+MiH22+aHMazOzMzM7Ozo6BlqrJKlBjYR7F3BQj+3xwLo+xtyTmZsy8x+AF6iFvSSpAo2E+1JgUkRMjIiRwGnAol5j7gaOA4iIcdSWaV5qZqGSpMb1G+6ZuRk4D7gfeB64MzNXR8TlEXFyfdj9wIaIeA54ELgwMzfsrqIlSTvW0HHumbkYWNxr3yU97ifw9fpNklQxz1CVpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFaihcI+IEyLihYhYExELdzDu1IjIiOhsXomSpIHqN9wjYhhwDTAPmAycHhGT+xg3BjgfeLLZRUqSBqaRmfsMYE1mvpSZ7wO3A/P7GPct4DvAxibWJ0naCY2E+4HAKz22u+r7PhARRwEHZea9O3qiiDgnIpZFxLL169cPuFhJUmMaCffoY19+8GDEHsDfAH/a3xNl5rWZ2ZmZnR0dHY1
XKUkakEbCvQs4qMf2eGBdj+0xwBTgoYhYC8wCFvmlqiRVZ3gDY5YCkyJiIvCPwGnA57sfzMw3gXHd2xHxEPCNzFzW3FLVziYsvG/QXmvtqEF7KWm36XfmnpmbgfOA+4HngTszc3VEXB4RJ+/uAiVJA9fIzJ3MXAws7rXvku2MPXbXy5Ik7QrPUJWkAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAI1FO4RcUJEvBARayJiYR+Pfz0inouIZyLi7yPiQ80vVZLUqH7DPSKGAdcA84DJwOkRMbnXsKeBzsz8CHAX8J1mFypJalwjM/cZwJrMfCkz3wduB+b3HJCZD2bmb+ubTwDjm1umJGkgGgn3A4FXemx31fdtz1nA3/X1QEScExHLImLZ+vXrG69SkjQgjYR79LEv+xwY8QWgE7iyr8cz89rM7MzMzo6OjsarlCQNyPAGxnQBB/XYHg+s6z0oIv4IuBiYm5n/1JzyJEk7o5GZ+1JgUkRMjIiRwGnAop4DIuIo4L8BJ2fmr5tfpiRpIPqduWfm5og4D7gfGAZcn5mrI+JyYFlmLqK2DDMa+ElEAPwyM0/ejXUP2ISF9w3q660dNagvJ0m/p5FlGTJzMbC4175Letz/oybXJUnaBZ6hKkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSpQQ+EeESdExAsRsSYiFvbx+J4RcUf98ScjYkKzC5UkNa7fcI+IYcA1wDxgMnB6REzuNews4PXM/NfA3wDfbnahkqTGNTJznwGsycyXMvN94HZgfq8x84Gb6vfvAj4REdG8MiVJAzG8gTEHAq/02O4CZm5vTGZujog3gX2B13oOiohzgHPqm+9ExAs7U3QrCBhHr/e/W/2l/5Y2i7+71hbfHuTf3+D7UCODGgn3vv7Py50YQ2ZeC1zbwGu2vIhYlpmdVdehgfN319r8/dU0sizTBRzUY3s8sG57YyJiOLA38JtmFChJGrhGwn0pMCkiJkbESOA0YFGvMYuAM+r3TwUeyMxtZu6SpMHR77JMfQ39POB+YBhwfWaujojLgWWZuQj4EXBLRKyhNmM/bXcW3SLaYvmpUP7uWpu/PyCcYEtSeTxDVZIKZLhLUoEMd0kqkOEuSQUy3CWpQI2coaodiIif0cfZuN0y8+RBLEe7ICI+DFxI7fTuD/42MvPjlRWlHYqIt9nx39/YQSxnSDHcd913qy5ATfMT4L8C1wFbKq5FDcjMMQD1827+H3ALtXYoC4AxFZZWOY9zl+oiYnlmTq+6Dg1cRDyZmTP729dOXHNvkoiYFBF3RcRzEfFS963qujQgP4uIr0TE/hHxL7pvVRelhmyJiAURMSwi9oiIBbT5py9n7k0SEY8Cl1K7WMkfA1+i9t/30koLU8Mi4h/62J2Z+S8HvRgNSP3qb1cDs6mtwT8GXJCZa6urqlqGe5N0f6SPiGcz84j6vkcy899UXZtUuoiYnZmP9bevnfiFavNsjIg9gP9bb7T2j8B+FdekAYqIKdQuJzmqe19m3lxdRWrQfwamNbCvbRjuzXMB8M+B84FvAR9naxtktYCIuBQ4llq4L6Z23eBHAcN9iIqIY4CPAh0R8fUeD42l1sW2bRnuTZKZS+t336G23q7WcypwJPB0Zn4pIv4Q+GHFNWnHRgKjqWVZz0Mf36L2+2xbhnu
TeAJMEd7LzN9FxOaIGAv8GvDL1CEsM5cASyLixsx8GaC+PDo6M9+qtrpqGe7N4wkwrW9ZROxD7Xe4nNqnsP9dbUlq0BURcS61v73lwN4RcVVmXllxXZXxaJkm8QSYstQPrRubmc9UXIoaEBErMnNq/fj26cBFwPLM/EjFpVXGk5iaxxNgWlzUfCEiLqkfH/1GRMyoui41ZEREjAD+BLgnMzexg54z7cCZe5N4Akzri4i/BX4HfDwzD4uIPwD+Z2YeXXFp6kdEnE9ttr4SOAk4GPhxO59nYrhLdRHxVGZOi4inM/Oo+r6VmXlk1bVpYCIigGGZubm+fUZm3lRxWYPKL1R3UUR8PDMfiIhP9/V4Zv73wa5JO21TRAyj/nE+IjqozeTVYrI2a93cY9fXAMNdAzIXeIBaP5neEjDcW8f3gZ8C+0XEf6R2nPRfVFuSmiSqLmCwuSwj9RARhwKfoBYGf5+Zz1dckpqge8mt6joGkzP3Jul16nO3N6kdjrVisOvRTvsV8Ai1v41/FhHTMvOpimvSrmu7mbvh3jyd9dvP6tsnAUuBcyPiJ5n5ncoqU0Mi4lvAmcCLbD2MLqn1CVJra7vukC7LNElE3A98JjPfqW+PBu4CTqE2e59cZX3qX0S8AByRme9XXYsGpt4H6D8BB2TmvIiYDByTmT+quLTKeBJT8xwM9AyFTcCHMvM94J+qKUkDtArYp+oitFNuBO4HDqhv/4Jap9a25bJM89wKPBER99S3/xi4LSL2Ap6rriwNwBXA0xGxih7/IGfmydWVpAaNy8w7I+LPATJzc0S0dY8nw71JMvNbEbEY+Bi1L2/Ozcxl9YcXVFeZBuAm4NvAs3h8e6t5NyL2Zes5CrOoHdDQtlxz30URMTYz39peH5nM/M1g16SdExFLMnNu1XVo4CJiOrXzFKZQW17rAE5t58Zvhvsuioh7M/NT9d4yPf9jBvaWaSkRcRW15ZhF/P6yjIdCtoCIGA4cQu1v74V687C2Zbg3Qb2PxUGZ+cuqa9HOi4gH+9idXnBl6IuIlcAdwB2Z+WLV9QwFhnuT2M+9fO3YfKpVRMSHgM/Vb7+jFvR3tvOEy0Mhm+eJiLA1bNm+VnUB6ltmvpyZ36lPsD4PfAToqw132/BomeY5jtrZqGuBd9m65t62V4IpUNudwt5K6lfP+iy12fsW4M+qrKdqhnvzzAP+AOi+OMDDwBvVlaPdwDXMISoingRGULuW8b/NzJcqLqlyLss0z58AtwDjqB2GdQvgyS9lceY+dJ2RmdMy8wqDvcYvVJskIp6h1svi3fr2XsDjLsuUIyL+S2aeV3Ud2lZE7A1cCsyp71oCXJ6ZbXsikzP35glq63zdtuBMr6VExB9GxI8i4u/q25Mj4qzuxw32Ie164G1qa+6fBd4Cbqi0ooo5c2+Sej/3M6hdyQdqyzQ3Zub3qqtKA1EP9RuAizPzyPpJMU9n5hEVl6Z+RMSKzJza37524sy9STLzKuBLwG+A14EvGewtZ1xm3km9r0z94spt3XyqhbwXER/r3oiI2cB7FdZTOY+WaaL6aeqeqt66bD7Vuv49cFN97T2oTbLOqLakarksI9XZfKr1RcRYgMx8q+paqma4Sz3YfKo11T9xXUqt5XYCj1I7WmZDpYVVyDV3qa7efOrPgI2Zucpgbym3A+uBzwCn1u/fUWlFFXPmLtXZfKp19dW4LyKWZWZnVTVVzZm7VGfzqZb2YEScFhF71G+fBe6ruqgqOXOXeuij+dQdmfnXVdak7YuIt6mtsQewF1sPXR0GvJOZY6uqrWoeCinV2Xyq9WTmmO779UtdTgJGVVfR0OHMXaqLiEMz8/9UXYcGLiK+TK3f/nhgBTAL+HlmfqLSwirkmru01asRcVVELKvf/rp+UoyGvq8BRwMvZ+ZxwFHAa9WWVC3DXdrK5lOta2NmbgSIiD3rn8AOqbimSrnmLm31rzLzMz22/zIiVlRWjQaiKyL2Ae4G/ldEvA6sq7i
mShnu0lbvRcTHMvNRsPlUK8nMU+p3L4uIB4G9gf9RYUmV8wtVqS4ipgI3UQuGD5pP2VtGrchwl3qx+ZRK4BeqUl1E7BsR3wceonbG49X1hlRSyzHcpa1sPqViuCwj1dl8SiVx5i5tZfMpFcOZu9qezadUIo9zV9uz+ZRKZLhLddtrPgW0bfMptS7X3KWtbD6lYhju0lY2n1IxXJaRtrL5lIrh0TJSHyJiLvXmU5n5ftX1SANluEtSgVxzl6QCGe6SVCDDXZIKZLhLUoH+Pyga7Mm8jS84AAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAExCAYAAACHweKPAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAF35JREFUeJzt3X2wVPWd5/H3Vx5kV0Bn5TqlooGdJSpiREDAkAVNJqVoRsbETTQkqymj5WaNsTJxZOKUOqZ2ncSME7NrZlYTH1M+xdooUWbdrVHxIeoCCgq6ZtHB8Q5ugsTnyAjku3/0AW7wwu0LzT30r9+vqlvV5/Tvdn/b9n749a/P+Z7ITCRJZdmj7gIkSa1nuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKNLiuJx41alSOGTOmrqeXpLa0ZMmS1zKzq69xtYX7mDFjWLx4cV1PL0ltKSJebmacyzKSVCDDXZIKZLhLUoFqW3OXtmf9+vV0d3ezbt26uksZUMOGDWP06NEMGTKk7lLU5gx37Za6u7sZMWIEY8aMISLqLmdAZCZr166lu7ubsWPH1l2O2lyfyzIRcX1E/Coilm/j/oiI70fEyoh4JiImtb5MdZp169ax7777dkywA0QE++67b8d9WtGu0cya+43ACdu5fzYwrvo5B/ibnS9LoqOCfZNOfM3aNfoM98x8GPj1dobMAW7OhieAfSJi/1YVKEnqv1asuR8IvNJju7va9+rWAyPiHBqzew4++OAWPLU6xZh597X08Vb95UlNjfvoRz/Kz3/+85Y+d6dp9XvXl2bf29K14lDI3j5H9nrV7cy8NjOnZOaUrq4+z56Vamewq121Ity7gYN6bI8GVrfgcaXaDR8+nFdffZWZM2cyceJEJkyYwCOPPALAbbfdxhFHHMGECRO46KKLfud3Lr74Yo488kimT5/OL3/5y7rKVwdrRbjPB/59ddTMdODNzPzAkozUrm699VaOP/54li5dyrJly5g4cSKrV6/moosu4oEHHmDp0qUsWrSIu+++G4B3332X6dOns2zZMmbOnMl1111X8ytQJ2rmUMjbgMeBQyKiOyLOiohzI+LcasgC4CVgJXAd8JVdVq1Ug6OPPpobbriByy67jGeffZYRI0awaNEijj32WLq6uhg8eDBz587l4YcfBmDo0KF86lOfAmDy5MmsWrWqxurVqfr8QjUzT+/j/gT+Y8sqknYzM2fO5OGHH+a+++7ji1/8IhdeeCEjR47c5vghQ4ZsPqRx0KBBbNiwYaBKlTazt4zUh5dffpn99tuPs88+m7POOounnnqKadOmsXDhQl577TU2btzIbbfdxqxZs+ouVdrM9gNqC3Ud3hYRPPTQQ1x55ZUMGTKE4cOHc/PNN7P//vtzxRVXcNxxx5GZnHjiicyZM6eWGqXeRGNVZeBNmTIlvViHtuX555/nsMMOq7WGtWvXMmnSJF5+ualrI7TM7vDaW8nj3FsrIpZk5pS+xrksI/Vi9erVHHPMMXzjG9+ouxRph7gsI/XigAMO4Be/+EXdZUg7zJm7JBXIcJekAhnuklQgw12SCuQXqmoPl+3d4sd7c4d+7Y033uDWW2/lK19pdNl46KGH+O53v8u9997byuqknebMXeqHN954gx
/84ActezxbE2hXMdyl7bjqqquYMGECEyZM4Hvf+x7z5s3jxRdfZOLEiVx44YUAvPPOO5x66qkceuihzJ07l00nBi5ZsoRZs2YxefJkjj/+eF59tdEs9dhjj+Wb3/wms2bN4uqrr67ttalsLsvsKq1eRujz+XZsmUHbtmTJEm644QaefPJJMpNp06bx4x//mOXLl7N06VKgsSzz9NNPs2LFCg444ABmzJjBY489xrRp0/jqV7/KPffcQ1dXF3fccQcXX3wx119/PdD4BLBw4cI6X54KZ7hL2/Doo49yyimnsNdeewHw6U9/evOFOnqaOnUqo0ePBmDixImsWrWKffbZh+XLl/PJT34SgI0bN7L//lsuLfy5z31uAF6BOpnhLm1Ds32X9txzz823N7X4zUwOP/xwHn/88V5/Z9M/GNKu4pq7tA0zZ87k7rvv5je/+Q3vvvsuP/3pT5kxYwZvv/12n797yCGHsGbNms3hvn79elasWLGrS5Y2c+au9lDDdwqTJk3izDPPZOrUqQB8+ctfZvLkycyYMYMJEyYwe/ZsTjqp9w6EQ4cO5a677uL888/nzTffZMOGDVxwwQUcfvjhA/kS1MFs+bur+IXqTimt7W1/lPbabfnbWrb8laQOZrhLUoEMd+226loyrFMnvmbtGoa7dkvDhg1j7dq1HRV2mcnatWsZNmxY3aWoAB4to93S6NGj6e7uZs2aNXWXMqCGDRu2+YQoaWcY7totDRkyhLFjx9ZdhtS2XJaRpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFaipcI+IEyLihYhYGRHzern/4Ih4MCKejohnIuLE1pcqSWpWn+EeEYOAa4DZwHjg9IgYv9WwPwfuzMyjgNOAH7S6UElS85qZuU8FVmbmS5n5PnA7MGerMQmMrG7vDaxuXYmSpP5qJtwPBF7psd1d7evpMuALEdENLAC+2tsDRcQ5EbE4IhZ3WkMoSRpIzYR79LJv6z6spwM3ZuZo4ETgloj4wGNn5rWZOSUzp3R1dfW/WklSU5oJ927goB7bo/ngsstZwJ0Amfk4MAwY1YoCJUn910y4LwLGRcTYiBhK4wvT+VuN+UfgEwARcRiNcHfdRZJq0me4Z+YG4DzgfuB5GkfFrIiIyyPi5GrYnwBnR8Qy4DbgzOykS+hI0m6mqYt1ZOYCGl+U9tx3SY/bzwEzWluaJGlHeYaqJBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVaHAzgyLiBOBqYBDww8z8y17GfBa4DEhgWWZ+voV1SgPnsr0H+PneHNjnU0foM9wjYhBwDfBJoBtYFBHzM/O5HmPGAX8GzMjM1yNiv11VsCSpb80sy0wFVmbmS5n5PnA7MGerMWcD12Tm6wCZ+avWlilJ6o9mwv1A4JUe293Vvp4+DHw4Ih6LiCeqZZwPiIhzImJxRCxes2bNjlUsSepTM+EevezLrbYHA+OAY4HTgR9GxD4f+KXMazNzSmZO6erq6m+tkqQmNRPu3cBBPbZHA6t7GXNPZq7PzH8AXqAR9pKkGjQT7ouAcRExNiKGAqcB87caczdwHEBEjKKxTPNSKwuVJDWvz3DPzA3AecD9wPPAnZm5IiIuj4iTq2H3A2sj4jngQeDCzFy7q4qWJG1fU8e5Z+YCYMFW+y7pcTuBr1c/kqSaeYaqJBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3C
WpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUBNhXtEnBARL0TEyoiYt51xp0ZERsSU1pUoSeqvPsM9IgYB1wCzgfHA6RExvpdxI4DzgSdbXaQkqX+amblPBVZm5kuZ+T5wOzCnl3HfAr4DrGthfZKkHdBMuB8IvNJju7vat1lEHAUclJn3bu+BIuKciFgcEYvXrFnT72IlSc1pJtyjl325+c6IPYC/Bv6krwfKzGszc0pmTunq6mq+SklSvzQT7t3AQT22RwOre2yPACYAD0XEKmA6MN8vVSWpPoObGLMIGBcRY4F/Ak4DPr/pzsx8Exi1aTsiHgK+kZmLW1uqOtmYefcN2HOtGjZgTyXtMn3O3DNzA3AecD/wPHBnZq6IiMsj4uRdXaAkqf+ambmTmQuABVvtu2QbY4/d+bIkSTvDM1QlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalATYV7RJwQES9ExMqImNfL/V+PiOci4pmI+PuI+FDrS5UkNavPcI+IQcA1wGxgPHB6RIzfatjTwJTM/AhwF/CdVhcqSWpeMzP3qcDKzHwpM98Hbgfm9ByQmQ9m5m+qzSeA0a0tU5LUH82E+4HAKz22u6t923IW8He93RER50TE4ohYvGbNmuarlCT1SzPhHr3sy14HRnwBmAJc2dv9mXltZk7JzCldXV3NVylJ6pfBTYzpBg7qsT0aWL31oIj4Q+BiYFZm/nNrypMk7YhmZu6LgHERMTYihgKnAfN7DoiIo4D/Bpycmb9qfZmSpP7oM9wzcwNwHnA/8DxwZ2auiIjLI+LkatiVwHDgJxGxNCLmb+PhJEkDoJllGTJzAbBgq32X9Lj9hy2uS5K0EzxDVZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgrU1JWYSjBm3n0D+nyrhg3o00nS73DmLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIK1FS4R8QJEfFCRKyMiHm93L9nRNxR3f9kRIxpdaGSpOb1Ge4RMQi4BpgNjAdOj4jxWw07C3g9M/8N8NfAt1tdqCSpec3M3KcCKzPzpcx8H7gdmLPVmDnATdXtu4BPRES0rkxJUn80cyWmA4FXemx3A9O2NSYzN0TEm8C+wGs9B0XEOcA51eY7EfHCjhTdDgJGsdXr36X+wn9LW8X3rr3Ftwf4/Rt4H2pmUDPh3tv/ebkDY8jMa4Frm3jOthcRizNzSt11qP9879qb719DM8sy3cBBPbZHA6u3NSYiBgN7A79uRYGSpP5rJtwXAeMiYmxEDAVOA+ZvNWY+cEZ1+1Tggcz8wMxdkjQw+lyWqdbQzwPuBwYB12fmioi4HFicmfOBHwG3RMRKGjP203Zl0W2iI5afCuV71958/4Bwgi1J5fEMVUkqkOEuSQUy3CWpQIa7JBXIcJekAjVzhqq2IyJ+Ri9n426SmScPYDnaCRHxYeBCGqd3b/7byMyP11aUtisi3mb7f38jB7Cc3YrhvvO+W3cBapmfAH8LXAdsrLkWNSEzRwBU5938P+AWGu1Q5gIjaiytdh7nLlUiYklmTq67DvVfRDyZmdP62tdJXHNvkYgYFxF3RcRzEfHSpp+661K//CwivhIR+0fEv9r0U3dRasrGiJgbEYMiYo+ImEuHf/py5t4iEfEocCmNi5X8EfAlGv99L621MDUtIv6hl92Zmf96wItRv1RXf7
samEFjDf4x4ILMXFVfVfUy3Ftk00f6iHg2M4+o9j2Smf+27tqk0kXEjMx8rK99ncQvVFtnXUTsAfzfqtHaPwH71VyT+ikiJtC4nOSwTfsy8+b6KlKT/gswqYl9HcNwb50LgH8JnA98C/g4W9ogqw1ExKXAsTTCfQGN6wY/Chjuu6mIOAb4KNAVEV/vcddIGl1sO5bh3iKZuai6+Q6N9Xa1n1OBI4GnM/NLEfH7wA9rrknbNxQYTiPLeh76+BaN97NjGe4t4gkwRXgvM38bERsiYiTwK8AvU3djmbkQWBgRN2bmywDV8ujwzHyr3urqZbi3jifAtL/FEbEPjfdwCY1PYf+73pLUpCsi4lwaf3tLgL0j4qrMvLLmumrj0TIt4gkwZakOrRuZmc/UXIqaEBFLM3NidXz7ZOAiYElmfqTm0mrjSUyt4wkwbS4avhARl1THR78REVPrrktNGRIRQ4A/Bu7JzPVsp+dMJ3Dm3iKeANP+IuJvgN8CH8/MwyLi94D/mZlH11ya+hAR59OYrS8DTgIOBn7cyeeZGO5SJSKeysxJEfF0Zh5V7VuWmUfWXZv6JyICGJSZG6rtMzLzpprLGlB+obqTIuLjmflARHy6t/sz878PdE3aYesjYhDVx/mI6KIxk1ebycasdUOPXV8DDHf1yyzgARr9ZLaWgOHePr4P/BTYLyL+E43jpP+83pLUIlF3AQPNZRmph4g4FPgEjTD4+8x8vuaS1AKbltzqrmMgOXNvka1Ofd7kTRqHYy0d6Hq0w34JPELjb+NfRMSkzHyq5pq08zpu5m64t86U6udn1fZJwCLg3Ij4SWZ+p7bK1JSI+BZwJvAiWw6jSxp9gtTeOq47pMsyLRIR9wOfycx3qu3hwF3AKTRm7+PrrE99i4gXgCMy8/26a1H/VH2A/jNwQGbOjojxwDGZ+aOaS6uNJzG1zsFAz1BYD3woM98D/rmektRPy4F96i5CO+RG4H7ggGr7FzQ6tXYsl2Va51bgiYi4p9r+I+C2iNgLeK6+stQPVwBPR8RyevyDnJkn11eSmjQqM++MiD8DyMwNEdHRPZ4M9xbJzG9FxALgYzS+vDk3MxdXd8+trzL1w03At4Fn8fj2dvNuROzLlnMUptM4oKFjuea+kyJiZGa+ta0+Mpn564GuSTsmIhZm5qy661D/RcRkGucpTKCxvNYFnNrJjd8M950UEfdm5qeq3jI9/2MG9pZpKxFxFY3lmPn87rKMh0K2gYgYDBxC42/vhap5WMcy3Fug6mNxUGb+Y921aMdFxIO97E4vuLL7i4hlwB3AHZn5Yt317A4M9xaxn3v5OrH5VLuIiA8Bn6t+fksj6O/s5AmXh0K2zhMRYWvYsn2t7gLUu8x8OTO/U02wPg98BOitDXfH8GiZ1jmOxtmoq4B32bLm3rFXgilQx53C3k6qq2d9lsbsfSPwp3XWUzfDvXVmA78HbLo4wMPAG/WVo13ANczdVEQ8CQyhcS3jf5eZL9VcUu1clmmdPwZuAUbROAzrFsCTX8rizH33dUZmTsrMKwz2Br9QbZGIeIZGL4t3q+29gMddlilHRPzXzDyv7jr0QRGxN3ApMLPatRC4PDM79kQmZ+6tEzTW+TbZiDO9thIRvx8RP4qIv6u2x0fEWZvuN9h3a9cDb9NYc/8s8BZwQ60V1cyZe4tU/dzPoHElH2gs09yYmd+rryr1RxXqNwAXZ+aR1UkxT2fmETWXpj5ExNLMnNjXvk7izL1FMvMq4EvAr4HXgS8Z7G1nVGbeSdVXprq4ckc3n2oj70XExzZtRMQM4L0a66mdR8u0UHWauqeqty+bT7Wv/wDcVK29B41J1hn1llQvl2Wkis2n2l9EjATIzLfqrqVuhrvUg82n2lP1ietSGi23E3iUxtEya2strEauuUuVqvnUnwLrMnO5wd5WbgfWAJ8BTq1u31FrRTVz5i5VbD7Vvnpr3BcRizNzSl011c2Zu1Sx+VRbezAiTouIPaqfzw
L31V1UnZy5Sz300nzqjsz8qzpr0rZFxNs01tgD2Isth64OAt7JzJF11VY3D4WUKjafaj+ZOWLT7epSl+OAYfVVtPtw5i5VIuLQzPw/ddeh/ouIL9Potz8aWApMB36emZ+otbAaueYubfFqRFwVEYurn7+qTorR7u9rwNHAy5l5HHAU8Fq9JdXLcJe2sPlU+1qXmesAImLP6hPYITXXVCvX3KUt/iAzP9Nj+y8iYmlt1ag/uiNiH+Bu4H9FxOvA6pprqpXhLm3xXkR8LDMfBZtPtZPMPKW6eVlEPAjsDfyPGkuqnV+oSpWImAjcRCMYNjefsreM2pHhLm3F5lMqgV+oSpWI2Dcivg88ROOMx6urhlRS2zHcpS1sPqViuCwjVWw+pZI4c5e2sPmUiuHMXR3P5lMqkce5q+PZfEolMtylyraaTwEd23xK7cs1d2kLm0+pGIa7tIXNp1QMl2WkLWw+pWJ4tIzUi4iYRdV8KjPfr7seqb8Md0kqkGvuklQgw12SCmS4S1KBDHdJKtD/BzvP6NJJ9UOoAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -399,7 +399,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8HfP9x/HXOzebLYKEkhtJLEGKWmKrfY8tqkWjlvjZWhWKWqKLraV2tVdaFLWloW2QSpUS1VJRESJCRHAFCYJGhCT38/tj5o7JyV2O68w9kbyfj8d5ZJbvzHzPnJvzPvOdme8oIjAzMwNoV+0KmJnZosOhYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCWcEkTZC0Q7XrUQmSDpf0z2rXw4rjULCySXpE0kxJnUqm/17SL5tYJiSt1cJ6D5I0VZJKpreXNF3S3rlpfSTVS7q23G1JOlvSH5orn763OZJm5V735sr+RNKr6fQ6SXc1957yIuLrEfFIueUXFZJ6p/uofbXrYm3HoWBlkdQb2BYIYGCFV/8noCuwfcn0Aen2HshNOwyYCQwqDacKGBIRy+Ze+wBIGgwcCuwSEcsC/YGHKrztJY7DZtHkULByHQY8AfweGFzJFUfEHGB4uo3Sbd4WEfNKpv0MmAvsU8l6NGMzYHREvAIQEW9HxLByF06PgnZJhzeXNFbSR5LekXRZrtzAtKnpg/TIZb2SdZwiabykDyXdJalzE9u7TtKI3PiFkh4qPRJL57WT9DNJr6VHZbdIWj6dPSb994P0CGmr3HKXpEeNr0raIzd9eUk3SHpL0puSfimpJp13uKTHJV0u6X3g7HL3obUdh4KV6zDgtvS1u6RVKrz+m4H9JS0FyZcLyZf+LQ0FJG0L1AJ30niIFOUJ4DBJp0rq3/All6vXUEn3lbmuK4ArIqILsCbJ+0BSX+AO4ESgOzAKuFdSx9yyB5IcPfUBNgQOb2IbPwY2TL+EtwWOBAZH433aHJ6+dgTWAJYFrk7nbZf+2zU9cvp3Or4FMAnoBlwE3JALnJuBecBawMbAbsBRue1tAUwBVgbOa6L+VkUOBWuRpG2AXsDwiHgaeAX4XiW3ERGPA+8A+6WTDgReiohxuWKDgb9GxEzgdmAPSStXsBpXpr/SG16/SOv2B+B4YHfgUWC6pKG5ul8QEXs3vsqFzAXWktQtImZFxBPp9O8C90fEgxExF7gEWAr4Zr5+ETEtIt4H7gU2amwDETEbOAS4DPgDcHxE1DVRn4OByyJiSkTMAs4gaZprrmnntYj4bUTMJwmBVYFV0h8KewAnRsTHETEduBwYlFt2WkRcFRHzIuKTZrZhVeJQsHIMBv4WEe+m47dT4Sak1C18/uv/UJIvHADSI4gDSI5USH+1vk554TQP6JCfIKlhfG5u8gkR0TX3+nnDjIi4LSJ2ITn38QPgXEm7f5E3lzoS6Au8KOmp3En01YDXcturB94AeuSWfTs3PJvkV32jIuI/JL/IRXo00oQFtpsOtweaOxLM6pEGEGldepHs57caghW4nuSooMEbzazXFgEOBWtW+mV8ILC9pLclvQ2cBHxD0jcqvLlbgJ3TtustScKnwX5AF+DaXD16UF4T0utA75JpfYD5wJtfpIIRMTci/giMB9b/Isumy78cEQeRfFFeCIyQtAwwjeRLFYC0OabnF61fbvnjgE7pek9rpugC2wVWJwnRd0hO8n8RbwCfAt1ywdolIr6eK+NumRdxDgVrybdIvjz7kTRXbASsBzzGgl/INZI65175tvCOJfMWaJNvEBGvAf8kaVt/MCLyv4wHAzcCG+TqsTWwkaQNWtjWA8A6kg6V1EHSisD5wIiSk9iNStvm95K0XHpidg/g68CTLS3byLoOkdQ9PRL4IJ08n+TX/F6Sdk6PYn5M8gX7r
1Zsoy/wS5ImpEOB0yQ12tREsq9PUnKp77Ik++WudL/MAOpJzjW0KCLeAv4GXCqpS7qv1pRUelWZLcIcCtaSwcBNEfF6etXN2+mX9dXAwbm256HAJ7nXw7l1TCiZ93/NbO9mkl+u+RPMPYCdgV/n65Ce33iABZuyFtpW2ra9J/B9YDrwPPAhcGzJtq/WgvcpPJ1O/wj4CckRxwckJ1ePjYh/pvX7iaS/NvOe8gYAEyTNIjnpPCgi5kTEJJIv8auAd0lOsu8TEZ+VuV7SurQnOY9wYUQ8GxEvp3W/VY1fwnsjcCvJlUavAnNIzp80NA2dBzyeNgdtWUYVDgM6Ai+QXDo8guScg31FyA/ZMSuWpNeBQyJiTIuFzarMRwpmBZLUneQS06lVropZWQoLBUk3pjfDPN/EfEm6UtJkJTfkbFJUXcyqQdJmwMvAVRHxerXrY1aOwpqPJG0HzAJuiYiFrtKQtCdJ2+WeJDe0XBERWxRSGTMzK0thRwpp++n7zRTZlyQwIr2Bp6skn5AyM6uianZI1YMFb2SpS6e9VVpQ0jHAMQDLLLPMpuuuu27rtjjtmdYt92WstjEAz735YZtveoMey1dlu9Xc9gY9km57lqT97fe85G27NZ5++ul3I6J7S+WqGQoLdc5FEze2pJ2PDQPo379/jB07tnVbPLv1O7TVzk7q2nvo/W2+6bEX7FWV7VZz22Mv2AtYsva33/OSt+3WkPRay6Wqe/VRHckdmw1qSe6uNDOzKqlmKIwk6XlS6U0xH6Z3RJqZWZUU1nwk6Q5gB6CbpDrgLNJOySLiNyRdA+8JTCbp3Ku5u1zNzKwNFBYKaadfzc0P4Liitm9mS64undpx/BYr0KtrB9To6cvWmzhxIgC/Hdj2F0s2bLs5nTt3pra2lg4dOrRYtjF+HJ6ZLXaO32IFNllzNdovvRxa+IFzX8p6tV0BmFv3QQslK69h202JCN577z3q6uro06dPq7bhbi7MbLHTq2uHQgJhUSeJlVZaiTlz5rR6HQ4FM1vsCC1xgdDgy75vh4KZmWV8TsHMFnsDr368ouubWsZNZFuuU8u/Jr7ORWefwX8eH4MkOnXqzEXX3UTt6r3430cfcsGZpzPuqeRZTRtttgVDz72Q5bosz5tvvM6e3/wGp597Id/7v2MAOP9npzJgh605/PDDK/peSvlIwcysIKNH3sOMd95mxIOPc/ff/8Xlv7uVLl2SnhXOPvUEalfvzf2PP8P9jz9Dj569OOe0H2XLrtitO7ff8BvmfvaFnrP0pTkUzMwKMmP6O3RbeRXatUu+aldZtQddunbl9Ven8MJz4zjmR6dmZb9/4mlMGP8Mb0x9FYAVVlyJzbfZjpEj7mjTOjsUzMwKsvs+32LM3x/gwN235ZJzf8bE58cDMOXlF1mn3wbU1Hz+uPKamhrW6bcBr7z0+b0IR/zwJG4Zdg3z589vszo7FMzMCrLKqj34yyNPccLQM2nXThwzaF+e/OejRDRxlVAyIxutXb0X62+0CaP+/Mc2q7NPNJuZFahjp05ss+OubLPjrqzUbWUeHn0/Bx/xA16cMJ76+vqsaam+vp5JE59njbXWWWD5o4aczI+/P5hNt/hmm9TXRwpmZgWZ+NyzTH876eezvr6el16cwGo9erJ6nzVY9+sbMuzKS7Kyw668hPXW/war91ljgXX0Wasva/ZdlzEPjW6TOvtIwcwWeyOHbF2xdW3YQlcTAPPmzaNjx468/+4Mzjn9R3z26acArL/Rpgw6/GgAzrn4Kn515mnsvc0mRAQbbroZZ198VaPrO+r4k/nugO0r9h6a41AwM6uwV16aSG2vPmy94y5sveMujZbp0rUrv7pyWKPzevRcnXse+nc2vk6/DRj3+vtlBdKX5VAwM6ug4bfeyB03DePUs86vdlVaxaFgZlZBBx56BAceekS1q9FqPtFsZmYZh4KZmWUcCmZmlnEomJlZxieazWyxt+HvelV2hWd/2GKRd956k/N/e
ipTXp5EfX092+2yOyf/9FxeeXkSM955i2132g2A6y67gKWXXobBPzi+snVsJR8pmJlVWERw0tGHsePue3HvY08zcsxYZn/8MVdd9AsmTXiOxx5+sGLbqnRneT5SMDOrsP88PoZOnTrxre8eDCQ9oJ561nkM2GpD2rfvABGMe+oJjjjuJABeeXkSRx6wN29Nq+PgI4/l4CO+D8B999zF7TcOY97cz1h/40256+YbqKmpYdlll+Xkk09m9OjRXHrppWyzzTYVq7uPFMzMKmzySy/Sb4ONFpi27HJdWK12dY4+4RR222c/ho9+jAEDvw3A1Fde4ro/3M1t9z7E9ZdfyNy5c5ny8iRG3/snbv7TAwwf/Rg17Wq47bbbAPj4449Zf/31efLJJysaCOAjBTOzyivpAjs/vbEus7fdaTc6dupEx06dWLFbd95/dzpPPv4oE8c/y8F77wTAnDlzWG+NnkBy5PGd73ynkKo7FMzMKmzNvuvy91EjF5g2638f8fa0N6lpt3ADTceOnbLhdu3aMW/efCJgnwMG8aOhZ2XzGvo+6ty58wIP6KkkNx+ZmVXYFttsz5xPPuHeEXcCycngS3/xcwYe8D1W6r4ysz+e1fI6tt6Ov98/kvfenQHAhzNn8tprrxVab/CRgpktAcYfVbkv03J6KpXE5b+7lfN+egrDrriY+vp6ttlpV044/ed8Mns2N17zaw7cfdvsRHNj1uy7Lsed+lOOPfjb1NfX075DB24c9ht69arw5bUlHApmZgX42mq1XHXTnQtN79ipE7ff/3CTy+W7zB4w8NvZyWj4PJBmzWr5SKO13HxkZmYZh4KZmWUcCma22AmCiKh2Nariy75vh4KZLXZe+2Au82Z/tMQFQ0Tw3nvv0blz51avwyeazWyxc9WTMzke6NX1XUQjN5F9CRP/txQA78z8pKLr/SLbbk7nzp2pra1t9TYcCma22Pno03rOG/NeIeueesFeAOwx9P5C1l/Otovk5iMzM8sUGgqSBkiaJGmypKGNzF9d0j8kPSNpvKQ9i6yPmZk1r7BQkFQDXAPsAfQDDpLUr6TYz4DhEbExMAi4tqj6mJlZy4o8UtgcmBwRUyLiM+BOYN+SMgF0SYeXB6YVWB8zM2tBkaHQA3gjN16XTss7GzhEUh0wCmj0eXSSjpE0VtLYGTNmFFFXMzOj2FBo7Dqw0ouGDwJ+HxG1wJ7ArZIWqlNEDIuI/hHRv3v37gVU1czMoNhQqAN65sZrWbh56EhgOEBE/BvoDHQrsE5mZtaMIkPhKWBtSX0kdSQ5kTyypMzrwM4AktYjCQW3D5mZVUlhoRAR84AhwGhgIslVRhMknStpYFrsx8DRkp4F7gAOjyXtvnQzs0VIoXc0R8QokhPI+Wln5oZfALYusg5mZlY+39FsZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUKDQVJAyRNkjRZ0tAmyhwo6QVJEyTdXmR9zMysee2LWrGkGuAaYFegDnhK0siIeCFXZm3gDGDriJgpaeWi6mNmZi0r8khhc2ByREyJiM+AO4F9S8ocDVwTETMBImJ6gfUxM7MWFBkKPYA3cuN16bS8vkBfSY9LekLSgMZWJOkYSWMljZ0xY0ZB1TUzsyJDQY1Mi5Lx9sDawA7AQcDvJHVdaKGIYRHRPyL6d+/eveIVNTOzRJGhUAf0zI3XAtMaKfOXiJgbEa8Ck0hCwszMqqDIUHgKWFtSH0kdgUHAyJIyfwZ2BJDUjaQ5aUqBdTIzs2YUFgoRMQ8YAowGJgLDI2KCpHMlDUyLjQbek
/QC8A/g1Ih4r6g6mZlZ8wq7JBUgIkYBo0qmnZkbDuDk9GVmZlXmO5rNzCzjUDAzs4xDwczMMmWFgqQDJC2XDv9M0j2SNim2amZm1tbKPVL4eUT8T9I2wO7AzcB1xVXLzMyqodxQmJ/+uxdwXUT8BehYTJXMzKxayg2FNyVdDxwIjJLU6Qssa2ZmXxHlfrEfSHKj2YCI+ABYETi1sFqZmVlVlBUKETEbmA5sk06aB7xcVKXMzKw6yr366CzgdJIH4gB0AP5QVKXMzKw6ym0+2g8YCHwMEBHTgOWKqpSZmVVHuaHwWdpPUQBIWqa4KpmZWbWUGwrD06uPuko6Gvg78NviqmVmZtVQVi+pEXGJpF2Bj4B1gDMj4sFCa2ZmZm2uxVCQVAOMjohdAAeBmdlirMXmo4iYD8yWtHwb1MfMzKqo3IfszAGek/Qg6RVIABFxQiG1MjOzqig3FO5PX2Zmthgr90TzzZI6An3TSZMiYm5x1TIzs2ooKxQk7UDSXfZUQEBPSYMjYkxxVTMzs7ZWbvPRpcBuETEJQFJf4A5g06IqZmZmba/cm9c6NAQCQES8RNL/kZmZLUbKPVIYK+kG4NZ0/GDg6WKqZGZm1VJuKBwLHAecQHJOYQxwbVGVMjOz6ig3FNoDV0TEZZDd5dypsFqZmVlVlHtO4SFgqdz4UiSd4pmZ2WKk3FDoHBGzGkbS4aWLqZKZmVVLuaHwsaRNGkYk9Qc+KaZKZmZWLeWeUzgR+KOkaSQP2lkN+G5htTIzs6po9khB0maSvhYRTwHrAncB84AHgFfboH5mZtaGWmo+uh74LB3eCvgJcA0wExhWYL3MzKwKWmo+qomI99Ph7wLDIuJu4G5J44qtmpmZtbWWjhRqJDUEx87Aw7l55Z6PMDOzr4iWvtjvAB6V9C7J1UaPAUhaC/iw4LqZmVkbazYUIuI8SQ8BqwJ/i4hIZ7UDji+6cmZm1rbKeUbzExHxp4jIP4bzpYj4b0vLShogaZKkyZKGNlNuf0mR3v9gZmZVUu7Na19Y2j/SNcAeQD/gIEn9Gim3HElHe08WVRczMytPYaEAbA5MjogpEfEZcCewbyPlfgFcBMwpsC5mZlaGIkOhB/BGbrwunZaRtDHQMyLua25Fko6RNFbS2BkzZlS+pmZmBhQbCmpkWmQzpXbA5cCPW1pRRAyLiP4R0b979+4VrKKZmeUVGQp1QM/ceC0wLTe+HLA+8IikqcCWwEifbDYzq54iQ+EpYG1JfSR1BAYBIxtmRsSHEdEtInpHRG/gCWBgRIwtsE5mZtaMwkIhIuYBQ4DRwERgeERMkHSupIFFbdfMzFqv0K4qImIUMKpk2plNlN2hyLqYmVnLimw+MjOzrxiHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZQoNBUkDJE2SNFnS0EbmnyzpBUnjJT0kqVeR9TEzs+YVFgqSaoBrgD2AfsBBkvqVFHsG6B8RGwIjgIuKqo+ZmbWsyCOFzYHJETElIj4D7gT2zReIiH9ExOx09AmgtsD6mJlZC4oMhR7AG7nxunRaU44E/trYDEnHSBoraeyMGTMqWEUzM8srMhTUyLRotKB0CNAfuLix+RExLCL6R0T/7t27V7CKZmaW177AddcBPXPjtcC00kKSdgF+CmwfEZ8WWB8zM2tBkUcKTwFrS+ojqSMwCBiZLyBpY+B6YGBETC+wLmZmVobCQiEi5gFDgNHARGB4REyQdK6kgWmxi4FlgT9KGidpZBOrMzOzNlBk8xERMQoYVTLtzNzwLkVuv1TvObe35eYAmNrmWzQza
z3f0WxmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWKTQUJA2QNEnSZElDG5nfSdJd6fwnJfUusj5mZta8wkJBUg1wDbAH0A84SFK/kmJHAjMjYi3gcuDCoupjZmYtK/JIYXNgckRMiYjPgDuBfUvK7AvcnA6PAHaWpALrZGZmzVBEFLNiaX9gQEQclY4fCmwREUNyZZ5Py9Sl46+kZd4tWdcxwDHp6DrApEIq3bxuwLstlrJK8f5uO97Xbata+7tXRHRvqVD7AivQ2C/+0gQqpwwRMQwYVolKtZaksRHRv5p1WJJ4f7cd7+u2tajv7yKbj+qAnrnxWmBaU2UktQeWB94vsE5mZtaMIkPhKWBtSX0kdQQGASNLyowEBqfD+wMPR1HtWWZm1qLCmo8iYp6kIcBooAa4MSImSDoXGBsRI4EbgFslTSY5QhhUVH0qoKrNV0sg7++2433dthbp/V3YiWYzM/vq8R3NZmaWcSiYmVlmiQgFSTWSnpF0X8n0qyTNyo33kvSQpPGSHpFUm5u3uqS/SZoo6YWGLjkk7Szpv5LGSfqnpLXS6Zen08ZJeknSB23zbtuepBslTU/vO2mYdlfu/U+VNC6d3lHSTZKek/SspB1yyxyUTh8v6QFJ3ZpbV2651SXNknRKbtpJkiZIel7SHZI6F74j2oCkzpL+k+67CZLOSacPSbuLiYb9VrLcZpLmp/cPIWnH3D4dJ2mOpG+l85r6m260W5rmPtPFxaL2HSJpsKSX09dgKikiFvsXcDJwO3Bfblp/4FZgVm7aH4HB6fBOwK25eY8Au6bDywJLp8MvAeulwz8Eft/I9o8nOdFe9X1R0P7dDtgEeL6J+ZcCZ6bDxwE3pcMrA0+T/DhpD0wHuqXzLgLObm5duWl3p5/dKel4D+BVYKl0fDhweLX3U4X2tYBl0+EOwJPAlsDGQG9gasM+zC1TAzwMjAL2b2SdK5Jc6NHs33Q6/Jt0eBBwV3OfabX3VYX3+yLzHZJ+XlPSf1dIh1eo1Htd7I8U0qTeC/hdbloNcDFwWknxfsBD6fA/SLvlUNJnU/uIeBAgImZFxOy0XABd0uHlWfheDICDgDu+9JtZREXEGJq4v0SSgAP5/P1n+zgipgMfkPznUvpaJl2mCyX7spF1kf66nQJMKNl0e2ApJfe/LF26rq+qSDT8Mu2QviIinomIqU0sdjxJcE5vYv7+wF/L+Jtuqluapj7TxcIi+B2yO/BgRLwfETOBB4EBrX6DJRb7UAB+TfLB1eemDQFGRsRbJWWfBb6TDu8HLCdpJaAv8IGke9JDyIvTPwqAo4BRkuqAQ4EL8iuU1AvoQ/JLbUm0LfBORLycjj8L7CupvaQ+wKZAz4iYCxwLPEfyn6IfySXLTa5L0jLA6cA5+UIR8SZwCfA68BbwYUT8rYg3Vw1pU8Y4ki/5ByPiyWbK9iD5W/5NM6scxII/Wpr6m+4BvAHJJefAh8BKNPGZtua9LaIWte+Q7HNI1aXTKmKxDgVJewPTI+Lp3LTVgAOAqxpZ5BRge0nPANsDbwLzSH51bpvO3wxYAzg8XeYkYM+IqAVuAi4rWecgYEREzK/Q2/qqKT1KupHkj3gsyX+2fwHzJHUgCYWNgdWA8cAZLazrHODy3C9nACStQPILrU+6rmUkHVKpN1RtETE/IjYi6SVgc0nrN1P818DpTf39SVoV2IDkfqIGTf1NN9UtTaOfafnvaNG1iH6HlNU9UKtVu62uyBfwK5I/1qnA28BsYGY6PDV91ZP05lq67LJAXTq8JfBIbt6hJN2Cd
wdeyU1fHXihZD3PAN+s9r5og33dm5JzCiT/Ed4BaptZ7l8kRwWbAQ/lpm8HjGpuXcBjuc/xA5ImrCEk/2FvyJU7DLi22vuooP1+Fum5lHR8KrlzCiTnVhr20SySo4tv5eb/CBiWG2/yb5okOLbKfR7vkt7r1NhnWu19U6H9u8h9h5D8OLo+N349cFCl3vNifaQQEWdERG1E9CZJ24cjYoWI+FpE9E6nz47keQ5I6iapYZ+cQfILCJIuO1aQ1NDD4E7ACyR/HMtL6ptO3xWY2LB9SeuQnAj6d2FvctG2C/BipL3gAkhaOm32QdKuwLyIeIHkF1W/3D5eYF82tq6I2Db3Of4aOD8iriZpNtoy3ZaAnUvW9ZUlqbukrunwUqT7panyEdEnt49GAD+MiD/nipQefTX3N91otzTNfKZfeYvod8hoYDdJK6RHxbux4JHel1JkL6lfRTsAv5IUwBiSqyqIiPlKLnd8KP2SeRr4bSRdeRwN3C2pnuQDPiK3voOAOyON88WVpDtI9l23tF30rIi4gYXbqiG5OmV0ur/eJPnFRERMU3J55RhJc4HX+PzwmibW1aiIeFLSCOC/JIfuz7CIdy3wBawK3Jy2R7cDhkfEfZJOIGn3/howXtKoSLutb0p6SWRP4NGGaS38TTfVLU2jn+kSagcK/g6JiPcl/YIkaADOjYiKdSTqbi7MzCyzWDcfmZnZF+NQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBlhhpV8a7l0w7UdK1zSwzq6l5razD75V2X222KHIo2JLkDhZ+DnjZN8WZLQkcCrYkGQHsLakTZHf0rgaMU/JglP8qeVDMvqULStpBuQesSLpa0uHp8KaSHpX0tKTRaSdzLWpqufSI5kIlD9N5SdK2X/aNm5XLoWBLjIh4D/gPn/c9Pwi4C/gE2C8iNgF2BC5NuyJoUdq761UkD6/ZlKSvm/MqsFz7iNgcOJGk0zuzNuG+j2xJ09CE9Jf03yNIuiI+X9J2JD1e9gBWIekJsyXrAOsDD6Y5UkPyDIcvu9w96b9Pk/RAa9YmHAq2pPkzcJmkTUge1/nftBmoO7BpRMyVNBUofabzPBY8sm6YL2BCRGz1BevR0nKfpv/Ox/9PrQ25+ciWKJE8kOcRkuaahhPMy5M8SGWupB2BXo0s+hqITUiwAAAAnElEQVRJ196dJC1P0h03wCSgu6StIGkWkvT1MqrS2uXMCuVfILYkuoOkeabhSqTbgHsljQXG0cjzCSLiDUnDSZ4I9zJJd9xExGfpJaZXpmHRnuTZDqXPjC5dX6uWMyuau842M7OMm4/MzCzj5iOzAki6Bti6ZPIVEXFTNepjVi43H5mZWcbNR2ZmlnEomJlZxqFgZmYZh4KZmWX+Hw+3LHZZYCN9AAAAAElFTkSuQmCC\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xm8HfP9x/HXOzebLYKEkhtJLEGKWmKrfY8tqkWjlvjZWrUUtUQXW0vtaq+0KGpLQ9sglSolqqWiIkSEiOAKEgSNCEnu5/fHzB2Tk7ucXGfuieT9fDzOI7N8Z+Z75tyc95nvzHxHEYGZmRlAu2pXwMzMFh0OBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzAomabykHapdj0qQdLikf1a7HlYch4KVTdKjkmZI6lQy/feSftnEMiFprRbWe5CkKZJUMr29pGmS9s5N6yOpXtJ15W5L0jmS/tBc+fS9zZY0M/e6L1f2J5JeS6fXSbq7ufeUFxFfj4hHyy2/qJDUO91H7atdF2s7DgUri6TewLZAAAMrvPo/AV2B7UumD0i392Bu2mHADGBQaThVwPERsWzutQ+ApMHAocAuEbEs0B94uMLbXuI4bBZNDgUr12HAk8DvgcGVXHFEzAaGpdso3ebtETG3ZNrPgDnAPpWsRzM2A0ZFxKsAEfFORAwtd+H0KGiXdHhzSWMkfSzpXUmX58oNTJuaPkyPXNYrWcepksZJ+kjS3ZI6N7G96yUNz41fJOnh0iOxdF47ST+T9Hp6VHarpOXT2aPTfz9Mj5C2yi13aXrU+JqkPXLTl5d0o6S3Jb0l6ZeSatJ5h0t6QtIVkj4Azil3H1rbcShYuQ4Dbk9fu0tapcLrvwXYX9JSkHy5kHzp39pQQNK2QC1wF42HSFGeBA6TdJqk/g1fcrl6DZF0f5nruhK4MiK6AGuSvA8k9QXuBE4CugMjgfskdcwteyDJ0VMfYEPg8Ca28WNgw/RLeFvgSGBwNN6nzeHpa0dgDWBZ4Jp03nbpv13TI6d/p+NbABOBbsDFwI25wLkFmAusBWwM7AYcldveFsBkYGXg/Cbqb1XkULAWSdoG6AUMi4hngFeB71VyGxHxBPAusF866UDg5YgYmys2GPhrRMwA7gD2kLRyBatxVforveH1i7RufwBOAHYHHgOmSRqSq/uFEbF346tcwBxgLUndImJmRDyZTv8u8EBEPBQRc4BLgaWAb+brFxFTI+ID4D5go8Y2EBGzgEOAy4E/ACdERF0T9TkYuDwiJkfETOBMkqa55pp2Xo+I30bEPJIQWBVYJf2hsAdwUkR8EhHTgCuAQbllp0bE1RExNyI+bWYbViUOBSvHYOBvEfFeOn4HFW5CSt3KF7/+DyX5wgEgPYI4gORIhfRX6xuUF05zgQ75CZIaxufkJp8YEV1zr583zIiI2yNiF5JzHz8AzpO0+8K8udSRQF/gJUlP506irwa8nttePfAm0CO37Du54Vkkv+obFRH/IflFLtKjkSbMt910uD3Q3JFgVo80gEjr0otkP7/dEKzADSRHBQ3ebGa9tghwKFiz0i/jA4HtJb0j6R3gZOAbkr5R4c3dCuyctl1vSRI+DfYDugDX5erRg/KakN4AepdM6wPMA95amApGxJyI+CMwDlh/YZZNl38lIg4i+aK8CBguaRlgKsmXKgBpc0zPha1fbvnjgE7pek9vpuh82wVWJwnRd0lO8i+MN4HPgG65YO0SEV/PlXG3zIs4h4K15FskX579SJorNgLWAx5n/i/kGkmdc698W3jHknnztck3iIjXgX+StK0/FBH5X8aDgZuADXL12BrYSNIGLWzrQWAdSYdK6iBpReACYHjJSexGpW3ze0laLj0xuwfwdeCplpZtZF2HSOqeHgl8mE6eR/Jrfi9JO6dHMT8m+YL9Vyu20Rf4JUkT0qHA6ZIabWoi2dcnK7nUd1mS/XJ3ul+mA/Uk5xpaFBF
vA38DLpPUJd1Xa0oqvarMFmEOBWvJYODmiHgjvermnfTL+hrg4Fzb8xDg09zrkdw6xpfM+79mtncLyS/X/AnmHsDOwK/zdUjPbzzI/E1ZC2wrbdveE/g+MA14AfgIOLZk29do/vsUnkmnfwz8hOSI40OSk6vHRsQ/0/r9RNJfm3lPeQOA8ZJmkpx0HhQRsyNiIsmX+NXAeyQn2feJiM/LXC9pXdqTnEe4KCKei4hX0rrfpsYv4b0JuI3kSqPXgNkk508amobOB55Im4O2LKMKhwEdgRdJLh0eTnLOwb4i5IfsmBVL0hvAIRExusXCZlXmIwWzAknqTnKJ6ZQqV8WsLIWFgqSb0pthXmhiviRdJWmSkhtyNimqLmbVIGkz4BXg6oh4o9r1MStHYc1HkrYDZgK3RsQCV2lI2pOk7XJPkhtaroyILQqpjJmZlaWwI4W0/fSDZorsSxIYkd7A01WST0iZmVVRNTuk6sH8N7LUpdPeLi0o6RjgGIBllllm03XXXbd1W5z6bOuW+zJW2xiA59/6qM03vUGP5auy3Wpue4MeSbc9S9L+9nte8rbdGs8888x7EdG9pXLVDIUFOueiiRtb0s7HhgL0798/xowZ07otntP6Hdpq5yR17T3kgTbf9JgL96rKdqu57TEX7gUsWfvb73nJ23ZrSHq95VLVvfqojuSOzQa1JHdXmplZlVQzFEaQ9Dyp9KaYj9I7Is3MrEoKaz6SdCewA9BNUh1wNmmnZBHxG5KugfcEJpF07tXcXa5mZtYGCguFtNOv5uYHcFxR2zezJVeXTu04YYsV6NW1A2r09GXrTZgwAYDfDmz7iyUbtt2czp07U1tbS4cOHVos2xg/Ds/MFjsnbLECm6y5Gu2XXg4t+MC5L2W92q4AzKn7sIWSldew7aZEBO+//z51dXX06dOnVdtwNxdmttjp1bVDIYGwqJPESiutxOzZs1u9DoeCmS12hJa4QGjwZd+3Q8HMzDI+p2Bmi72B1zxR0fVNKeMmsi3XqeVfE97g4nPO5D9PjEYSnTp15uLrb6Z29V787+OPuPCsMxj7dPKspo0224Ih513Ecl2W560332DPb36DM867iO/93zEAXPCz0xiww9YcfvjhFX0vpXykYGZWkFEj7mX6u+8w/KEnuOfv/+KK391Gly5JzwrnnHYitav35oEnnuWBJ56lR89enHv6j7JlV+zWnTtu/A1zPl+o5yx9aQ4FM7OCTJ/2Lt1WXoV27ZKv2lVW7UGXrl1547XJvPj8WI750WlZ2e+fdDrjxz3Lm1NeA2CFFVdi8222Y8TwO9u0zg4FM7OC7L7Ptxj99wc5cPdtufS8nzHhhXEATH7lJdbptwE1NV88rrympoZ1+m3Aqy9/cS/CET88mVuHXsu8efParM4OBTOzgqyyag/+8ujTnDjkLNq1E8cM2pen/vkYEU1cJZTMyEZrV+/F+httwsg//7HN6uwTzWZmBerYqRPb7Lgr2+y4Kyt1W5lHRj3AwUf8gJfGj6O+vj5rWqqvr2fihBdYY6115lv+qONP4cffH8ymW3yzTerrIwUzs4JMeP45pr2T9PNZX1/Pyy+NZ7UePVm9zxqs+/UNGXrVpVnZoVddynrrf4PV+6wx3zr6rNWXNfuuy+iHR7VJnX2kYGaLvRHHb12xdW3YQlcTAHPnzqVjx4588N50zj3jR3z+2WcArL/Rpgw6/GgAzr3kan511unsvc0mRAQbbroZ51xydaPrO+qEU/jugO0r9h6a41AwM6uwV1+eQG2vPmy94y5sveMujZbp0rUrv7pqaKPzevRcnXsf/nc2vk6/DRj7xgdlBdKX5VAwM6ugYbfdxJ03D+W0sy+odlVaxaFgZlZBBx56BAceekS1q9FqPtFsZmYZh4KZmWUcCmZmlnEomJlZxieazWyxt+HvelV2hed81GKRd99+iwt+ehqTX5lIfX092+2yO6f89DxefWUi0999m2132g2A6y+/kKWXXobBPzi
hsnVsJR8pmJlVWERw8tGHsePue3Hf488wYvQYZn3yCVdf/Asmjn+exx95qGLbqnRneT5SMDOrsP88MZpOnTrxre8eDCQ9oJ529vkM2GpD2rfvABGMffpJjjjuZABefWUiRx6wN29PrePgI4/l4CO+D8D9997NHTcNZe6cz1l/4025+5YbqampYdlll+WUU05h1KhRXHbZZWyzzTYVq7uPFMzMKmzSyy/Rb4ON5pu27HJdWK12dY4+8VR222c/ho16nAEDvw3AlFdf5vo/3MPt9z3MDVdcxJw5c5j8ykRG3fcnbvnTgwwb9Tg17Wq4/fbbAfjkk09Yf/31eeqppyoaCOAjBTOzyivpAjs/vbEus7fdaTc6dupEx06dWLFbdz54bxpPPfEYE8Y9x8F77wTA7NmzWW+NnkBy5PGd73ynkKo7FMzMKmzNvuvy95Ej5ps2838f887Ut6hpt2ADTceOnbLhdu3aMXfuPCJgnwMG8aMhZ2fzGvo+6ty583wP6KkkNx+ZmVXYFttsz+xPP+W+4XcBycngy37xcwYe8D1W6r4ysz6Z2fI6tt6Ovz8wgvffmw7ARzNm8Prrrxdab/CRgpktAcYdVbkv03J6KpXEFb+7jfN/eipDr7yE+vp6ttlpV0484+d8OmsWN137aw7cfdvsRHNj1uy7Lsed9lOOPfjb1NfX075DB24a+ht69arw5bUlHApmZgX42mq1XH3zXQtM79ipE3c88EiTy+W7zB4w8NvZyWj4IpBmzmz5SKO13HxkZmYZh4KZmWUcCma22AmCiKh2Nariy75vh4KZLXZe/3AOc2d9vMQFQ0Tw/vvv07lz51avwyeazWyxc/VTMzgB6NX1PUQjN5F9CRP+txQA7874tKLrXZhtN6dz587U1ta2ehsOBTNb7Hz8WT3nj36/kHVPuXAvAPYY8kAh6y9n20Vy85GZmWUKDQVJAyRNlDRJ0pBG5q8u6R+SnpU0TtKeRdbHzMyaV1goSKoBrgX2APoBB0nqV1LsZ8CwiNgYGARcV1R9zMysZUUeKWwOTIqIyRHxOXAXsG9JmQC6pMPLA1MLrI+ZmbWgyFDoAbyZG69Lp+WdAxwiqQ4YCTT6PDpJx0gaI2nM9OnTi6irmZlRbCg0dh1Y6UXDBwG/j4haYE/gNkkL1CkihkZE/4jo37179wKqamZmUGwo1AE9c+O1LNg8dCQwDCAi/g10BroVWCczM2tGkaHwNLC2pD6SOpKcSB5RUuYNYGcASeuRhILbh8zMqqSwUIiIucDxwChgAslVRuMlnSdpYFrsx8DRkp4D7gQOjyXtvnQzs0VIoXc0R8RIkhPI+Wln5YZfBLYusg5mZlY+39FsZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUKDQVJAyRNlDRJ0pAmyhwo6UVJ4yXdUWR9zMysee2LWrGkGuBaYFegDnha0oiIeDFXZm3gTGDriJghaeWi6mNmZi0r8khhc2BSREyOiM+Bu4B9S8ocDVwbETMAImJagfUxM7MWFBkKPYA3c+N16bS8vkBfSU9IelLSgMZWJOkYSWMkjZk+fXpB1TUzsyJDQY1Mi5Lx9sDawA7AQcDvJHVdYKGIoRHRPyL6d+/eveIVNTOzRJGhUAf0zI3XAlMbKfOXiJgTEa8BE0lCwszMqqDIUHgaWFtSH0kdgUHAiJIyfwZ2BJDUjaQ5aXKBdTIzs2YUFgoRMRc4HhgFTACGRcR4SedJGpgWGwW8L+lF4B/AaRHxflF1MjOz5hV2SSpARIwERpZMOys3HMAp6cvMzKrMdzS
bmVnGoWBmZhmHgpmZZcoKBUkHSFouHf6ZpHslbVJs1czMrK2Ve6Tw84j4n6RtgN2BW4Dri6uWmZlVQ7mhMC/9dy/g+oj4C9CxmCqZmVm1lBsKb0m6ATgQGCmp00Isa2ZmXxHlfrEfSHKj2YCI+BBYETitsFqZmVlVlBUKETELmAZsk06aC7xSVKXMzKw6yr366GzgDJIH4gB0AP5QVKXMzKw6ym0+2g8YCHwCEBFTgeWKqpSZmVVHuaHwedpPUQBIWqa4KpmZWbWUGwrD0quPuko6Gvg78NviqmVmZtVQVi+pEXGppF2Bj4F1gLMi4qFCa2ZmZm2uxVCQVAOMiohdAAeBmdlirMXmo4iYB8yStHwb1MfMzKqo3IfszAael/QQ6RVIABFxYiG1MjOzqig3FB5IX2Zmthgr90TzLZI6An3TSRMjYk5x1TIzs2ooKxQk7UDSXfYUQEBPSYMjYnRxVTMzs7ZWbvPRZcBuETERQFJf4E5g06IqZmZmba/cm9c6NAQCQES8TNL/kZmZLUbKPVIYI+lG4LZ0/GDgmWKqZGZm1VJuKBwLHAecSHJOYTRwXVGVMjOz6ig3FNoDV0bE5ZDd5dypsFqZmVlVlHtO4WFgqdz4UiSd4pmZ2WKk3FDoHBEzG0bS4aWLqZKZmVVLuaHwiaRNGkYk9Qc+LaZKZmZWLeWeUzgJ+KOkqSQP2lkN+G5htTIzs6po9khB0maSvhYRTwPrAncDc4EHgdfaoH5mZtaGWmo+ugH4PB3eCvgJcC0wAxhaYL3MzKwKWmo+qomID9Lh7wJDI+Ie4B5JY4utmpmZtbWWjhRqJDUEx87AI7l55Z6PMDOzr4iWvtjvBB6T9B7J1UaPA0haC/io4LqZmVkbazYUIuJ8SQ8DqwJ/i4hIZ7UDTii6cmZm1rbKeUbzkxHxp4jIP4bz5Yj4b0vLShogaaKkSZKGNFNuf0mR3v9gZmZVUu7Nawst7R/pWmAPoB9wkKR+jZRbjqSjvaeKqouZmZWnsFAANgcmRcTkiPgcuAvYt5FyvwAuBmYXWBczMytDkaHQA3gzN16XTstI2hjoGRH3N7ciScdIGiNpzPTp0ytfUzMzA4oNBTUyLbKZUjvgCuDHLa0oIoZGRP+I6N+9e/cKVtHMzPKKDIU6oGduvBaYmhtfDlgfeFTSFGBLYIRPNpuZVU+RofA0sLakPpI6AoOAEQ0zI+KjiOgWEb0jojfwJDAwIsYUWCczM2tGYaEQEXOB44FRwARgWESMl3SepIFFbdfMzFqv0K4qImIkMLJk2llNlN2hyLqYmVnLimw+MjOzrxiHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZQoNBUkDJE2UNEnSkEbmnyLpRUnjJD0sqVeR9TEzs+YVFgqSaoBrgT2AfsBBkvqVFHsW6B8RGwLDgYuLqo+ZmbWsyCOFzYFJETE5Ij4H7gL2zReIiH9ExKx09EmgtsD6mJlZC4oMhR7Am7nxunRaU44E/trYDEnHSBojacz06dMrWEUzM8srMhTUyLRotKB0CNAfuKSx+RExNCL6R0T/7t27V7CKZmaW177AddcBPXPjtcDU0kKSdgF+CmwfEZ8VWB8zM2tBkUcKTwNrS+ojqSMwCBiRLyBpY+AGYGBETCuwLmZmVobCQiEi5gLHA6OACcCwiBgv6TxJA9NilwDLAn+UNFbSiCZWZ2ZmbaDI5iMiYiQwsmTaWbnhXYrcvpmZLRzf0WxmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpm
ZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWKfQZzYua3rPvaPNtTmnzLZqZtZ6PFMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyhYaCpAGSJkqaJGlII/M7Sbo7nf+UpN5F1sfMzJpXWChIqgGuBfYA+gEHSepXUuxIYEZErAVcAVxUVH3MzKxlRR4pbA5MiojJEfE5cBewb0mZfYFb0uHhwM6SVGCdzMysGYqIYlYs7Q8MiIij0vFDgS0i4vhcmRfSMnXp+KtpmfdK1nUMcEw6ug4wsZBKN68b8F6LpaxSvL/bjvd126rW/u4VEd1bKlTkM5ob+8VfmkDllCEihgJDK1Gp1pI0JiL6V7MOSxLv77bjfd22FvX9XWTzUR3QMzdeC0xtqoyk9sDywAcF1snMzJpRZCg8DawtqY+kjsAgYERJmRHA4HR4f+CRKKo9y8zMWlRY81FEzJV0PDAKqAFuiojxks4DxkTECOBG4DZJk0iOEAYVVZ8KqGrz1RLI+7vteF+3rUV6fxd2otnMzL56fEezmZllHApmZpZZIkJBUo2kZyXdXzL9akkzc+O9JD0saZykRyXV5uatLulvkiZIerGhSw5JO0v6r6Sxkv4paa10+hXptLGSXpb0Ydu827Yn6SZJ09L7Thqm3Z17/1MkjU2nd5R0s6TnJT0naYfcMgel08dJelBSt+bWlVtudUkzJZ2am3aypPGSXpB0p6TOhe+INiCps6T/pPtuvKRz0+nHp93FRMN+K1luM0nz0vuHkLRjbp+OlTRb0rfSeU39TTfaLU1zn+niYlH7DpE0WNIr6WswlRQRi/0LOAW4A7g/N60/cBswMzftj8DgdHgn4LbcvEeBXdPhZYGl0+GXgfXS4R8Cv29k+yeQnGiv+r4oaP9uB2wCvNDE/MuAs9Lh44Cb0+GVgWdIfpy0B6YB3dJ5FwPnNLeu3LR70s/u1HS8B/AasFQ6Pgw4vNr7qUL7WsCy6XAH4ClgS2BjoDcwpWEf5papAR4BRgL7N7LOFUku9Gj2bzod/k06PAi4u7nPtNr7qsL7fZH5Dkk/r8npvyukwytU6r0u9kcKaVLvBfwuN60GuAQ4vaR4P+DhdPgfpN1yKOmzqX1EPAQQETMjYlZaLoAu6fDyLHgvBsBBwJ1f+s0soiJiNE3cXyJJwIF88f6zfRwR04APSf5zKX0tky7ThZJ92ci6SH/dTgbGl2y6PbCUkvtfli5d11dVJBp+mXZIXxERz0bElCYWO4EkOKc1MX9/4K9l/E031S1NU5/pYmER/A7ZHXgoIj6IiBnAQ8CAVr/BEot9KAC/Jvng6nPTjgdGRMTbJWWfA76TDu8HLCdpJaAv8KGke9NDyEvSPwqAo4CRkuqAQ4EL8yuU1AvoQ/JLbUm0LfBuRLySjj8H7CupvaQ+wKZAz4iYAxwLPE/yn6IfySXLTa5L0jLAGcC5+UIR8RZwKfAG8DbwUUT8rYg3Vw1pU8ZYki/5hyLiqWbK9iD5W/5NM6scxPw/Wpr6m+4BvAnJJefAR8BKNPGZtua9LaIWte+Q7HNI1aXTKmKxDgVJewPTIuKZ3LTVgAOAqxtZ5FRge0nPAtsDbwFzSX51bpvO3wxYAzg8XeZkYM+IqAVuBi4vWecgYHhEzKvQ2/qqKT1Kuonkj3gMyX+2fwFzJXUgCYWNgdWAccCZLazrXOCK3C9nACStQPILrU+6rmUkHVKpN1RtETEvIjYi6SVgc0nrN1P818AZTf39SVoV2IDkfqIGTf1NN9UtTaOfafnvaNG1iH6HlNU9UKtVu62uyBfwK5I/1inAO8AsYEY6PCV91ZP05lq67LJAXTq8JfBobt6hJN2CdwdezU1fHXixZD3PAt+s9r5og33dm5JzCiT/Ed4FaptZ7l8kRwW
bAQ/npm8HjGxuXcDjuc/xQ5ImrONJ/sPemCt3GHBdtfdRQfv9bNJzKen4FHLnFEjOrTTso5kkRxffys3/ETA0N97k3zRJcGyV+zzeI73XqbHPtNr7pkL7d5H7DiH5cXRDbvwG4KBKvefF+kghIs6MiNqI6E2Sto9ExAoR8bWI6J1OnxXJ8xyQ1E1Swz45k+QXECRddqwgqaGHwZ2AF0n+OJaX1DedviswoWH7ktYhORH078Le5KJtF+ClSHvBBZC0dNrsg6RdgbkR8SLJL6p+uX08375sbF0RsW3uc/w1cEFEXEPSbLRlui0BO5es6ytLUndJXdPhpUj3S1PlI6JPbh8NB34YEX/OFSk9+mrub7rRbmma+Uy/8hbR75BRwG6SVkiPindj/iO9L6XIXlK/inYAfiUpgNEkV1UQEfOUXO74cPol8wzw20i68jgauEdSPckHfERufQcBd0Ua54srSXeS7Ltuabvo2RFxIwu2VUNydcqodH+9RfKLiYiYquTyytGS5gCv88XhNU2sq1ER8ZSk4cB/SQ7dn2UR71pgIawK3JK2R7cDhkXE/ZJOJGn3/howTtLISLutb0p6SWRP4LGGaS38TTfVLU2jn+kSagcK/g6JiA8k/YIkaADOi4iKdSTqbi7MzCyzWDcfmZnZwnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgS4y0K+PdS6adJOm6ZpaZ2dS8Vtbh90q7rzZbFDkUbElyJws+B7zsm+LMlgQOBVuSDAf2ltQJsjt6VwPGKnkwyn+VPChm39IFJe2g3ANWJF0j6fB0eFNJj0l6RtKotJO5FjW1XHpEc5GSh+m8LGnbL/vGzcrlULAlRkS8D/yHL/qeHwTcDXwK7BcRmwA7ApelXRG0KO3d9WqSh9dsStLXzfkVWK59RGwOnETS6Z1Zm3DfR7akaWhC+kv67xEkXRFfIGk7kh4vewCrkPSE2ZJ1gPWBh9IcqSF5hsOXXe7e9N9nSHqgNWsTDgVb0vwZuFzSJiSP6/xv2gzUHdg0IuZImgKUPtN5LvMfWTfMFzA+IrZayHq0tNxn6b/z8P9Ta0NuPrIlSiQP5HmUpLmm4QTz8iQPUpkjaUegVyPGiHSvAAAAn0lEQVSLvk7StXcnScuTdMcNMBHoLmkrSJqFJH29jKq0djmzQvkXiC2J7iRpnmm4Eul24D5JY4CxNPJ8goh4U9IwkifCvULSHTcR8Xl6ielVaVi0J3m2Q+kzo0vX16rlzIrmrrPNzCzj5iMzM8u4+cisAJKuBbYumXxlRNxcjfqYlcvNR2ZmlnHzkZmZZRwKZmaWcSiYmVnGoWBmZpn/B2N/LHgmX1d2AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -499,7 +499,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu8FWW9x/HPl3veQIUsuecdzUzwllneKiTFU8cMj5l4N0OzTKM0RczUvOQlU6lMs0RJxVA5oWJe4qSBSigSioSwQwUUNUDk9jt/zOxxsVhr7wWs2Qv2/r5fr/Xaa555ZuY3s2bPb80zM89SRGBmZgbQqtYBmJnZhsNJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYBlJvSSFpDZVmNeBkuoaGH+zpB+v73IsH5IWSfpEreOoFkmDJf211nFsDJwUmlB6wN2+qGyYpN+v4/wel3TyesQzS9Kh6zr9+oiI0yPiklosu1i6HZdK6l5QdqikWUX1Bkt6QdISSW9IuklSp4Lxw9LP+GsFZW3Ssl5llr1aIpbUTdK9khZIejdd3uCC+u0lXSZptqT3Jb0i6VxJWtv1aUhEbBYRMyutv74kXS1pXFHZtZIeXId5Ve3LTUvkpGAtiqTWZUYtBsqeuUg6B7gCOBfoCOwL9AQekdSuoOrbwPAGltOYO4A56by3Br4JvFkw/o/AIcAAYHPgOOBU4Lq1WZ8N0I+B7SSdACBpP+B44PSaRlVCs082EeFXE72AALYvKhsG/D59fyBQB/wIWADMAo4tM69LgZXAUmAR8Iu0/DPARODd9O9nykx/B7AKeD+d/jygVxrj8cDsNIbzC6ZpBQwFXgXeAkYBW5WZf4PrAtwG/KRg+DzgdWAucHLhtiI5OD4AvJeu00+AvxZMuzPwCMkBeTpwdNFybgLGkhwoDy0R6+PARcB/CpZ5KDArfb9Fuo2OLppuM2AecGLBZ/kH4B/A8WlZm3RdepXZTvXbvE06vAjYo0zdQ9LPu3tR+T7pvrB9JeuztvsqSQJ6KZ3fv4HvF9Q7BZiRbvsxwLZF8zgdeAVYCNwIqIFlHpjuV72AacBpDdRtBVwAvJZ+Br8DOqbjZqfLXpS+9gMGA38Frkpj+RdwWMH8OgK/IdkH/53uY63TcYOBCcDP0/X8SaXbcWN81TyAlvSisqSwArgGaA98nuRAtlOZ+T0OnFwwvFW6wx+XHoyOSYe3LjP9LAoOkgUHqF8BHwE+BXwA7JKOPxt4GuiWxncLMLLMvBtcFwqSAtAfeAPYFdiEJGEVHpTuSl+bAH1Ivkn/NR23aTp8QrrOe5IkoV0LlvMusH96IOlQbjumsdZ/FoVJoX+6Lm1KTHt7/Tao/yyBgcBMoC1rnxQeTQ9Ag4AeRXUvB54oM5/XSA+ija1POvxL4JeV7KskB8oD0vdbAnum7w9Ot/We6Wd8A/Bk0TweBDoBPYD5QP9G/kduSef5OA0nkBNJktEnSJLzfcAdpbZpWjYYWE6SxFoD3yL5AqJ0/P3psjcFPgr8vWB7Dk4//zPTz/MjtT6W5Ply89GG6ccR8UFEPAE8BBxd4XRfBl6JiDsiYkVEjAT+CRyxlsu/OCLej4h/kHzr/VRafhrJmUNdRHxAchA8qpHT6UrW5WjgtxExNSKWABfXj0ibYf4buCgilkTESyQH4nqHkxzsfpuu83PAvcBRBXX+FBETImJVRCxtINbLgCMk7VpU3hlYEBErSkzzejo+ExFjSA6A63K952vAUyTNKf+SNFnSXgVxvF5mujXioPz6EBFnRMQZFca0HOgjaYuIWJhuY4BjgVsj4rl0f/ghsF/R9ZPLI+KdiJgN/AXYo5FlPUVyZviHSI/IZRwLXBMRMyNiUbrsQY3si69FxK8iYiXJPvRxYBtJ2wCHAWdHxOKImEdyVjCoY
Nq5EXFDuo+938g6bNScFJrWSpJvj4XakvzT1VsYEYsLhl8Dtq1w/tum9Qu9BnRdmyBJvrXXW0LyTQySdu7Rkt6R9A7JKf5KYJsy86l0XbYl+bZfr/B9F5JvZ+XG9wT2qY8pjetY4GNl6pcVEfOBXwDDi0YtADqXOeB8PB1f7ALgfKBDfYGkHuldPYskLSoTw8KIGBoRu5Js18nA/emF5AXp8kpZI44G1mdt/TdJE9Jrkp5I2/uhaH9LD85vsfr+Vm5fWoOkrUmad64luS7TqVzd4mWn79tQfl9cLZb0ywdpPD1J/g9fL9iHbiE5Y6hX0T7UHDgpNK3ZJKe2hXqz+s69paRNC4Z7kJzmllL8TWouyQ5eqAdJG2kl0zdmDkk7bKeCV4eIKDf/StfldZImqXrdC97PJzl1Lzd+DkmTSmFMm0XEtwrqrM16XgkcBPQtKPsbSTPaVwsrput2GDC+eCYR8QhJ88YZBWWz09g2i4iyB8eC+gtIDpLbkjQNPkqSAAvXH0l7k2yTxypcn7USERMj4kiSg+T9JNeSoGh/S7fH1pTf3xpzLfDniPgu8CTJupdTvK/3INlP3mTd9usPgM4F+9AWaWKu12K6k3ZSaFp3Axektx22Sm8HPQK4p6jexZLaSTqApHnkj2Xm9yZJm2q9scCOkv4nvRXy6yRt8OVu6yuevjE3A5dK6gkgqYukIxuZppJ1GQWcIGkXSZsAF9aPSE/17wOGSdpE0s4kd+TUe5BknY+T1DZ97SVpl7VYr0xEvANcTXLhu77sXZImrRsk9U+X0StdlzqSayClnF84n0pIukLSbunntzlJ2/eMiHgrIh4lSUD3StpVUmtJ+5Jc3L4pIl6pZH3WMp52ko6V1DEilpNc7F+Zjr6T5HPbQ1J74KfAMxExax2WMwD4AvC9tOhM4L8kHVRmkpHAdyX1lrRZuuy70ya++SQ3UVS0b0fE68DDwNWStkj/N7eT9Pm1XY/mwEmhaQ0H/o/kLoiFwM9I7sh5saDOG+m4uST/7KdHxD/LzO86kjb9hZKuj4i3SA6855Ccxp8HHJ5+4yzlMpIk9Y6k71cQ/3Ukd5g8LOk/JBed92mgfkXrEhH/C1xP0uY8g+SbOSTf3gCGkNwd8gbJAXhk/biI+A/wRZL237lpnStILnyuq+v48MBXH+PPSO6kuorkwPgMyTfMQ9L29DVExASSC5aVqP8mugkwGniH5GJ1T5IL1/X+m2Q7/Znkzprfk9w1c+barI+ShwdvrjC244BZkt4juZvoGwARMZ7k2se9JGd727F6O3xF0uR3M3BWRLydznseyX78K0kfKTHZrST7wpMkdxItJd0GadPQpcCEdN/et4Iwvgm0I7nLaiHJF7VyTXXNWv2Vd9sASDqQ5G6Rbo3Vbc7Sb/kvAu1LXdyVdAXwsYg4vsmDqzJJu5PcsdNQ+3mTktSKJIn0TC8QWwviMwXbIEj6StpUsSXJN/0H6hOCpJ0l7a7E3sBJJN+mN2rpwfdoYFKtYymyG8k37zcaq2jNT25JQdKtkuZJerHMeEm6XtIMSVMk7ZlXLLZROI2kLfhVkm+phReKNye5rrCY5PrD1cCfmjrAHMwmeQbiu7UOpJ6k+uapH0TEslrHY00vt+YjSZ8jafP8XUTsVmL8AJI2wAEk7dLXRURD7dNmZpaz3M4UIuJJkkfCyzmSJGFERDwNdJLUIi/smJltKGrZsVNXVn8gpC4tW+OJTUmnknT6xaabbtp35513Xrclzn1+3aZbH9t+urbLrsVya7nsdHu/8O93m3zRn+zasWbLBWq2vVviOtd82evg2WefXRARXRqrV8ukoBJlJduyImIEMAKgX79+MWnSOl6XG9Zx3aZbH8OSWHsNfajJFz3r8i/XZLm1XPasy78M1GZ7T6rROk+q4TrX6nOuX+eW+D+9riQV93ZQUi3vPqpj9SdTu1H+yV0zM2sCtUwKY4Bvpnch7Qu8mz5ZaGZmNZJb85GkkSTdJ3dW8
rOMF5F2BhcRN5N0yTCA5AnWJSRdH5uZWQ3llhQi4phGxgfw7Wosa/ny5dTV1bF0aUO9IgNfGtXw+DxMmwbArwZW/8aqIHjtneXc8MxC3vtgVdXnb2YtT7P4Wbm6ujo233xzevXqhVTq+nVqbiNJIw/bJv2yLa97p+qzjgi23vo9zgQuffKtqs/fzFqeZtHNxdKlS9l6660bTgjNkCTabLIFPTsV/0SDmdm6aRZJAWhxCaGeJFTy7l4zs7XXLJqPzKxl6rX0ziZf5qwmX2LTapZJodoPlcw6q7Jfw9xsh/15b/pTnH3RVTw2YSKS6LBZJ0aNGgVtt+Q/773L5Rf+gMkTnwFgj732YejwK9h8i478e85sBnzmU/xg+BX8zwmnAvDTC85l190/zZFH/09V18fMrJxm03y0obh7zMPMfWM+Ux69mxfGj2L06NF06pR0lT/s3LPo1qMXD014nocmPE/X7j25+LzvZNNu1bkLd/7mZpYvc+eUZlYbTgpV9vqbC/j4Np1p1SrZtN26dWPLLbdk9r9m8tILkzn1O+dmdU87+zymTnmeObP+BcCWW23N3p/9HGPuGVmT2M3MnBSq7OgjvsADjzzJHl8YxDkXX8PzzyedZs185Z/s1OeTtG7dOqvbunVrdurzSV59eVpWduIZ3+V3I25k5cqVa8zbzCxvTgpV1m3bbZj+5Ggu++GZtGolDjnkEMaPH09EmTukkhEfTt+jJ7vtsSdj7y/1+/ZmZvlqlheaa619+3YcdvD+HHbw/myz3e7cf//9fOnoE/jn1CmsWrUqa1patWoV06e9yCe232m16U8e8j3OOe14+u7zmVqEb2YtmM8Uquy5F6Yx9435QHLQnzJlCj179qRH70+w8667M+L6q7K6I66/il12+xQ9en9itXn03n5HtttxZ54cP65JYzcza5ZnCmX7HM/xRzFWrFhB+3btmLfgbU459xI+WLYcgL0/8zmGDBnCywuWcvGVN3DZhedx+Gf3JCLYve9eDLvyhpLzO/nM7/H1/p/PLV4zs1KaZVKohanTZ7Jdr270P2h/+h+0/4cjsl9KWsoWnTpx2fUjSk7ftXsP7hv/t2x4pz6fZPLshn7N1Mys+pwUquDm393D9beO5NqLv1/rUMzM1ouTQhWc/s2jOP2bR9U6DDOz9eYLzWZmlnFSMDOzjJOCmZllnBTMzCzTPC80D+tY3fmd+nhF1ermvsm3z7+cl16eyaoIDj/0AK785W289NJLTJjyMgcc/EUAbrrmcjbZZFOOP/3M6sZpZraefKZQJRHBV0/5Pv/V/0BemfAnXn5qNIsWL+H8889n8uTJPPXYI1VbljvLM7O8NM8zhRp47K9/p0P7dpzw9SOBpAfUnw87h577DqRt27asWLmKyROf5sRvfxeAV1+ZzklfO5zX59Zx7Enf4tgTTwPgwfvu5s5bR7Bi+TJ2+3Rfzr/0alq3bs2+O3XjuFPO4P+eeIxzfnwJe+69X83W1cyaL58pVMnUl2fS95O7rFa2xeab0atXLy644AK+eMRXGDXuKfoP/CoAs159mZt+fy9/eGA8t/z8CpYvX87MV6Yz7oHR3D76z4wa9xStW7Vm7Oikt9T3lyxm+5124Q8PPOqEYGa58ZlClUREya6xy5UfcPAXade+Pe3at2erzl14e8E8npnwBNOm/INjDz8YgKVLl7JV5y5AcuZx6ICB+a6EmbV4TgpVsuuO23Hv2PGrlb33n0XMmTNntR/WqdeuXfvsfatWrVixYiURcMTXBvGdoRetWb99h5LzMTOrJjcfVckhB+zNkveX8rs/PggkF4PPGf5zBg8ezDbbbMOSxYsancc++3+ORx8aw1sLkq633124kLl1s3ON28ysUPM8Uxj2bunyHLvOlsToX1/NGT+6jEuu/RWrIhhw8P789Kc/ZfHixVw0/FKO/tIB2YXmUrbbcWe+fe75fOvYr7Jq1SratG3Lj35yJdt265Fb3GZmhZpnUqiR7l0/x
gO3X7d6Yfv2tG/fnjsfeqzsdIVdZvcf+NXsYnShp6fXVS1OM7Ny3HxkZmYZJwUzM8s0m6QQEbUOoSYigqBlrruZVV+zSAodOnTgrbfeanGJISJYseQ9Xntnea1DMbNmollcaO7WrRt1dXXMnz+/4YrvzGuagAq9Ow2ANxe+X/VZB8Fr7yznhmcWVn3eZtYyNYuk0LZtW3r37t14xWH75h/MGstMbo89bOhDTb9sM7O11Cyaj8zMrDpyTQqS+kuaLmmGpKElxveQ9BdJz0uaImlAnvGYmVnDcksKkloDNwKHAX2AYyT1Kap2ATAqIj4NDAJ+mVc8ZmbWuDzPFPYGZkTEzIhYBtwFHFlUJ4At0vcdgbk5xmNmZo3IMyl0BeYUDNelZYWGAd+QVAeMBUr+PqWkUyVNkjSp0TuMzMxsneWZFNb8EQHWeMrqGOC2iOgGDADukLRGTBExIiL6RUS/Ll265BCqmZlBvkmhDuheMNyNNZuHTgJGAUTE34AOQOccYzIzswbkmRQmAjtI6i2pHcmF5DFFdWYDhwBI2oUkKbh9yMysRnJLChGxAhgCjAOmkdxlNFXScEn1vyt5DnCKpH8AI4HB0dL6qjAz24Dk+kRzRIwluYBcWHZhwfuXgP3zjMHMzCrnJ5rNzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmaZXJOCpP6SpkuaIWlomTpHS3pJ0lRJd+YZj5mZNaxNXjOW1Bq4EfgCUAdMlDQmIl4qqLMD8ENg/4hYKOmjecVjZmaNy/NMYW9gRkTMjIhlwF3AkUV1TgFujIiFABExL8d4zMysEXkmha7AnILhurSs0I7AjpImSHpaUv9SM5J0qqRJkibNnz8/p3DNzCzPpKASZVE03AbYATgQOAb4taROa0wUMSIi+kVEvy5dulQ9UDMzS+SZFOqA7gXD3YC5Jer8KSKWR8S/gOkkScLMzGogz6QwEdhBUm9J7YBBwJiiOvcDBwFI6kzSnDQzx5jMzKwBuSWFiFgBDAHGAdOAURExVdJwSQPTauOAtyS9BPwFODci3sorJjMza1hut6QCRMRYYGxR2YUF7wP4XvoyM7Ma8xPNZmaWcVIwM7OMk4KZmWUqSgqSviZp8/T9BZLuk7RnvqGZmVlTq/RM4ccR8R9JnwW+BNwO3JRfWGZmVguVJoWV6d8vAzdFxJ+AdvmEZGZmtVJpUvi3pFuAo4GxktqvxbRmZraRqPTAfjTJg2b9I+IdYCvg3NyiMjOzmqgoKUTEEmAe8Nm0aAXwSl5BmZlZbVR699FFwA9IfhAHoC3w+7yCMjOz2qi0+egrwEBgMUBEzAU2zysoMzOrjUqTwrK0n6IAkLRpfiGZmVmtVJoURqV3H3WSdArwKPCr/MIyM7NaqKiX1Ii4StIXgPeAnYALI+KRXCMzM7Mm12hSkNQaGBcRhwJOBGZmzVijzUcRsRJYIqljE8RjZmY1VOmP7CwFXpD0COkdSAARcVYuUZmZWU1UmhQeSl9mZtaMVXqh+XZJ7YAd06LpEbE8v7DMzKwWKkoKkg4k6S57FiCgu6TjI+LJ/EIzM7OmVmnz0dXAFyNiOoCkHYGRQN+8AjMzs6ZX6cNrbesTAkBEvEzS/5GZmTUjlZ4pTJL0G+COdPhY4Nl8QjIzs1qpNCl8C/g2cBbJNYUngV/mFZSZmdVGpUmhDXBdRFwD2VPO7XOLyszMaqLSawrjgY8UDH+EpFM8MzNrRipNCh0iYlH9QPp+k3xCMjOzWqk0KSyWtGf9gKR+wPv5hGRmZrVS6TWFs4E/SppL8
kM72wJfzy0qMzOriQbPFCTtJeljETER2Bm4G1gB/Bn4VxPEZ2ZmTaix5qNbgGXp+/2AHwE3AguBETnGZWZmNdBY81HriHg7ff91YERE3AvcK2lyvqGZmVlTa+xMobWk+sRxCPBYwbhKr0eYmdlGorED+0jgCUkLSO42egpA0vbAuznHZmZmTazBpBARl0oaD3wceDgiIh3VCjgz7+DMzKxpVfIbzU9HxOiIKPwZzpcj4rnGppXUX9J0STMkDW2g3lGSIn3+wczMaqTSh9fWWto/0o3AYUAf4BhJfUrU25yko71n8orFzMwqk1tSAPYGZkTEzIhYBtwFHFmi3iXAz4ClOcZiZmYVyDMpdAXmFAzXpWUZSZ8GukfEgw3NSNKpkiZJmjR//vzqR2pmZkC+SUElyiIbKbUCfg6c09iMImJERPSLiH5dunSpYohmZlYoz6RQB3QvGO4GzC0Y3hzYDXhc0ixgX2CMLzabmdVOnklhIrCDpN6S2gGDgDH1IyPi3YjoHBG9IqIX8DQwMCIm5RiTmZk1ILekEBErgCHAOGAaMCoipkoaLmlgXss1M7N1l2tXFRExFhhbVHZhmboH5hmLmZk1Ls/mIzMz28g4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVkm16Qgqb+k6ZJmSBpaYvz3JL0kaYqk8ZJ65hmPmZk1LLekIKk1cCNwGNAHOEZSn6JqzwP9ImJ34B7gZ3nFY2ZmjcvzTGFvYEZEzIyIZcBdwJGFFSLiLxGxJB18GuiWYzxmZtaIPJNCV2BOwXBdWlbOScD/lhoh6VRJkyRNmj9/fhVDNDOzQnkmBZUoi5IVpW8A/YArS42PiBER0S8i+nXp0qWKIZqZWaE2Oc67DuheMNwNmFtcSdKhwPnA5yPigxzjMTOzRuR5pjAR2EFSb0ntgEHAmMIKkj4N3AIMjIh5OcZiZmYVyC0pRMQKYAgwDpgGjIqIqZKGSxqYVrsS2Az4o6TJksaUmZ2ZmTWBPJuPiIixwNiisgsL3h+a5/LNzGzt5JoUNjS9lt7Z5Muc1eRLNDNbd+7mwszMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlsk1KUjqL2m6pBmShpYY317S3en4ZyT1yjMeMzNrWG5JQVJr4EbgMKAPcIykPkXVTgIWRsT2wM+BK/KKx8zMGpfnmcLewIyImBkRy4C7gCOL6hwJ3J6+vwc4RJJyjMnMzBqgiMhnxtJRQP+IODkdPg7YJyKGFNR5Ma1Tlw6/mtZZUDSvU4FT08GdgOm5BN2wzsCCRmtZtXh7Nx1v66ZVq+3dMyK6NFapTY4BlPrGX5yBKqlDRIwARlQjqHUlaVJE9KtlDC2Jt3fT8bZuWhv69s6z+agO6F4w3A2YW66OpDZAR+DtHGMyM7MG5JkUJgI7SOotqR0wCBhTVGcMcHz6/ijgscirPcvMzBqVW/NRRKyQNAQYB7QGbo2IqZKGA5MiYgzwG+AOSTNIzhAG5RVPFdS0+aoF8vZuOt7WTWuD3t65XWg2M7ONj59oNjOzjJOCmZllWlRSkHSrpHnp8xH1ZVtJekTSK+nfLYum2UvSyvS5i8LyL
ST9W9IvSixnTOEyWjJJrSU9L+nBdHhI2q1JSOpcUO9cSZPT14vpNt9K0k4F5ZMlvSfp7ILpzky7Upkq6We1WMcNQbntJOkSSVPSsoclbZvW31nS3yR9IOn7RfMq2T2NpEMkPZfO66+Stm/q9aylWh0/GltGtbWopADcBvQvKhsKjI+IHYDx6TCQddVxBcnF8mKXAE8UF0r6KrCoSvE2B98BphUMTwAOBV4rrBQRV0bEHhGxB/BD4ImIeDsipheU9wWWAKMBJB1E8lT87hGxK3BV/quzYWpgO10ZEbun5Q8CF6aTvA2cRdE2a6R7mpuAY9N53QlckPNqbWhuozbHj7LLyEOLSgoR8SRrPgdR2NXG7cB/FYw7E7gXmFc4gaS+wDbAw0XlmwHfA35Svag3XpK6AV8Gfl1fFhHPR8SsRiY9BhhZovwQ4NWIqE8o3wIuj4gP0nnPKzFNS5Rtp4h4r6B8U9KHQyNiXkRMBJYXTdtQ9zQBbJG+78iazx01azU8fjS0jKprUUmhjG0i4nWA9O9HASR1Bb4C3FxYWVIr4Grg3BLzuiQdtyTPgDci1wLnAasqnUDSJiTfxu4tMXoQqyeLHYED0h52n5C01/oE24ystp0kXSppDnAsH54plNMVmFMwXJeWAZwMjJVUBxwHXF61iDdeTXH8KLmMvDgplHct8IOIWFlUfgYwNiIK/3GQtAewfUSMbqoAN2SSDgfmRcSzaznpEcCEiFjtG1n6AORA4I8FxW2ALYF9Sf7JRkktu0PFUtspIs6PiO7AH4Ah5aatn0WJsvr71r8LDIiIbsBvgWvWP+Jma6M9fuTZ99HG4k1JH4+I1yV9nA9P9foBd6XHmM7AAEkrgP1Ivp2eAWwGtJO0iKSNvK+kWSTb9aOSHo+IA5t2dTYY+wMDJQ0AOgBbSPp9RHyjkemKzwbqHQY8FxFvFpTVAfelT8H/XdIqks9q/vqHv9EqtZ3q3Qk8BFzUwPQlu6eR1AX4VEQ8k5bfDfy5CvFu7Jri+FFuGfmIiBb1AnoBLxYMXwkMTd8PBX5WYprbgKNKlA8GftHYMlr6CzgQeLCobBbQuaisvu+rTUvM4y7ghKKy04Hh6fsdSZo9VOv1rfG2Xm07ATsUvD8TuKeo/jDg+wXDbYCZQG+gHfAPYNe0fAGwY1rvJODeWq+i7vn/AAACi0lEQVRvDbZvkx8/KllGNV8t6kxB0kiSA1TntF30IpJ20VGSTgJmA1+rXYTNn6SzSK4zfAyYImlspN2rk7TBPhwRi4um2QT4AnBa0exuBW5Nb99bBhwf6X9OS1RmO10uaSeS6zqvkSRSJH0MmERy4XhVeptvn4h4TyW6p0mnOQW4Nz0jWwic2DRrtmGo4fGjSY9R7ubCzMwyvtBsZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4K1GJIel/SlorKzJf2ygWmq2uOtpNuKu1E225A4KVhLMpI1fwe8XLcaZi2Sk4K1JPcAh0tqDyCpF7AtMFnS+PQHZF6QdGTxhJIOVPpDQenwLyQNTt/3TXtpfVbSuLR/mkaVmy49o7lC0t8lvSzpgPVdcbNKOSlYixERbwF/58MfShlE0rHb+8BXImJP4CDg6kp7W5XUFriBpG+bviRdb1xahenaRMTewNk03IGdWVW1qL6PzPiwCelP6d8TSbqL/qmkz5H0EdSV5EdQ3qhgfjsBuwGPpHmkNfB6Faa7L/37LEkHaWZNwknBWpr7gWsk7Ql8JCKeS5uBugB9I2J52n1xh6LpVrD6mXX9eAFTI2K/tYyjsek+SP+uxP+n1oTcfGQtSkQsAh4naa6pv8DckeQHgZanv/vcs8SkrwF9JLWX1JHkJy8BpgNdJO0HSbOQpF0rCGVdpzPLlb+BWEs0kqR5pv5OpD8AD0iaBEwG/lk8QUTMkTQKmAK8Ajyfli9LbzG9Pk0WbUh+dWtqQwGs63RmeXPX2WZmlnHzkZmZZdx8ZJYDSTeS/E51o
esi4re1iMesUm4+MjOzjJuPzMws46RgZmYZJwUzM8s4KZiZWeb/AVRzNc6YsQUIAAAAAElFTkSuQmCC\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu8FWW9x/HPl3veQIUsuecdzUzwllneKiTFU8cMj5l4N0OzTKM0RczUvOQlU6lMs0RJxVA5oWJe4qSBSigSioSwQwUUNUDk9jt/zOxxsVhr7wWs2Qv2/r5fr/Xaa555ZuY3s2bPb80zM89SRGBmZgbQqtYBmJnZhsNJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYBlJvSSFpDZVmNeBkuoaGH+zpB+v73IsH5IWSfpEreOoFkmDJf211nFsDJwUmlB6wN2+qGyYpN+v4/wel3TyesQzS9Kh6zr9+oiI0yPiklosu1i6HZdK6l5QdqikWUX1Bkt6QdISSW9IuklSp4Lxw9LP+GsFZW3Ssl5llr1aIpbUTdK9khZIejdd3uCC+u0lXSZptqT3Jb0i6VxJWtv1aUhEbBYRMyutv74kXS1pXFHZtZIeXId5Ve3LTUvkpGAtiqTWZUYtBsqeuUg6B7gCOBfoCOwL9AQekdSuoOrbwPAGltOYO4A56by3Br4JvFkw/o/AIcAAYHPgOOBU4Lq1WZ8N0I+B7SSdACBpP+B44PSaRlVCs082EeFXE72AALYvKhsG/D59fyBQB/wIWADMAo4tM69LgZXAUmAR8Iu0/DPARODd9O9nykx/B7AKeD+d/jygVxrj8cDsNIbzC6ZpBQwFXgXeAkYBW5WZf4PrAtwG/KRg+DzgdWAucHLhtiI5OD4AvJeu00+AvxZMuzPwCMkBeTpwdNFybgLGkhwoDy0R6+PARcB/CpZ5KDArfb9Fuo2OLppuM2AecGLBZ/kH4B/A8WlZm3RdepXZTvXbvE06vAjYo0zdQ9LPu3tR+T7pvrB9JeuztvsqSQJ6KZ3fv4HvF9Q7BZiRbvsxwLZF8zgdeAVYCNwIqIFlHpjuV72AacBpDdRtBVwAvJZ+Br8DOqbjZqfLXpS+9gMGA38Frkpj+RdwWMH8OgK/IdkH/53uY63TcYOBCcDP0/X8SaXbcWN81TyAlvSisqSwArgGaA98nuRAtlOZ+T0OnFwwvFW6wx+XHoyOSYe3LjP9LAoOkgUHqF8BHwE+BXwA7JKOPxt4GuiWxncLMLLMvBtcFwqSAtAfeAPYFdiEJGEVHpTuSl+bAH1Ivkn/NR23aTp8QrrOe5IkoV0LlvMusH96IOlQbjumsdZ/FoVJoX+6Lm1KTHt7/Tao/yyBgcBMoC1rnxQeTQ9Ag4AeRXUvB54oM5/XSA+ija1POvxL4JeV7KskB8oD0vdbAnum7w9Ot/We6Wd8A/Bk0TweBDoBPYD5QP9G/kduSef5OA0nkBNJktEnSJLzfcAdpbZpWjYYWE6SxFoD3yL5AqJ0/P3psjcFPgr8vWB7Dk4//zPTz/MjtT6W5Ply89GG6ccR8UFEPAE8BBxd4XRfBl6JiDsiYkVEjAT+CRyxlsu/OCLej4h/kHzr/VRafhrJmUNdRHxAchA8qpHT6UrW5WjgtxExNSKWABfXj0ibYf4buCgilkTESyQH4nqHkxzsfpuu83PAvcBRBXX+FBETImJVRCxtINbLgCMk7VpU3hlYEBErSkzzejo+ExFjSA6A63K952vAUyTNKf+SNFnSXgVxvF5mujXioPz6EBFnRMQZFca0HOgjaYuIWJhuY4BjgVsj4rl0f/ghsF/R9ZPLI+KdiJgN/AXYo5FlPUVyZviHSI/IZRwLXBMRMyNiUbrs
QY3si69FxK8iYiXJPvRxYBtJ2wCHAWdHxOKImEdyVjCoYNq5EXFDuo+938g6bNScFJrWSpJvj4XakvzT1VsYEYsLhl8Dtq1w/tum9Qu9BnRdmyBJvrXXW0LyTQySdu7Rkt6R9A7JKf5KYJsy86l0XbYl+bZfr/B9F5JvZ+XG9wT2qY8pjetY4GNl6pcVEfOBXwDDi0YtADqXOeB8PB1f7ALgfKBDfYGkHuldPYskLSoTw8KIGBoRu5Js18nA/emF5AXp8kpZI44G1mdt/TdJE9Jrkp5I2/uhaH9LD85vsfr+Vm5fWoOkrUmad64luS7TqVzd4mWn79tQfl9cLZb0ywdpPD1J/g9fL9iHbiE5Y6hX0T7UHDgpNK3ZJKe2hXqz+s69paRNC4Z7kJzmllL8TWouyQ5eqAdJG2kl0zdmDkk7bKeCV4eIKDf/StfldZImqXrdC97PJzl1Lzd+DkmTSmFMm0XEtwrqrM16XgkcBPQtKPsbSTPaVwsrput2GDC+eCYR8QhJ88YZBWWz09g2i4iyB8eC+gtIDpLbkjQNPkqSAAvXH0l7k2yTxypcn7USERMj4kiSg+T9JNeSoGh/S7fH1pTf3xpzLfDniPgu8CTJupdTvK/3INlP3mTd9usPgM4F+9AWaWKu12K6k3ZSaFp3Axektx22Sm8HPQK4p6jexZLaSTqApHnkj2Xm9yZJm2q9scCOkv4nvRXy6yRt8OVu6yuevjE3A5dK6gkgqYukIxuZppJ1GQWcIGkXSZsAF9aPSE/17wOGSdpE0s4kd+TUe5BknY+T1DZ97SVpl7VYr0xEvANcTXLhu77sXZImrRsk9U+X0StdlzqSayClnF84n0pIukLSbunntzlJ2/eMiHgrIh4lSUD3StpVUmtJ+5Jc3L4pIl6pZH3WMp52ko6V1DEilpNc7F+Zjr6T5HPbQ1J74KfAMxExax2WMwD4AvC9tOhM4L8kHVRmkpHAdyX1lrRZuuy70ya++SQ3UVS0b0fE68DDwNWStkj/N7eT9Pm1XY/mwEmhaQ0H/o/kLoiFwM9I7sh5saDOG+m4uST/7KdHxD/LzO86kjb9hZKuj4i3SA6855Ccxp8HHJ5+4yzlMpIk9Y6k71cQ/3Ukd5g8LOk/JBed92mgfkXrEhH/C1xP0uY8g+SbOSTf3gCGkNwd8gbJAXhk/biI+A/wRZL237lpnStILnyuq+v48MBXH+PPSO6kuorkwPgMyTfMQ9L29DVExASSC5aVqP8mugkwGniH5GJ1T5IL1/X+m2Q7/Znkzprfk9w1c+barI+ShwdvrjC244BZkt4juZvoGwARMZ7k2se9JGd727F6O3xF0uR3M3BWRLydznseyX78K0kfKTHZrST7wpMkdxItJd0GadPQpcCEdN/et4Iwvgm0I7nLaiHJF7VyTXXNWv2Vd9sASDqQ5G6Rbo3Vbc7Sb/kvAu1LXdyVdAXwsYg4vsmDqzJJu5PcsdNQ+3mTktSKJIn0TC8QWwviMwXbIEj6StpUsSXJN/0H6hOCpJ0l7a7E3sBJJN+mN2rpwfdoYFKtYymyG8k37zcaq2jNT25JQdKtkuZJerHMeEm6XtIMSVMk7ZlXLLZROI2kLfhVkm+phReKNye5rrCY5PrD1cCfmjrAHMwmeQbiu7UOpJ6k+uapH0TEslrHY00vt+YjSZ8jafP8XUTsVmL8AJI2wAEk7dLXRURD7dNmZpaz3M4UIuJJkkfCyzmSJGFERDwNdJLUIi/smJltKGrZsVNXVn8gpC4tW+OJTUmnknT6xaabbtp35513Xrclzn1+3aZbH9t+urbLrsVya7nsdHu/8O93m3zRn+zasWbLBWq2vVviOtd82evg2WefXRARXRqrV8ukoBJlJduyImIEMAKgX79+MWnSOl6XG9Zx3aZbH8OSWHsNfajJFz3r8i/XZLm1XPasy78M1GZ7T6rROk+q4TrX6nOuX+eW+D+9riQV93ZQUi3vPqpj9SdTu1H+yV0z
M2sCtUwKY4Bvpnch7Qu8mz5ZaGZmNZJb85GkkSTdJ3dW8rOMF5F2BhcRN5N0yTCA5AnWJSRdH5uZWQ3llhQi4phGxgfw7Wosa/ny5dTV1bF0aUO9IgNfGtXw+DxMmwbArwZW/8aqIHjtneXc8MxC3vtgVdXnb2YtT7P4Wbm6ujo233xzevXqhVTq+nVqbiNJIw/bJv2yLa97p+qzjgi23vo9zgQuffKtqs/fzFqeZtHNxdKlS9l6660bTgjNkCTabLIFPTsV/0SDmdm6aRZJAWhxCaGeJFTy7l4zs7XXLJqPzKxl6rX0ziZf5qwmX2LTapZJodoPlcw6q7Jfw9xsh/15b/pTnH3RVTw2YSKS6LBZJ0aNGgVtt+Q/773L5Rf+gMkTnwFgj732YejwK9h8i478e85sBnzmU/xg+BX8zwmnAvDTC85l190/zZFH/09V18fMrJxm03y0obh7zMPMfWM+Ux69mxfGj2L06NF06pR0lT/s3LPo1qMXD014nocmPE/X7j25+LzvZNNu1bkLd/7mZpYvc+eUZlYbTgpV9vqbC/j4Np1p1SrZtN26dWPLLbdk9r9m8tILkzn1O+dmdU87+zymTnmeObP+BcCWW23N3p/9HGPuGVmT2M3MnBSq7OgjvsADjzzJHl8YxDkXX8PzzyedZs185Z/s1OeTtG7dOqvbunVrdurzSV59eVpWduIZ3+V3I25k5cqVa8zbzCxvTgpV1m3bbZj+5Ggu++GZtGolDjnkEMaPH09EmTukkhEfTt+jJ7vtsSdj7y/1+/ZmZvlqlheaa619+3YcdvD+HHbw/myz3e7cf//9fOnoE/jn1CmsWrUqa1patWoV06e9yCe232m16U8e8j3OOe14+u7zmVqEb2YtmM8Uquy5F6Yx9435QHLQnzJlCj179qRH70+w8667M+L6q7K6I66/il12+xQ9en9itXn03n5HtttxZ54cP65JYzcza5ZnCmX7HM/xRzFWrFhB+3btmLfgbU459xI+WLYcgL0/8zmGDBnCywuWcvGVN3DZhedx+Gf3JCLYve9eDLvyhpLzO/nM7/H1/p/PLV4zs1KaZVKohanTZ7Jdr270P2h/+h+0/4cjsl9KWsoWnTpx2fUjSk7ftXsP7hv/t2x4pz6fZPLshn7N1Mys+pwUquDm393D9beO5NqLv1/rUMzM1ouTQhWc/s2jOP2bR9U6DDOz9eYLzWZmlnFSMDOzjJOCmZllnBTMzCzTPC80D+tY3fmd+nhF1ermvsm3z7+cl16eyaoIDj/0AK785W289NJLTJjyMgcc/EUAbrrmcjbZZFOOP/3M6sZpZraefKZQJRHBV0/5Pv/V/0BemfAnXn5qNIsWL+H8889n8uTJPPXYI1VbljvLM7O8NM8zhRp47K9/p0P7dpzw9SOBpAfUnw87h577DqRt27asWLmKyROf5sRvfxeAV1+ZzklfO5zX59Zx7Enf4tgTTwPgwfvu5s5bR7Bi+TJ2+3Rfzr/0alq3bs2+O3XjuFPO4P+eeIxzfnwJe+69X83W1cyaL58pVMnUl2fS95O7rFa2xeab0atXLy644AK+eMRXGDXuKfoP/CoAs159mZt+fy9/eGA8t/z8CpYvX87MV6Yz7oHR3D76z4wa9xStW7Vm7Oikt9T3lyxm+5124Q8PPOqEYGa58ZlClUREya6xy5UfcPAXade+Pe3at2erzl14e8E8npnwBNOm/INjDz8YgKVLl7JV5y5AcuZx6ICB+a6EmbV4TgpVsuuO23Hv2PGrlb33n0XMmTNntR/WqdeuXfvsfatWrVixYiURcMTXBvGdoRetWb99h5LzMTOrJjcfVckhB+zNkveX8rs/PggkF4PPGf5zBg8ezDbbbMOSxYsancc++3+ORx8aw1sLkq633124kLl1s3ON28ysUPM8Uxj2bunyHLvOlsToX1/NGT+6jEuu/RWrIhhw8P789Kc/ZfHixVw0/FKO/tIB2YXmUrbbcWe+fe75
fOvYr7Jq1SratG3Lj35yJdt265Fb3GZmhZpnUqiR7l0/xgO3X7d6Yfv2tG/fnjsfeqzsdIVdZvcf+NXsYnShp6fXVS1OM7Ny3HxkZmYZJwUzM8s0m6QQEbUOoSYigqBlrruZVV+zSAodOnTgrbfeanGJISJYseQ9Xntnea1DMbNmollcaO7WrRt1dXXMnz+/4YrvzGuagAq9Ow2ANxe+X/VZB8Fr7yznhmcWVn3eZtYyNYuk0LZtW3r37t14xWH75h/MGstMbo89bOhDTb9sM7O11Cyaj8zMrDpyTQqS+kuaLmmGpKElxveQ9BdJz0uaImlAnvGYmVnDcksKkloDNwKHAX2AYyT1Kap2ATAqIj4NDAJ+mVc8ZmbWuDzPFPYGZkTEzIhYBtwFHFlUJ4At0vcdgbk5xmNmZo3IMyl0BeYUDNelZYWGAd+QVAeMBUr+PqWkUyVNkjSp0TuMzMxsneWZFNb8EQHWeMrqGOC2iOgGDADukLRGTBExIiL6RUS/Ll265BCqmZlBvkmhDuheMNyNNZuHTgJGAUTE34AOQOccYzIzswbkmRQmAjtI6i2pHcmF5DFFdWYDhwBI2oUkKbh9yMysRnJLChGxAhgCjAOmkdxlNFXScEn1vyt5DnCKpH8AI4HB0dL6qjAz24Dk+kRzRIwluYBcWHZhwfuXgP3zjMHMzCrnJ5rNzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmaZXJOCpP6SpkuaIWlomTpHS3pJ0lRJd+YZj5mZNaxNXjOW1Bq4EfgCUAdMlDQmIl4qqLMD8ENg/4hYKOmjecVjZmaNy/NMYW9gRkTMjIhlwF3AkUV1TgFujIiFABExL8d4zMysEXkmha7AnILhurSs0I7AjpImSHpaUv9SM5J0qqRJkibNnz8/p3DNzCzPpKASZVE03AbYATgQOAb4taROa0wUMSIi+kVEvy5dulQ9UDMzS+SZFOqA7gXD3YC5Jer8KSKWR8S/gOkkScLMzGogz6QwEdhBUm9J7YBBwJiiOvcDBwFI6kzSnDQzx5jMzKwBuSWFiFgBDAHGAdOAURExVdJwSQPTauOAtyS9BPwFODci3sorJjMza1hut6QCRMRYYGxR2YUF7wP4XvoyM7Ma8xPNZmaWcVIwM7OMk4KZmWUqSgqSviZp8/T9BZLuk7RnvqGZmVlTq/RM4ccR8R9JnwW+BNwO3JRfWGZmVguVJoWV6d8vAzdFxJ+AdvmEZGZmtVJpUvi3pFuAo4GxktqvxbRmZraRqPTAfjTJg2b9I+IdYCvg3NyiMjOzmqgoKUTEEmAe8Nm0aAXwSl5BmZlZbVR699FFwA9IfhAHoC3w+7yCMjOz2qi0+egrwEBgMUBEzAU2zysoMzOrjUqTwrK0n6IAkLRpfiGZmVmtVJoURqV3H3WSdArwKPCr/MIyM7NaqKiX1Ii4StIXgPeAnYALI+KRXCMzM7Mm12hSkNQaGBcRhwJOBGZmzVijzUcRsRJYIqljE8RjZmY1VOmP7CwFXpD0COkdSAARcVYuUZmZWU1UmhQeSl9mZtaMVXqh+XZJ7YAd06LpEbE8v7DMzKwWKkoKkg4k6S57FiCgu6TjI+LJ/EIzM7OmVmnz0dXAFyNiOoCkHYGRQN+8AjMzs6ZX6cNrbesTAkBEvEzS/5GZmTUjlZ4pTJL0G+COdPhY4Nl8QjIzs1qpNCl8C/g2cBbJNYUngV/mFZSZmdVGpUmhDXBdRFwD2VPO7XOLyszMaqLSawrjgY8UDH+EpFM8MzNrRipNCh0iYlH9QPp+k3xC
MjOzWqk0KSyWtGf9gKR+wPv5hGRmZrVS6TWFs4E/SppL8kM72wJfzy0qMzOriQbPFCTtJeljETER2Bm4G1gB/Bn4VxPEZ2ZmTaix5qNbgGXp+/2AHwE3AguBETnGZWZmNdBY81HriHg7ff91YERE3AvcK2lyvqGZmVlTa+xMobWk+sRxCPBYwbhKr0eYmdlGorED+0jgCUkLSO42egpA0vbAuznHZmZmTazBpBARl0oaD3wceDgiIh3VCjgz7+DMzKxpVfIbzU9HxOiIKPwZzpcj4rnGppXUX9J0STMkDW2g3lGSIn3+wczMaqTSh9fWWto/0o3AYUAf4BhJfUrU25yko71n8orFzMwqk1tSAPYGZkTEzIhYBtwFHFmi3iXAz4ClOcZiZmYVyDMpdAXmFAzXpWUZSZ8GukfEgw3NSNKpkiZJmjR//vzqR2pmZkC+SUElyiIbKbUCfg6c09iMImJERPSLiH5dunSpYohmZlYoz6RQB3QvGO4GzC0Y3hzYDXhc0ixgX2CMLzabmdVOnklhIrCDpN6S2gGDgDH1IyPi3YjoHBG9IqIX8DQwMCIm5RiTmZk1ILekEBErgCHAOGAaMCoipkoaLmlgXss1M7N1l2tXFRExFhhbVHZhmboH5hmLmZk1Ls/mIzMz28g4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFMzPLOCmYmVkm16Qgqb+k6ZJmSBpaYvz3JL0kaYqk8ZJ65hmPmZk1LLekIKk1cCNwGNAHOEZSn6JqzwP9ImJ34B7gZ3nFY2ZmjcvzTGFvYEZEzIyIZcBdwJGFFSLiLxGxJB18GuiWYzxmZtaIPJNCV2BOwXBdWlbOScD/lhoh6VRJkyRNmj9/fhVDNDOzQnkmBZUoi5IVpW8A/YArS42PiBER0S8i+nXp0qWKIZqZWaE2Oc67DuheMNwNmFtcSdKhwPnA5yPigxzjMTOzRuR5pjAR2EFSb0ntgEHAmMIKkj4N3AIMjIh5OcZiZmYVyC0pRMQKYAgwDpgGjIqIqZKGSxqYVrsS2Az4o6TJksaUmZ2ZmTWBPJuPiIixwNiisgsL3h+a5/LNzGzt+IlmMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpZxUjAzs4yTgpmZZZwUzMws46RgZmYZJwUzM8s4KZiZWcZJwczMMk4KZmaWaVPrAJpSr6V3NvkyZzX5Es3M1p3PFMzMLOOkYGZmGScFMzPLOCmYmVnGScHMzDJOCmZmlnFSMDOzjJOCmZllnBTMzCzjpGBmZhknBTMzyzgpmJlZxknBzMwyTgpmZpbJNSlI6i9puqQZkoaWGN9e0t3p+Gck9cozHjMza1huSUFSa+BG4DCgD3CMpD5F1U4CFkbE9sDPgSvyisfMzBqX55nC3sCMiJgZEcuAu4Aji+ocCdyevr8HOESScozJzMwaoIjIZ8bSUUD/iDg5HT4O2CcihhTUeTGtU5cOv5rWWVA0r1OBU9PBnYDpuQTdsM7AgkZrWbV4ezcdb+umVavt3TMiujRWKc+f4yz1jb84A1VSh4gYAYyoRlDrStKkiOhXyxhaEm/vpuNt3bQ29O2dZ/NRHdC9YLgbMLdcHUltgI7A2znGZGZmDcgzKUwEdpDUW1I7YBAwpqjOGOD49P1RwGORV3uWmZk1Krfmo4hYIWkIMA5oDdwaEVMlDQcmRcQY4DfAHZJmkJwhDMorniqoafNVC+Tt3XS8rZvWBr29c7vQbGZmGx8/0WxmZhknBTMz
y7SopCDpVknz0ucj6su2kvSIpFfSv1sWTbOXpJXpcxeF5VtI+rekX5RYzpjCZbRkklpLel7Sg+nwkLRbk5DUuaDeuZImp68X022+laSdCsonS3pP0tkF052ZdqUyVdLParGOG4Jy20nSJZKmpGUPS9o2rb+zpL9J+kDS94vmVbJ7GkmHSHounddfJW3f1OtZS7U6fjS2jGprUUkBuA3oX1Q2FBgfETsA49NhIOuq4wqSi+XFLgGeKC6U9FVgUZXibQ6+A0wrGJ4AHAq8VlgpIq6MiD0iYg/gh8ATEfF2REwvKO8LLAFGA0g6iOSp+N0jYlfgqvxXZ8PUwHa6MiJ2T8sfBC5MJ3kbOIuibdZI9zQ3Acem87oTuCDn1drQ3EZtjh9ll5GHFpUUIuJJ1nwOorCrjduB/yoYdyZwLzCvcAJJfYFtgIeLyjcDvgf8pHpRb7wkdQO+DPy6viwino+IWY1MegwwskT5IcCrEVGfUL4FXB4RH6TznldimpYo204R8V5B+aakD4dGxLyImAgsL5q2oe5pAtgifd+RNZ87atZqePxoaBlV16KSQhnbRMTrAOnfjwJI6gp8Bbi5sLKkVsDVwLkl5nVJOm5JngFvRK4FzgNWVTqBpE1Ivo3dW2L0IFZPFjsCB6Q97D4haa/1CbYZWW07SbpU0hzgWD48UyinKzCnYLguLQM4GRgrqQ44Dri8ahFvvJri+FFyGXlxUijvWuAHEbGyqPwMYGxEFP7jIGkPYPuIGN1UAW7IJB0OzIuIZ9dy0iOACRGx2jey9AHIgcAfC4rbAFsC+5L8k42SWnaHiqW2U0ScHxHdgT8AQ8pNWz+LEmX1961/FxgQEd2A3wLXrH/EzdZGe/zIs++jjcWbkj4eEa9L+jgfnur1A+5KjzGdgQGSVgD7kXw7PQPYDGgnaRFJG3lfSbNItutHJT0eEQc27epsMPYHBkoaAHQAtpD0+4j4RiPTFZ8N1DsMeC4i3iwoqwPuS5+C/7ukVSSf1fz1D3+jVWo71bsTeAi4qIHpS3ZPI6kL8KmIeCYtvxv4cxXi3dg1xfGj3DLyEREt6gX0Al4sGL4SGJq+Hwr8rMQ0twFHlSgfDPyisWW09BdwIPBgUdksoHNRWX3fV5uWmMddwAlFZacDw9P3O5I0e6jW61vjbb3adgJ2KHh/JnBPUf1hwPcLhtsAM4HeQDvgH8CuafkCYMe03knAvbUBmbNkAAACjElEQVRe3xps3yY/flSyjGq+WtSZgqSRJAeozmm76EUk7aKjJJ0EzAa+VrsImz9JZ5FcZ/gYMEXS2Ei7Vydpg304IhYXTbMJ8AXgtKLZ3Qrcmt6+tww4PtL/nJaozHa6XNJOJNd1XiNJpEj6GDCJ5MLxqvQ23z4R8Z5KdE+TTnMKcG96RrYQOLFp1mzDUMPjR5Meo9zNhZmZZXyh2czMMk4KZmaWcVIwM7OMk4KZmWWcFMzMLOOkYGZmGScFazEkPS7pS0VlZ0v6ZQPTVLXHW0m3FXejbLYhcVKwlmQka/4OeLluNcxaJCcFa0nuAQ6X1B5AUi9gW2CypPHpD8i8IOnI4gklHaj0h4LS4V9IGpy+75v20vqspHFp/zSNKjddekZzhaS/S3pZ0gHru+JmlXJSsBYjIt4C/s6HP5QyiKRjt/eBr0TEnsBBwNWV9rYqqS1wA0nfNn1Jut64tArTtYmIvYGzabgDO7OqalF9H5nxYRPSn9K/J5J0F/1TSZ8j6SOoK8mPoLxRwfx2AnYDHknzSGvg9SpMd1/691mSDtLMmoSTgrU09wPXSNoT+EhEPJc2A3UB+kbE8rT74g5F061g9TPr+vECpkbEfmsZR2PTfZD+XYn/T60JufnIWpSIWAQ8TtJcU3+BuSPJDwItT3/3uWeJSV8D+khqL6kjyU9eAkwHukjaD5JmIUm7VhDKuk5nlit/A7GWaCRJ80z9nUh/AB6QNAmYDPyzeIKImCNpFDAFeAV4Pi1flt5ien2aLNqQ
/OrW1IYCWNfpzPLmrrPNzCzj5iMzM8u4+cgsB5JuJPmd6kLXRcRvaxGPWaXcfGRmZhk3H5mZWcZJwczMMk4KZmaWcVIwM7PM/wPzMzXO68HucAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -546,7 +546,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHzVJREFUeJzt3XmUFOW9xvHvw7C5ISpohGEzCkqMK2pUTFwjbphVMcQrccviGlfu1ajBY+ISYxLjEqLGJYoSYwxGIu5LNBJwwyCiiCgjKoi4ICLL/O4fVVM2Q89MA13TA/N8zplDV9VbVb/uGfrpeqvqbUUEZmZmAG0qXYCZmbUcDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FGyVSOopab6kqkrXUm6S9pRUswrr/1PSUeWsqZIk9ZYUktpWuhbLj0PBSiJphqRP0wCo++kWEW9GxLoRsXQltjlM0r+aaHOYpKckLZD0aANtvifptvTxSElTJdVKGlak7U8lvSPpQ0k3SOpQsCwkbb6iz6MhEXFARNxUru01RdIh6XPbsGDeoZLekrT+SmxvhqR9y1ultXQOBVsRh6QBUPczq7HGSqzq39j7wG+AixtpcyAwNn38AvAT4Nki9ewPDAf2AXoDmwE/X8X6WoyIuAd4GLgCQFJn4BrgxxHxYSVrq89HGy2XQ8FWSf0uBUmPSrpI0pPAAmCz9IhguqSPJb0uaaikrYBrgV3To44Pim0/Ih6MiNFA0QBKQ2c/4L60/VUR8RCwsEjzo4DrI2JyRMwDLgSGpdt5PG3zQlrP4QX7OF3SbElvS/rBCrw2j0o6Nn28uaTH0iOU9yTdUdBuN0kT0mUTJO1WbxsXSnoyff3ul9Slkd2eDByQBuAVwGMRMaaRGgdLmizpg3RfW6XzbwF6Avekr8dZBasNlfRm+jzOKdhWG0nDJb0maa6k0XVHLQV/J8dIepMkvKwligj/+KfJH2AGsG+R+b2BANqm048CbwJfAtoC6wMfAf3S5ZsCX0ofDwP+VeL+jwUeLTL/K8C/i8z/FzCs3rwXgMMLpruktW+UTgewecHyPYElwAigHckRyQJgg3T594BJjdT8KHBs+ngUcA7JB7GOwMB0/obAPODI9PU6Ip3eqGAbrwF9gbXS6YubeK2OAN4D5gBdG2nXF/iEJFTbAWcB04D2xX7nBb/rP6a1bAt8BmyVLj8VeBqoBjoAfwBG1Vv3ZmAdYK1K/037p/iPjxRsRdydfqL8QNLdjbS7MZJP40tI3lRrga0lrRURb0fE5DLWdBCfdx01ZV2gsBul7vF6jayzGBgREYsjYiwwH+gHEBG3RcQ2Je57MdAL6BYRCyOi7lzKQcCrEXFLRCyJiFHAy8AhBev+KSJeiYhPgdHAdk3s62mSML4/IuY00u5w4N6IeCAiFgO/Inmz362RdQB+HhGfRsQLJEG7bTr/h8A5EVETEZ8BFwDfqddVdEFEfJI+F2uBHAq2Ir4REZ3Tn2800m5m3YOI+ITkzedHwNuS7pW0ZRlrKjyf0JT5QKeC6brHHzeyztw03OosIAmXFXUWIOA/aXfN0en8bsAb9dq+AXQvmH5nBfc/kuQT+YGFXVFFLLPviKgl+d11b3CNxuvpBfyt7oMDMAVYCmxS0H4m1qI5FCwPywy9GxHjImI/kq6jl0m6H5Zrt6IkfSHd5nInlRswmc8/1ZI+fjci5q5KHaWIiHci4riI6Ebyifrq9EqnWSRvpoV6Am+tzH4kHQP0IDnZ/n/AHyW1b6D5MvuWpHTdun2v6O9nJnBAwQeHzhHRMSIKn4uHZW7hHAqWK0mbpCcz1yHpf55P8ukR4F2gupE3LSRVSepI0t/eRlJHSe3SxQcC90VEFLRvn7YX0C5tX/d3fjNwjKT+kjYAzgVuLNjduyRXJJWdpO9Kqk4n5
5G8OS4lOcrpm15W2zY9wd0f+MdK7KMbcBlwXNp9cy0wl+RcRjGjgYMk7ZO+pqeT/I6eSpev6OtxLXCRpF5pPV0lHbqiz8Mqy6FgeWtD8mYzi+Ty0q+RfIqF5AqUycA7kt5rYP0jgU9JLq3cI31cd6RRrOvo/rTNbiTdKJ8CXwWIiPuAS4FHSLpN3gDOL1j3AuCmtPvjsKaeWHoVVannR3YCxkuaD4wBTomI19OjlINJXqO5JN1MB0dEQ69HY64Gbo+IJwDSsDwOOFXSl+o3joipwPeBK0lOTB9CctnxorTJL4Fz09fjjBL2/9v0ud0v6WOScxu7rMTzsApSwYcss9VGevLyHeCL0cKuwa+TXuZ6XUTcXOlazErlIwVbXW0I/KwFB8LaJF0vr1e6FrMVkVsoKBlCYLak/zawXJJ+J2mapEmSdsirFlvzRMTsiLim0nUUI2ljkqOYx0julzBbbeR5pHAjMKiR5QcAW6Q/x5P0GZut9tLA6hQRQ8P9s7aayS0UIuJxkhOLDTkUuDkSTwOdJW2aVz1mZta0Sg5K1Z1lb2SpSee9Xb+hpONJjiZYZ511dtxyy5W892nWcyu33qrotn1l912J/VZy363x9a7wc37xreY/rfPl7umgr63w9V5ZzzzzzHsR0bWpdpUMBRWZV/RQOyJGklxeyIABA2LixIkrt8cLVnj04FV3wcTK7rsS+63kvlvj650+597D7232Xc+4+KCK7HfixQcBlXvOld73ypBU/875oip59VENyd2TdappYCRMMzNrHpUMhTHA/6RXIX0F+DAilus6MjOz5pNb95GkUSRDD3dR8pWG55MMz0tEXEtyJ+qBJEP1LgBKHqfezMzykVsoRMQRTSwP4IS89m9WCb0X3tbs+5zR7Hts+Tp1aMNJu2xAr87tUNHTlytvypQpAPxxcPNfLFm378Z07NiR6upq2rVr12TbYvyVeGa2xjlplw3Y4YvdaLv2eiSDv5bPVtWdAVhcU/TLAnNVt++GRARz586lpqaGPn36rNQ+PMyFma1xenVul0sgtHSS2GijjVi4sNi30ZbGoWBmaxyhVhcIdVb1eTsUzMws43MKZrbGG/z7J8u6vVJuIvtKv2qemvIml17wv/znyceRRIcOHbn0mj9R3bMXH3/0IRefdzbPTxgPwHY77cLwEZewXqf1eWvmmxy427acPeISvveD4wH4xblnMmjP3Rk2bFhZn0t9PlIwM8vJuDF3Mefdd7jzgSf564NPccV1t9CpU3LX+wVnnkx1z97c++Rz3Pvkc3Tv0Yufn3VKtu6GXbpy2/XXsnjRooY2nwuHgplZTubMfpcuG29CmzbJW+0mm3anU+fOvPn6dF568XmOP+XMrO0PTz2LyZOeY+aM5Cs4NthwI3Ye+FXG3DmqWWt2KJiZ5WT/Q77B4w/ex2H778GvRpzLlP9OAmD6qy/Tr/+XqaqqytpWVVXRr/+Xee2Vz+9FOPonP+XmkVexdOnS5badF4eCmVlONtm0O39/dAInDz+PNm3E8UMOZfy/HiOigauEkgXZZHXPXmy93Q6MvfsvzVazTzSbmeWofYcODNxrPwbutR8bddmYh8fdy9Cjf8TLkydRW1ubdS3V1tYydcp/2Wzzfsusf+yJp3H6D49ix112a5Z6faRgZpaTKS++wOx3knE+a2treeXlyXTr3oOefTZjyy9tw8jf/SprO/J3v2KrrbelZ5/NltlGn8378sW+W/L4Q+OapWYfKZjZGm/MibuXbVvbNDHUBMCSJUto37497783h5+ffQqLPvsMgK2325Ehw44D4OeXXckvzzuLgwfuQESwzY47ccFlVxbd3rEnncbhg75WtufQGIeCmVmZvfbKFKp79WH3vfZl9732LdqmU+fO/PJ3I4su696jJ3c99O9sul//L/P8m++XFEiryqFgZlZGo2+5gVF/GsmZ5/+i0qWsFIeCmVkZHXbk0Rx25NGVLmOl+USzmZllHApmZpZxKJiZW
cahYGZmGZ9oNrM13jbX9SrvBi/4sMkm7779Fr8450ymvzqV2tpavrrv/px2zghee3Uqc959mz32/joA1/z6YtZeex2O+tFJ5a1xJflIwcyszCKCnx73P+y1/0Hc88QzjHl8Igs++YQrL72QqZNf5ImHHyjbvso9WJ6PFMzMyuw/Tz5Ohw4d+MbhQ4FkBNQzz7+IQbtuQ9u27SCC5yc8zdEn/BSA116dyjHfPZi3Z9Uw9JgfM/ToHwLwj7vu4LYbRrJk8SK23n5H7rjpeqqqqlh33XU57bTTGDduHJdffjkDBw4sW+0+UjAzK7Npr7xM/y9vt8y8ddfrRLfqnhx38hl8/ZBvMnrcEwwa/C0AZrz2Ctf8+a/ces9D/OGKS1i8eDHTX53KuHv+xk1/u4/R456gqk0Vt956KwCffPIJW2+9NePHjy9rIICPFMzMyq/eENiF84sNmb3H3l+nfYcOtO/QgQ27dOX992Yz/snHmDLpBYYevDcACxcuZKvNegDJkce3v/3tXEp3KJiZldkX+27Jg2PHLDNv/scf8c6st6hqs3wHTfv2HbLHbdq0YcmSpUTAId8dwinDz8+W1Y191LFjx2W+oKec3H1kZlZmuwz8Ggs//ZR77rwdSE4GX37hzxj83e+xUdeNWfDJ/Ka3sftXefDeMcx9bw4AH86bxxtvvJFr3eAjBTNrBSYdW74301JGKpXEFdfdwkXnnMHI315GbW0tA/fej5PP/hmfLljADVf9hsP23yM70VzMF/tuyQlnnsOPh36L2tpa2rZrxw0jr6VXrzJfXluPQ8HMLAdf6FbNlX+6fbn57Tt04LZ7H25wvcIhswcN/lZ2Mho+D6T585s+0lhZ7j4yM7OMQ8HMzDIOBTNb4wRBRFS6jIpY1eftUDCzNc4bHyxmyYKPWl0wRARz586lY8eOK70Nn2g2szXOlePncRLQq/N7iCI3ka2CKR+vBcC78z4t63ZXZN+N6dixI9XV1Su9D4eCma1xPvqslosen5vLtmdcfBAABwy/N5ftl7LvPLn7yMzMMrmGgqRBkqZKmiZpeJHlPSU9Iuk5SZMkHZhnPWZm1rjcQkFSFXAVcADQHzhCUv96zc4FRkfE9sAQ4Oq86jEzs6bleaSwMzAtIqZHxCLgduDQem0C6JQ+Xh+YlWM9ZmbWhDxDoTsws2C6Jp1X6ALg+5JqgLFA0e+jk3S8pImSJs6ZMyePWs3MjHxDodh1YPUvGj4CuDEiqoEDgVskLVdTRIyMiAERMaBr1645lGpmZpBvKNQAPQqmq1m+e+gYYDRARPwb6Ah0ybEmMzNrRJ6hMAHYQlIfSe1JTiSPqdfmTWAfAElbkYSC+4fMzCokt1CIiCXAicA4YArJVUaTJY2QNDhtdjpwnKQXgFHAsGht96WbmbUgud7RHBFjSU4gF847r+DxS8DuedZgZmal8x3NZmaWcSiYmVnGoWBmZhmPkmprpN4Lb2v2fc5o9j2alZ+PFMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzTK6hIGmQpKmSpkka3kCbwyS9JGmypNvyrMfMzBrXNq8NS6oCrgL2A2qACZLGRMRLBW22AP4X2D0i5knaOK96zMysaXkeKewMTIuI6RGxCLgdOLRem+OAqyJiHkBEzM6xHjMza0KeodAdmFkwXZPOK9QX6CvpSUlPSxpUbEOSjpc0UdLEOXPm5FSumZnlGQoqMi/qTbcFtgD2BI4ArpPUebmVIkZGxICIGNC1a9eyF2pmZok8Q6EG6FEwXQ3MKtLm7xGxOCJeB6aShISZmVVAnqEwAdhCUh9J7YEhwJh6be4G9
gKQ1IWkO2l6jjWZmVkjcguFiFgCnAiMA6YAoyNisqQRkganzcYBcyW9BDwCnBkRc/OqyczMGpfbJakAETEWGFtv3nkFjwM4Lf0xM7MK8x3NZmaWcSiYmVnGoWBmZpmSQkHSdyWtlz4+V9JdknbItzQzM2tupR4p/CwiPpY0ENgfuAm4Jr+yzMysEkoNhaXpvwcB10TE34H2+ZRkZmaVUmoovCXpD8BhwFhJHVZgXTMzW02U+sZ+GMmNZoMi4gNgQ+DM3KoyM7OKKCkUImIBMBsYmM5aAryaV1FmZlYZpV59dD5wNskX4gC0A/6cV1FmZlYZpXYffRMYDHwCEBGzgPXyKsrMzCqj1FBYlI5TFACS1smvJDMzq5RSQ2F0evVRZ0nHAQ8Cf8yvLDMzq4SSRkmNiF9J2g/4COgHnBcRD+RamZmZNbsmQ0FSFTAuIvYFHARmZmuwJruPImIpsEDS+s1Qj5mZVVCpX7KzEHhR0gOkVyABRMTJuVRlZmYVUWoo3Jv+mJnZGqzUE803SWoP9E1nTY2IxfmVZWZmlVBSKEjak2S47BmAgB6SjoqIx/MrzczMmlup3UeXA1+PiKkAkvoCo4Ad8yrMzMyaX6k3r7WrCwSAiHiFZPwjMzNbg5R6pDBR0vXALen0UOCZfEoyM7NKKTUUfgycAJxMck7hceDqvIoyM7PKKDUU2gK/jYhfQ3aXc4fcqjIzs4ooNRQeAvYF5qfTawH3A7vlUZSVT++Ft1VkvzMqslczW1WlnmjuGBF1gUD6eO18SjIzs0opNRQ+kbRD3YSkAcCn+ZRkZmaVUmr30anAXyTNIvminW7A4blVZWZmFdHokYKknSR9ISImAFsCdwBLgPuA15uhPjMza0ZNdR/9AViUPt4V+D/gKmAeMDLHuszMrAKa6j6qioj308eHAyMj4q/AXyU9n29pZmbW3Jo6UqiSVBcc+wAPFywr9XyEmZmtJpp6Yx8FPCbpPZKrjZ4AkLQ58GHOtZmZWTNrNBQi4iJJDwGbAvdHRKSL2gAn5V2cmZk1r1K+o/npiPhbRBR+DecrEfFsU+tKGiRpqqRpkoY30u47kiK9/8HMzCqk1JvXVlg6PtJVwAFAf+AISf2LtFuPZKC98XnVYmZmpcktFICdgWkRMT0iFgG3A4cWaXchcCmwMMdazMysBHmGQndgZsF0TTovI2l7oEdE/KOxDUk6XtJESRPnzJlT/krNzAzINxRUZF5kC6U2wBXA6U1tKCJGRsSAiBjQtWvXMpZoZmaF8gyFGqBHwXQ1MKtgej1ga+BRSTOArwBjfLLZzKxy8gyFCcAWkvpIag8MAcbULYyIDyOiS0T0jojewNPA4IiYmGNNZmbWiNxCISKWACcC44ApwOiImCxphKTBee3XzMxWXq5DVUTEWGBsvXnnNdB2zzxrMTOzpuXZfWRmZqsZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmaZXL95zVq33gtva/Z9zmj2PZqtWXykYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWSbXL9mRNAj4LVAFXBcRF9dbfhpwLLAEmAMcHRFv5FlTpfgLZ8xsdZDbkYKkKuAq4ACgP3CEpP71mj0HDIiIbYA7gUvzqsfMzJqWZ/fRzsC0iJgeEYuA24FDCxtExCMRsSCdfBqozrEeMzNrQp6h0B2YWTBdk85ryDHAP4stkHS8pImSJs6ZM6eMJZqZWaE8Q0FF5kXRhtL3gQHAZcWWR8TIiBgQEQO6du1axhLNzKxQnieaa4AeBdPVwKz6jSTtC5wDfC0iPsuxHjMza0KeRwoTgC0k9ZHUHhgCjClsIGl74A/A4IiYnWMtZ
mZWgtxCISKWACcC44ApwOiImCxphKTBabPLgHWBv0h6XtKYBjZnZmbNINf7FCJiLDC23rzzCh7vm+f+zcxsxfiOZjMzy+R6pNDS+K5iM7PG+UjBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyuYaCpEGSpkqaJml4keUdJN2RLh8vqXee9ZiZWeNyCwVJVcBVwAFAf+AISf3rNTsGmBcRmwNXAJfkVY+ZmTUtzyOFnYFpETE9IhYBtwOH1mtzKHBT+vhOYB9JyrEmMzNrhCIinw1L3wEGRcSx6fSRwC4RcWJBm/+mbWrS6dfSNu/V29bxwPHpZD9gai5FN64L8F6Traxc/Ho3H7/WzatSr3eviOjaVKO2ORZQ7BN//QQqpQ0RMRIYWY6iVpakiRExoJI1tCZ+vZuPX+vm1dJf7zy7j2qAHgXT1cCshtpIagusD7yfY01mZtaIPENhArCFpD6S2gNDgDH12owBjkoffwd4OPLqzzIzsybl1n0UEUsknQiMA6qAGyJisqQRwMSIGANcD9wiaRrJEcKQvOopg4p2X7VCfr2bj1/r5tWiX+/cTjSbmdnqx3c0m5lZxqFgZmaZVhsKkm6QNDu9V6Ju3oaSHpD0avrvBvXW2UnS0vQejLp5PSXdL2mKpJc8VEfDJFVJek7SP9LpE9MhTkJSl4J260u6R9ILkiZL+kG97XSS9Jak3zf3c1gdSOon6fmCn48knSrpQkmT0nn3S+qWth+azp8k6SlJ26bze0h6JP3bnizplMo+s5Yl7/eQ9CKd8em27kgv2Mldqw0F4EZgUL15w4GHImIL4KF0GsiG7biE5MR5oZuByyJiK5K7uGfnVfAa4BRgSsH0k8C+wBv12p0AvBQR2wJ7ApfX+w9xIfBYjnWu1iJiakRsFxHbATsCC4C/kfydbpPO/wdwXrrK68DXImIbkte27kToEuD09G/7K8AJRYaqac1uJN/3kEuAK9JtzSMZFih3rTYUIuJxlr8nonDYjZuAbxQsOwn4KwVv+ul/kLYR8UC6zfkRsSC3oldjkqqBg4Dr6uZFxHMRMaNI8wDWS4c8WZfk97Qk3c6OwCbA/XnXvIbYB3gtIt6IiI8K5q9DeqNoRDwVEfPS+U+T3FNERLwdEc+mjz8mCfTuzVZ5C5fne0j6t783yfA/xbaVm1YbCg3YJCLehuQ/BLAxgKTuwDeBa+u17wt8IOmutFvksvTTgC3vN8BZQG0JbX8PbEVys+OLwCkRUSupDXA5cGZuVa55hgCj6iYkXSRpJjCUz48UCh0D/LP+zLRLY3tgfC5VrjnK9R6yEfBBRCxJ29XQTIHsUCjNb4CzI2JpvfltgT2AM4CdgM2AYc1bWssn6WBgdkQ8U+Iq+wPPA92A7YDfS+oE/AQYGxEz86l0zZJ2uQ0G/lI3LyLOiYgewK3AifXa70USCmfXm78uySfcU+sdbVjpVvQ9pKQhgPKQ59hHq6N3JW0aEW9L2pTPD/MGALenA7h2AQ6UtIQkvZ+LiOkAku4m6Xu9vvlLb9F2BwZLOhDoCHSS9OeI+H4D7X8AXJze3T5N0uvAlsCuwB6SfkLSrdRe0vyIWO67OgxIhq1/NiLeLbLsNuBe4HwASduQdO0dEBFz6xpJakcSCLdGxF35l7zaK9d7yA1AZ0lt06OFYsME5cJHCssqHHbjKODvABHRJyJ6R0Rvkj6+n0TE3SRDeWwgqW7kwb2Bl5q35JYvIv43IqrT128IyXAmDQUCwJskfeFI2oRkZNzpETE0Inqm2zkDuNmB0KgjWLbraIuCZYOBl9P5PYG7g
CMj4pWC9iL5gDMlIn7dLBWv/sryHpJ+IHqEZPifZbaVt1YbCpJGAf8G+kmqkXQMcDGwn6RXgf3S6Qalh4JnAA9JepHkkO+P+Va+5pB0sqQakk9BkyTVnYS+ENgtfU0fIjns9tDOK0DS2iR/w4Wf7i+W9F9Jk4Cvk1wNBsm5hY2Aq9PLVSem83cHjgT2Lri89cBmegotXjO8h5wNnKZkGKCNaKYeCA9zYWZmmVZ7pGBmZstzKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYK2GpEcl7V9v3qmSrm5knfllruHGwmGTzVoah4K1JqNY/nvAlxkwzqy1cyhYa3IncLCkDpCN/NkNeF7SQ5KelfSipEPrryhpT6VfDpRO/17SsPTxjpIek/SMpHHpmDdNami99IjmEkn/kfSKpD1W9YmblcqhYK1GOtDbf/j8i1GGAHcAnwLfjIgdgL1IvtSn2CiVy0kHjLsS+E5E7EgykNlFZVivbUTsDJxKOmidWXPwKKnW2tR1If09/fdokvFmfiHpqyTf99Cd5It83ilhe/2ArYEH0hypAt4uw3p1YxY9A/QuYXtmZeFQsNbmbuDXknYA1oqIZ9NuoK7AjhGxWNIMkiG+Cy1h2SPruuUCJkfEritYR1PrfZb+uxT/P7Vm5O4ja1UiYj7wKEl3Td0J5vVJvgRocfpFM72KrPoG0F9SB0nrkw7tDUwFukraFZJuIUlfKqGUlV3PLFf+BGKt0SiS7pm6K5FuBe5Jh4x+nvR7BgpFxExJo4FJwKvAc+n8Reklpr9Lw6ItybdsTW6sgJVdzyxvHjrbzMwy7j4yM7OMu4/MciDpKpJvLiv024j4UyXqMSuVu4/MzCzj7iMzM8s4FMzMLONQMDOzjEPBzMwy/w/fmB3hK55bmAAAAABJRU5ErkJggg==\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHz1JREFUeJzt3XmUFOW9xvHvw7C5ISpohGEzCkqMK2pUTFwjbphVMcQrccviGlfu1ajRY+ISYxLjEqLGJYISYwxGIu5LNBJwwyCiiCgjKoi4ICLL/O4fVVM2Q89MM3RNA/N8zplDV/VbVb/uGfrpeqvqLUUEZmZmAG0qXYCZma06HApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKNhKkdRT0nxJVZWupdwk7SmpZiWW/6eko8pZUyVJ6i0pJLWtdC2WH4eClUTSDEmfpgFQ99MtIt6MiHUjYmkz1jlM0r+aaHOYpKckLZD0aANtvidpZPp4hKSpkmolDSvS9qeS3pH0oaQbJXUoeC4kbb6ir6MhEXFARNxcrvU1RdIh6WvbsGDeoZLekrR+M9Y3Q9K+5a3SVnUOBVsRh6QBUPczq7HGSqzs39j7wG+ASxppcyAwNn38AvAT4Nki9ewPDAf2AXoDmwE/X8n6VhkRcQ/wMHAlgKTOwLXAjyPiw0rWVp/3NlZdDgVbKfW7FCQ9KuliSU8CC4DN0j2C6ZI+lvS6pKGStgKuA3ZN9zo+KLb+iHgwIkYDRQMoDZ39gPvS9ldHxEPAwiLNjwJuiIjJETEPuAgYlq7n8bTNC2k9hxds43RJsyW9LekHK/DePCrp2PTx5pIeS/dQ3pN0R0G73SRNSJ+bIGm3euu4SNKT6ft3v6QujWz2ZOCANACvBB6LiDGN1DhY0mRJH6Tb2iqdfyvQE7gnfT/OKlhsqKQ309dxTsG62kgaLuk1SXMlja7bayn4OzlG0psk4WWroojwj3+a/AFmAPsWmd8bCKBtOv0o8CbwJaAtsD7wEdAvfX5T4Evp42HAv0rc/rHA
o0XmfwX4d5H5/wKG1Zv3AnB4wXSXtPaN0ukANi94fk9gCXAh0I5kj2QBsEH6/PeASY3U/ChwbPp4FHAOyRexjsDAdP6GwDzgyPT9OiKd3qhgHa8BfYG10ulLmnivjgDeA+YAXRtp1xf4hCRU2wFnAdOA9sV+5wW/6z+mtWwLfAZslT5/KvA0UA10AP4AjKq37C3AOsBalf6b9k/xH+8p2Iq4O/1G+YGkuxtpd1Mk38aXkHyo1gJbS1orIt6OiMllrOkgPu86asq6QGE3St3j9RpZZjFwYUQsjoixwHygH0BEjIyIbUrc9mKgF9AtIhZGRN2xlIOAVyPi1ohYEhGjgJeBQwqW/VNEvBIRnwKjge2a2NbTJGF8f0TMaaTd4cC9EfFARCwGfkXyYb9bI8sA/DwiPo2IF0iCdtt0/g+BcyKiJiI+Ay4AvlOvq+iCiPgkfS22CnIo2Ir4RkR0Tn++0Ui7mXUPIuITkg+fHwFvS7pX0pZlrKnweEJT5gOdCqbrHn/cyDJz03Crs4AkXFbUWYCA/6TdNUen87sBb9Rr+wbQvWD6nRXc/giSb+QHFnZFFbHMtiOiluR3173BJRqvpxfwt7ovDsAUYCmwSUH7mdgqzaFgeVhm6N2IGBcR+5F0Hb1M0v2wXLsVJekL6TqXO6jcgMl8/q2W9PG7ETF3ZeooRUS8ExHHRUQ3km/U16RnOs0i+TAt1BN4qznbkXQM0IPkYPv/AX+U1L6B5stsW5LSZeu2vaK/n5nAAQVfHDpHRMeIKHwtHpZ5FedQsFxJ2iQ9mLkOSf/zfJJvjwDvAtWNfGghqUpSR5L+9jaSOkpqlz59IHBfRERB+/ZpewHt0vZ1f+e3AMdI6i9pA+Bc4KaCzb1LckZS2Un6rqTqdHIeyYfjUpK9nL7pabVt0wPc/YF/NGMb3YDLgePS7pvrgLkkxzKKGQ0cJGmf9D09neR39FT6/Iq+H9cBF0vqldbTVdKhK/o6rLIcCpa3NiQfNrNITi/9Gsm3WEjOQJkMvCPpvQaWPxL4lOTUyj3Sx3V7GsW6ju5P2+xG0o3yKfBVgIi4D7gMeISk2+QN4PyCZS8Abk67Pw5r6oWlZ1GVenxkJ2C8pPnAGOCUiHg93Us5mOQ9mkvSzXRwRDT0fjTmGuD2iHgCIA3L44BTJX2pfuOImAp8H7iK5MD0ISSnHS9Km/wSODd9P84oYfu/TV/b/ZI+Jjm2sUszXodVkAq+ZJmtNtKDl+8AX4xV7Bz8OulprtdHxC2VrsWsVN5TsNXVhsDPVuFAWJuk6+X1StditiJyCwUlQwjMlvTfBp6XpN9JmiZpkqQd8qrF1jwRMTsirq10HcVI2phkL+YxkuslzFYbee4p3AQMauT5A4At0p/jSfqMzVZ7aWB1ioih4f5ZW83kFgoR8TjJgcWGHArcEomngc6SNs2rHjMza1olB6XqzrIXstSk896u31DS8SR7E6yzzjo7brllM699mvVc85ZbGd22r+y2K7HdSm67Nb7fFX7NL77V8od1vtw9HfS1Fb7fzfXMM8+8FxFdm2pXyVBQkXlFd7UjYgTJ6YUMGDAgJk6c2LwtXrDCowevvAsmVnbbldhuJbfdGt/v9DX3Hn5vi296xiUHVWS7Ey85CKjca670tptDUv0r54uq5NlHNSRXT9appoGRMM3MrGVUMhTGAP+TnoX0FeDDiFiu68jMzFpObt1HkkaRDD3cRcktDc8nGZ6XiLiO5ErUA0mG6l0AlDxOvZmZ5SO3UIiII5p4PoAT8tq+WSX0Xjiyxbc5o8W3uOrr1KENJ+2yAb06t0NFD18235QpUwD44+CWP1mybtuN6dixI9XV1bRr167JtsX4lnhmtsY5aZcN2OGL3Wi79nokg7+Wz1bVnQFYXFP0ZoG5qtt2QyKCuXPnUlNTQ58+fZq1DQ9zYWZrnF6d2+USCKs6SWy00UYsXFjsbrSlcSiY2RpHqNUFQp2Vfd0OBTMzy/iYgpmt8Qb//smy
rq+Ui8i+0q+ap6a8yWUX/C//efJxJNGhQ0cuu/ZPVPfsxccffcgl553N8xPGA7DdTrsw/MJLWa/T+rw1800O3G1bzr7wUr73g+MB+MW5ZzJoz90ZNmxYWV9Lfd5TMDPLybgxdzHn3Xe484En+euDT3Hl9bfSqVNy1fsFZ55Mdc/e3Pvkc9z75HN079GLn591Srbshl26MvKG61i8aFFDq8+FQ8HMLCdzZr9Ll403oU2b5KN2k02706lzZ958fTovvfg8x59yZtb2h6eexeRJzzFzRnILjg023IidB36VMXeOatGaHQpmZjnZ/5Bv8PiD93HY/nvwqwvPZcp/JwEw/dWX6df/y1RVVWVtq6qq6Nf/y7z2yufXIhz9k59yy4irWbp06XLrzotDwcwsJ5ts2p2/PzqBk4efR5s24vghhzL+X48R0cBZQskT2WR1z15svd0OjL37Ly1Wsw80m5nlqH2HDgzcaz8G7rUfG3XZmIfH3cvQo3/Ey5MnUVtbm3Ut1dbWMnXKf9ls837LLH/siadx+g+PYsdddmuRer2nYGaWkykvvsDsd5JxPmtra3nl5cl0696Dnn02Y8svbcOI3/0qazvid79iq623pWefzZZZR5/N+/LFvlvy+EPjWqRm7ymY2RpvzIm7l21d2zQx1ATAkiVLaN++Pe+/N4efn30Kiz77DICtt9uRIcOOA+Dnl1/FL887i4MH7kBEsM2OO3HB5VcVXd+xJ53G4YO+VrbX0BiHgplZmb32yhSqe/Vh9732Zfe99i3aplPnzvzydyOKPte9R0/ueujf2XS//l/m+TffLymQVpZDwcysjEbfeiOj/jSCM8//RaVLaRaHgplZGR125NEcduTRlS6j2Xyg2czMMg4FMzPLOBTMzCzjUDAzs4wPNJvZGm+b63uVd4UXfNhkk3fffotfnHMm01+dSm1tLV/dd39OO+dCXnt1KnPefZs99v46ANf++hLWXnsdjvrRSeWtsZm8p2BmVmYRwU+P+x/22v8g7nniGcY8PpEFn3zCVZddxNTJL/LEww+UbVvlHizPewpmZmX2nycfp0OHDnzj8KFAMgLqmedfzKBdt6Ft23YQwfMTnuboE34KwGuvTuWY7x7M27NqGHrMjxl69A8B+MdddzDyxhEsWbyIrbffkTtuvoGqqirWXXddTjvtNMaNG8cVV1zBwIEDy1a79xTMzMps2isv0//L2y0zb931OtGtuifHnXwGXz/km4we9wSDBn8LgBmvvcK1f/4rt93zEH+48lIWL17M9FenMu6ev3Hz3+5j9LgnqGpTxW233QbAJ598wtZbb8348ePLGgjgPQUzs/KrNwR24fxiQ2bvsffXad+hA+07dGDDLl15/73ZjH/yMaZMeoGhB+8NwMKFC9lqsx5Asufx7W9/O5fSHQpmZmX2xb5b8uDYMcvMm//xR7wz6y2q2izfQdO+fYfscZs2bViyZCkRcMh3h3DK8POz5+rGPurYseMyN+gpJ3cfmZmV2S4Dv8bCTz/lnjtvB5KDwVdc9DMGf/d7bNR1YxZ8Mr/pdez+VR68dwxz35sDwIfz5vHGG2/kWjd4T8HMWoFJx5bvw7SUkUolceX1t3LxOWcw4reXU1tby8C99+Pks3/GpwsWcOPVv+Gw/ffIDjQX88W+W3LCmefw46Hfora2lrbt2nHjiOvo1avMp9fW41AwM8vBF7pVc9Wfbl9ufvsOHRh578MNLlc4ZPagwd/KDkbD54E0f37TexrN5e4jMzPLOBTMzCzjUDCzNU4QRESly6iIlX3dDgUzW+O88cFiliz4qNUFQ0Qwd+5cOnbs2Ox1+ECzma1xrho/j5OAXp3fQxS5iGwlTPl4LQDenfdpWde7IttuTMeOHamurm72NhwKZrbG+eizWi5+fG4u655xyUEAHDD83lzWX8q28+TuIzMzy+QaCpIGSZoqaZqk4UWe7ynpEUnPSZok6cA86zEzs8blFgqSqoCrgQOA/sARkvrX
a3YuMDoitgeGANfkVY+ZmTUtzz2FnYFpETE9IhYBtwOH1msTQKf08frArBzrMTOzJuQZCt2BmQXTNem8QhcA35dUA4wFit6PTtLxkiZKmjhnzpw8ajUzM/INhWLngdU/afgI4KaIqAYOBG6VtFxNETEiIgZExICuXbvmUKqZmUG+oVAD9CiYrmb57qFjgNEAEfFvoCPQJceazMysEXmGwgRgC0l9JLUnOZA8pl6bN4F9ACRtRRIK7h8yM6uQ3EIhIpYAJwLjgCkkZxlNlnShpMFps9OB4yS9AIwChkVruy7dzGwVkusVzRExluQAcuG88woevwTsnmcNZmZWOl/RbGZmGYeCmZllHApmZpbxKKm2Ruq9cGSLb3NGi2/RrPy8p2BmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZXINBUmDJE2VNE3S8AbaHCbpJUmTJY3Msx4zM2tc27xWLKkKuBrYD6gBJkgaExEvFbTZAvhfYPeImCdp47zqMTOzpuW5p7AzMC0ipkfEIuB24NB6bY4Dro6IeQARMTvHeszMrAl5hkJ3YGbBdE06r1BfoK+kJyU9LWlQsRVJOl7SREkT58yZk1O5ZmaWZyioyLyoN90W2ALYEzgCuF5S5+UWihgREQMiYkDXrl3LXqiZmSXyDIUaoEfBdDUwq0ibv0fE4oh4HZhKEhJmZlYBeYbCBGALSX0ktQeGAGPqtbkb2AtAUheS7qTpOdZkZmaNyC0UImIJcCIwDpgCjI6IyZIulDQ4bTYOmCvpJeAR4MyImJtXTWZm1rjcTkkFiIixwNh6884reBzAaemPmZlVmK9oNjOzjEPBzMwyDgUzM8uUFAqSvitpvfTxuZLukrRDvqWZmVlLK3VP4WcR8bGkgcD+wM3AtfmVZWZmlVBqKCxN/z0IuDYi/g60z6ckMzOrlFJD4S1JfwAOA8ZK6rACy5qZ2Wqi1A/2w0guNBsUER8AGwJn5laVmZlVREmhEBELgNnAwHTWEuDVvIoyM7PKKPXso/OBs0luiAPQDvhzXkWZmVlllNp99E1gMPAJQETMAtbLqygzM6uMUkNhUTpOUQBIWie/kszMrFJKDYXR6dlHnSUdBzwI/DG/sszMrBJKGiU1In4laT/gI6AfcF5EPJBrZWZm1uKaDAVJVcC4iNgXcBCYma3Bmuw+ioilwAJJ67dAPWZmVkGl3mRnIfCipAdIz0ACiIiTc6nKzMwqotRQuDf9MTOzNVipB5pvltQe6JvOmhoRi/Mry8zMKqGkUJC0J8lw2TMAAT0kHRURj+dXmpmZtbRSu4+uAL4eEVMBJPUFRgE75lWYmZm1vFIvXmtXFwgAEfEKyfhHZma2Bil1T2GipBuAW9PpocAz+ZRkZmaVUmoo/Bg4ATiZ5JjC48A1eRVlZmaVUWootAV+GxG/huwq5w65VWVmZhVRaig8BOwLzE+n1wLuB3bLoygrn94LR1ZkuzMqslUzW1mlHmjuGBF1gUD6eO18SjIzs0opNRQ+kbRD3YSkAcCn+ZRkZmaVUmr30anAXyTNIrnRTjfg8NyqMjOzimh0T0HSTpK+EBETgC2BO4AlwH3A6y1Qn5mZtaCmuo/+ACxKH+8K/B9wNTAPGJFjXWZmVgFNdR9VRcT76ePDgRER8Vfgr5Kez7c0MzNraU3tKVRJqguOfYCHC54r9XiEmZmtJpr6YB8FPCbpPZKzjZ4AkLQ58GHOtZmZWQtrNBQi4mJJDwGbAvdHRKRPtQFOyrs4MzNrWaXco/npiPhbRBTehvOViHi2qWUlDZI0VdI0ScMbafcdSZFe/2BmZhVS6sVrKywdH+lq
4ACgP3CEpP5F2q1HMtDe+LxqMTOz0uQWCsDOwLSImB4Ri4DbgUOLtLsIuAxYmGMtZmZWgjxDoTsws2C6Jp2XkbQ90CMi/tHYiiQdL2mipIlz5swpf6VmZgbkGwoqMi+yJ6U2wJXA6U2tKCJGRMSAiBjQtWvXMpZoZmaF8gyFGqBHwXQ1MKtgej1ga+BRSTOArwBjfLDZzKxy8gyFCcAWkvpIag8MAcbUPRkRH0ZEl4joHRG9gaeBwRExMceazMysEbmFQkQsAU4ExgFTgNERMVnShZIG57VdMzNrvlyHqoiIscDYevPOa6DtnnnWYmZmTcuz+8jMzFYzDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyud55zVq33gtHtvg2Z7T4Fs3WLN5TMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLJPrTXYkDQJ+C1QB10fEJfWePw04FlgCzAGOjog38qypUnzDGTNbHeS2pyCpCrgaOADoDxwhqX+9Zs8BAyJiG+BO4LK86jEzs6bl2X20MzAtIqZHxCLgduDQwgYR8UhELEgnnwaqc6zHzMyakGcodAdmFkzXpPMacgzwz2JPSDpe0kRJE+fMmVPGEs3MrFCeoaAi86JoQ+n7wADg8mLPR8SIiBgQEQO6du1axhLNzKxQngeaa4AeBdPVwKz6jSTtC5wDfC0iPsuxHjMza0KeewoTgC0k9ZHUHhgCjClsIGl74A/A4IiYnWMtZmZWgtxCISKWACcC44ApwOiImCzpQkmD02aXA+sCf5H0vKQxDazOzMxaQK7XKUTEWGBsvXnnFTzeN8/tm5nZivEVzWZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZpm2lC2hJvReObPFtzmjxLZqZNZ/3FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8vkGgqSBkmaKmmapOFFnu8g6Y70+fGSeudZj5mZNS63UJBUBVwNHAD0B46Q1L9es2OAeRGxOXAlcGle9ZiZWdPy3FPYGZgWEdMjYhFwO3BovTaHAjenj+8E9pGkHGsyM7NGKCLyWbH0HWBQRBybTh8J7BIRJxa0+W/apiadfi1t8169dR0PHJ9O9gOm5lJ047oA7zXZysrF73fL8Xvdsir1fveKiK5NNcrzJjvFvvHXT6BS2hARI4AR5SiquSRNjIgBlayhNfH73XL8XresVf39zrP7qAboUTBdDcxqqI2ktsD6wPs51mRmZo3IMxQmAFtI6iOpPTAEGFOvzRjgqPTxd4CHI6/+LDMza1Ju3UcRsUTSicA4oAq4MSImS7oQmBgRY4AbgFslTSPZQxiSVz1lUNHuq1bI73fL8Xvdslbp9zu3A81mZrb68RXNZmaWcSiYmVmm1YaCpBslzU6vlaibt6GkByS9mv67Qb1ldpK0NL0Go25eT0n3S5oi6SUP1dEwSVWSnpP0j3T6xHSIk5DUpaDd+pLukfSCpMmSflBvPZ0kvSXp9y39GlYHkvpJer7g5yNJp0q6SNKkdN79krql7Yem8ydJekrStun8HpIeSf+2J0s6pbKvbNWS92dIepLO+HRdd6Qn7OSu1YYCcBMwqN684cBDEbEF8FA6DWTDdlxKcuC80C3A5RGxFclV3LPzKngNcAowpWD6SWBf4I167U4AXoqIbYE9gSvq/Ye4CHgsxzpXaxEx
NSK2i4jtgB2BBcDfSP5Ot0nn/wM4L13kdeBrEbENyXtbdyB0CXB6+rf9FeCEIkPVtGY3ke9nyKXAlem65pEMC5S7VhsKEfE4y18TUTjsxs3ANwqeOwn4KwUf+ul/kLYR8UC6zvkRsSC3oldjkqqBg4Dr6+ZFxHMRMaNI8wDWS4c8WZfk97QkXc+OwCbA/XnXvIbYB3gtIt6IiI8K5q9DeqFoRDwVEfPS+U+TXFNERLwdEc+mjz8mCfTuLVb5Ki7Pz5D0b39vkuF/iq0rN602FBqwSUS8Dcl/CGBjAEndgW8C19Vr3xf4QNJdabfI5em3AVveb4CzgNoS2v4e2IrkYscXgVMiolZSG+AK4MzcqlzzDAFG1U1IuljSTGAon+8pFDoG+Gf9mWmXxvbA+FyqXHOU6zNkI+CDiFiStquhhQLZoVCa3wBnR8TSevPbAnsAZwA7AZsBw1q2tFWfpIOB2RHxTImL7A88D3QDtgN+L6kT8BNgbETMzKfSNUva5TYY+EvdvIg4JyJ6ALcBJ9ZrvxdJKJxdb/66JN9wT623t2GlW9HPkJKGAMpDnmMfrY7elbRpRLwtaVM+380bANyeDuDaBThQ0hKS9H4uIqYDSLqbpO/1hpYvfZW2OzBY0oFAR6CTpD9HxPcbaP8D4JL06vZpkl4HtgR2BfaQ9BOSbqX2kuZHxHL36jAgGbb+2Yh4t8hzI4F7gfMBJG1D0rV3QETMrWskqR1JINwWEXflX/Jqr1yfITcCnSW1TfcWig0TlAvvKSyrcNiNo4C/A0REn4joHRG9Sfr4fhIRd5MM5bGBpLqRB/cGXmrZkld9EfG/EVGdvn9DSIYzaSgQAN4k6QtH0iYkI+NOj4ihEdEzXc8ZwC0OhEYdwbJdR1sUPDcYeDmd3xO4CzgyIl4paC+SLzhTIuLXLVLx6q8snyHpF6JHSIb/WWZdeWu1oSBpFPBvoJ+kGknHAJcA+0l6FdgvnW5Quit4BvCQpBdJdvn+mG/law5JJ0uqIfkWNElS3UHoi4Dd0vf0IZLdbg/tvAIkrU3yN1z47f4SSf+VNAn4OsnZYJAcW9gIuCY9XXViOn934Ehg74LTWw9soZewymuBz5CzgdOUDAO0ES3UA+FhLszMLNNq9xTMzGx5DgUzM8s4FMzMLONQMDOzjEPBzMwyDgUzM8s4FKzVkPSopP3rzTtV0jWNLDO/zDXcVDhsstmqxqFgrckolr8P+DIDxpm1dg4Fa03uBA6W1AGykT+7Ac9LekjSs5JelHRo/QUl7an05kDp9O8lDUsf7yjpMUnPSBqXjnnTpIaWS/doLpX0H0mvSNpjZV+4WakcCtZqpAO9/YfPb4wyBLgD+BT4ZkTsAOxFclOfYqNULicdMO4q4DsRsSPJQGYXl2G5thGxM3Aq6aB1Zi3Bo6Raa1PXhfT39N+jScab+YWkr5Lc76E7yY183ilhff2ArYEH0hypAt4uw3J1YxY9A/QuYX1mZeFQsNbmbuDXknYA1oqIZ9NuoK7AjhGxWNIMkiG+Cy1h2T3ruucFTI6IXVewjqaW+yz9dyn+f2otyN1H1qpExHzgUZLumroDzOuT3ARocXqjmV5FFn0D6C+pg6T1SYf2BqYCXSXtCkm3kKQvlVBKc5czy5W/gVhrNIqke6buTKTbgHvSIaOfJ73PQKGImClpNDAJeBV4Lp2/KD3F9HdpWLQlucvW5MYKaO5yZnnz0NlmZpZx95GZmWXcfWSWA0lXk9y5rNBvI+JPlajHrFTuPjIzs4y7j8zMLONQMDOzjEPBzMwyDgUzM8v8P4ZXHeGHKfPqAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] @@ -594,7 +594,7 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHoFJREFUeJzt3XmclXXd//HXm2FzSw1GEwYcVDBRH7kQ1g11U7ggJrRYapZZKm1g/NzSXNO8y8rcbjdcyi0UNRWVxC2X7FYBdyAUEWUk2VQUUNk+vz+uay6PwyyH4VxznJn38/E4j7n28znXzFzvc32vc76XIgIzMzOADuUuwMzMPjkcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEoWJMkDZFUUzA+V9LeLVzDw5KOasnnLDdJf5f0g3LXUSqSqiWFpI7lrsUa5lBoJ9ID+fuSlkl6W9I9knqVu676SDpT0g3lrqPcImL/iLi2pZ5P0oGS3pT06YJpIyW9IWnzZmyvxd882IZzKLQvB0bEpsA2wALg4jLXY58gEXEX8BBwPoCkLYDLgJ9GxNJy1laXzzby41BohyLiA+BWoH/tNEldJP1R0uuSFki6XNJGTW0rXe8CSfPTxwWSuqTzHpH0rXR4cNp0MDwd31vSs/VsbxjwK+Dg9KzmuYLZ20p6XNJ7ku6T1L1gvS9I+pekdyQ9J2lIIzXPlXSCpOclLZd0taSt0+aa9yQ9IGnLYrYt6YeSZqbrzZH044J5QyTVSDpO0kJJ/5H0w6b2acH6WZOZpB3S/blU0mJJNxcs91+SpqTzpkj6rzrbOLuh/VaPY4D9Je1HEg6PRMTERmocIWl6um8elrRTOv16oDdwV/p7PLFgtcPSv7PFkk4p2FYHSSdJekXSEkkTas9aCpqejpT0Okl4WR4iwo928ADmAnunwxsD1wLXFcy/AJgIfBrYDLgL+G06bwhQ08C2zgKeALYCKoF/AWcXzLs4Hf4V8ApwbsG8Cxuo9UzghjrTHk7X7wdslI7/Lp3XE1gCDCd5o7NPOl7ZyL54Atg6XXch8DSwO9CF5IBzRjHbBg4AtgcE/DewAtijYL+tTl9rp3QbK4At0/nfBZ5v5Hf2MHBUOjweOCWtoSswOJ3+aeBt4PtAR+DQdLxbU/utkec9FFgMLGpoH6bL9QOWp/ukE3AiMBvoXPfvJB2vBgK4Mq3lc8CHwE7p/LHp76Uq/T1cAYyvs+51wCbARuX+n2qrj7IX4EcL/aKTf9BlwDvpgWo+sGs6T+k/9/YFy38ReDUdHkLDofAKMLxg3n7A3HR4aO1BD7gXOAp4Ih1/BPhmA7WeSf2hcGrB+M+Ae9PhXwLX11l+MvCDRvbFYQXjtwGXFYyPAe5o5rbvAH5RsN/eBzoWzF8IfKHI39nDfBQK1wHjgKo6y3wfeKrOtP8DjmhqvzXyvH2AVcCNTSx3GjChYLwD8AYwpO7fSTpee2CvKpj2FHBIOjwTGFowb5u0jo4F625X7v+ltv5w81H78vWI2ILkXdho4BFJnyF5h78xMC1tBniH5CBeWcQ2ewCvFYy/lk6D5ODUT9LWwG4kB7ZeafPFQODR9az/zYLhFcCm6fC2wLdra0/rH0xyUGnIgoLh9+sZL2rbkvaX9ISkt9J5w4HC5pklEbG6gbrXx4kk4f1U2lzzo3R63f1POt6zYLyh/daQcSS/q+GFTVH1+NhzR8RaYF6d565PY7/H2wv280xgDckZXa15TWzbNpAv1rRDEbEG+JukK0gOcH8jORDuHBFvrOfm5pP8M09Px3un04iIFZKmAb8AXoyIlZL+BRwLvBIRixsqcT1rmEfybv7o9Vxvg7adXju5DTgcuDMiVkm6g+TgXVIR8SZwdPq8g4EHJD3KR/u/UG+SUF9vko4EegFfA6YCV0raPSJW1rP4f
GDXgnWVrlv7N9Sc3+OPIuLxeuqqbuY2bT35TKEdUmIksCUwM32HdyVwvqSt0mV6phcbmzIeOFVSZXoGcDpQ+HHSR0jPStLxh+uM12cBUC2p2L/PG4ADJe0nqUJS1/Qib1WR6zd3251JzroWAasl7Q/sW4LnXIekbxe8nrdJDo5rgEkkZ2PfldRR0sEkHyC4uxnP0QP4A3B0RHwIXE5y/eSUBlaZABwgaaikTsBxJNcI/pXOXwBstx4lXA6cI2nbtJ7K9O/UWpBDoX25S9Iy4F3gHJJ28dp3+L8kuUj4hKR3gQeAHYvY5m9I3lE+D7xAcsH2NwXzHyG5cP1oA+P1uSX9uUTS000VEBHzgJEkF7MXkbzjPIES/H03tu2IeI/k0zoTSA7U3yW5WF8USYdJmt70kgB8Hngy/f1NJLlu8WpELCF5V38cyQH8ROBrjZyFNeZS4KaIeAwgkob9o4Gxknauu3BEzAK+R/LR5sXAgSQfe649q/gtyRuGdyQdX8TzX5i+tvskvUdy0XmvZrwO2wBKL+iY2SdM2jx0VURcV+5arP3wmYLZJ5CkjUmaXl4tdy3WvjgUzD5h0us6b5I0tf2zzOVYO+PmIzMzy/hMwczMMq3uewrdu3eP6urqcpdhZtaqTJs2bXFENPmF1FYXCtXV1UydOrXcZZiZtSqS6n7zvV5uPjIzs4xDwczMMg4FMzPLtLprCmZmTVm1ahU1NTV88MEH5S6lxXXt2pWqqio6derUrPUdCmbW5tTU1LDZZptRXV1N0nlr+xARLFmyhJqaGvr06dOsbbj5yMzanA8++IBu3bq1q0AAkES3bt026Awpt1CQdE16X9oXG5gvSRdJmq3kXrl75FWLmbU/7S0Qam3o687zTOEvwLBG5u8P9E0fo4DLcqzFzMyKkNs1hYh4tOBuSfUZSXLj+CDpw38LSdtExH/yqsnM2qfqk+4p6fbm/u6AJpfZdNNNeffddxk7diwPPfQQkujatSsTJkygT58+LF26lDFjxvD448mN5gYNGsTFF1/M5ptvzty5c+nTpw8XXXQRY8aMAWD06NEMGDCAI444oqSvpa5yXmjuycfvt1qTTlsnFCSNIjmboHfv3s1/xjM3b/66zX7OpeV97nI8bzmfuz3ub7/mde03Aebn/Mmj+c80Pj/WcvPl5zL/lRk8f++1dOjQgZr5C9jk/ddh/jscefQJ7PLZ7bnusVsBOOOPl3HUYQdxy7jfw4L5bNX901z4p9/z4wP3onPnTrB8Ub6vJ1XOC831NXzV22VrRIyLiAERMaCysph7yZuZld9/Fixmm62706FDcqit6rE1W27xKWa/+jrTXpjJaWM/uvX36f9vFFOfn8Erc5P3ypXdtmTooIFce8tdLVpzOUOhhuQm37WqSG/4bmbWFnznwH246/5H2W2fQzju13/imRf/DcCMl19lt513pKKiIlu2oqKC3XbekekvzcmmnTT6h5x3xQ2sWbOmxWouZyhMBA5PP4X0BWCpryeYWVtS1WNrZj16O789eQwdOoihB/+EBx97koigvg8JRcTHmlD69O7JwN125q+3/73Fas7tmoKk8cAQoLukGuAMoBNARFwOTAKGk9wsfgXww7xqMTMrly5dOrP/Vwex/1cHsXVlN+6Y/DC/OPJQnnlxFmvXrs2altauXctzM15ip74f/9LZr475EQeNOpEv79Uyn9rP7UwhIg6NiG0iolNEVEXE1RFxeRoIROLnEbF9ROwaEe4P28zalKdfmMn8N5MLxGvXruX5GS+zbdU27NCnN7vvsiO/ufCqbNnfXHgVe+z6WXbo8/EP03x2hz7077sddz/wWIvU7G4uzKzNm3tMj9JtrMfuTS6yevVqunTuzMLFb3H0CWfz4cpVAAzcbWdGH3EwAFf/8QzGnHYuOwwaQQR8cc9dufqPZ9S7vVOOOZLd9zu0dK+hEQ4FM7MSmz5rDttXVzHsK4MY9pVB9S6z5Raf4oaLz6l3XnWvHrz40
C3Z+Od27sfammlFBdKGciiYmZXQ5dfdykXXjOeCXx9f7lKaxaFgZlZCPzn8IH5y+EHlLqPZ3EuqmZllHApmZpZxKJiZWcahYGZmGV9oNrO2b9yQ0m6vtpfWRtTMX8DPT/kdM16aw9oIvrb3l/jDqWOZ8dIc5i9YxPChg5NNnXc5m26yMcf/5PDS1thMPlMwMyuxiOCbRx/P14cN4eXH7+Slx25n2fIVnHLu//Ls9FlMeuifJXuuUneW5zMFM7MSe+ifT9G1S2d+ePBIIOkB9fwzj2PbvQ6gU8eORAT/fOpZTh6ddPk246U5DDnoaF5/403GHvVdjjky+fbyDbfdw0XX3MTKlavYa/dduPTaCVRUVLDpppty7LHHMnnyZM477zwGDx5cstp9pmBmVmLTX5rDnrvu9LFpn9psU6qrenDqL47i4BH78uz9N3HwyP0A+PfsuUy+8RKeuuc6fv2ncaxatYqZL8/h5on38fgd1/Ds/TdRUVHBjTfeCMDy5cvZZZddePLJJ0saCOAzBTOzkku6xl63b+yGph8wdDBdunSmS5fObNV9SxYseosH//kU016YyeeHfx+A9z/4kK369AeSM49vfetbudTuUDAzK7Gd+23PbZMe/Ni0d99bxrz5C6josG4DTZcunbPhiooKVq9ZQwT84NsH8tuTx3y0YNr3UdeuXT92g55ScvORmVmJDf3SQFa8/wHX3XI3kFwMPu6s8zniOweydWU33lu2oultDB7IrXc/wMLFbwHw1ttLee2113KtG3ymYGbtwaiHS7etInoqlcTtV53Hz371W86+4ErWRjD8q4P4n5NGs3zF+/zukj+z2z6HZBea69O/33b85sSfse+hP2NtrKVTx45cMu7PbLvttqV7LfVwKJiZ5aBXz89w17UXrjO9S5fOTJl0Q4PrFXaZffDI/bKL0UAWSMuWLStdoXW4+cjMzDIOBTMzyzgUzKwNCiKi3EWUxYa+boeCmbU5XZfOYcny1e0uGCKCJUuW0LVr12ZvwxeazazNqXr6XGr4JYs23w5Y98tiG2TpzOTnOwtLu931ee5GdO3alaqqqmY/hUPBzNqcTivfoc8TJ+ez8doeUs/8Qj7bL+a5c+TmIzMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzy+QaCpKGSZolabakk+qZ31vSPyQ9I+l5ScPzrMfMzBqXWyhIqgAuAfYH+gOHSupfZ7FTgQkRsTtwCHBpXvWYmVnT8jxTGAjMjog5EbESuAkYWWeZAD6VDm8OzM+xHjMza0KeodATmFcwXpNOK3Qm8D1JNcAkYEx9G5I0StJUSVMXLVqUR61mZka+oVBff7V1Ozc/FPhLRFQBw4HrJa1TU0SMi4gBETGgsrIyh1LNzAzyDYUaoFfBeBXrNg8dCUwAiIj/A7oC3XOsyczMGpFnKEwB+krqI6kzyYXkiXWWeR0YCiBpJ5JQcPuQmVmZ5BYKEbEaGA1MBmaSfMpouqSzJI1IFzsOOFrSc8B44Ihob/fPMzP7BMn1zmsRMYnkAnLhtNMLhmcAg/KswczMiudvNJuZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpbJNRQkDZM0S9JsSSc1sMx3JM2QNF3SX/Osx8zMGtcxrw1LqgAuAfYBaoApkiZGxIyCZfoCJwODIuJtSVvlVY+ZmTUtzzOFgcDsiJgTESuBm4CRdZY5GrgkIt4GiIiFOdZjZmZNyDMUegLzCsZr0mmF+gH9JD0u6QlJw3Ksx8zMmpBb8xGgeqZFPc/fFxgCVAGPS
dolIt752IakUcAogN69e5e+UjMzA/I9U6gBehWMVwHz61nmzohYFRGvArNIQuJjImJcRAyIiAGVlZW5FWxm1t4VFQqSvi1ps3T4VEl/k7RHE6tNAfpK6iOpM3AIMLHOMncAX0m3252kOWnO+rwAMzMrnWLPFE6LiPckDQb2A64FLmtshYhYDYwGJgMzgQkRMV3SWZJGpItNBpZImgH8AzghIpY054WYmdmGK/aawpr05wHAZRFxp6Qzm1opIiYBk+pMO71gOIBj04eZmZVZsWcKb0i6AvgOMElSl/VY18zMWoliD+zfIWnqGZZ+MujTwAm5VWVmZmVRVChExApgITA4nbQaeDmvoszMrDyK/fTRGcAvSbqkAOgE3JBXUWZmVh7FNh99AxgBLAeIiPnAZnkVZWZm5VFsKKxMPykUAJI2ya8kMzMrl2JDYUL66aMtJB0NPABcmV9ZZmZWDkV9TyEi/ihpH+BdYEfg9Ii4P9fKzMysxTUZCul9ESZHxN6Ag8DMrA1rsvkoItYAKyRt3gL1mJlZGRXbzcUHwAuS7if9BBJARByTS1VmZlYWxYbCPenDzMzasGIvNF+bdn/dL500KyJW5VeWmZmVQ1GhIGkISXfZc0nuqNZL0g8i4tH8SjMzs5ZWbPPRecC+ETELQFI/YDywZ16FmZlZyyv2y2udagMBICJeIun/yMzM2pBizxSmSroauD4dPwyYlk9JZmZWLsWGwk+BnwPHkFxTeBS4NK+izMysPIoNhY7AhRHxJ8i+5dwlt6rMzKwsir2m8CCwUcH4RiSd4pmZWRtSbCh0jYhltSPp8Mb5lGRmZuVSbCgsl7RH7YikAcD7+ZRkZmblUuw1hbHALZLmk9xopwdwcG5VmZlZWTR6piDp85I+ExFTgM8CNwOrgXuBV1ugPjMza0FNNR9dAaxMh78I/Aq4BHgbGJdjXWZmVgZNNR9VRMRb6fDBwLiIuA24TdKz+ZZmZmYtrakzhQpJtcExFHioYF6x1yPMzKyVaOrAPh54RNJikk8bPQYgaQdgac61mZlZC2s0FCLiHEkPAtsA90VEpLM6AGPyLs7MzFpWk01AEfFEPdNeyqccMzMrp2K/vGZmZu2AQ8HMzDIOBTMzy+QaCpKGSZolabakkxpZ7iBJkfapZGZmZZJbKKT3XLgE2B/oDxwqqX89y21GcvOeJ/OqxczMipPnmcJAYHZEzImIlcBNwMh6ljsb+D3wQY61mJlZEfIMhZ7AvILxmnRaRtLuQK+IuLuxDUkaJWmqpKmLFi0qfaVmZgbkGwqqZ1pkM6UOwPnAcU1tKCLGRcSAiBhQWVlZwhLNzKxQnqFQA/QqGK8C5heMbwbsAjwsaS7wBWCiLzabmZVPnqEwBegrqY+kzsAhwMTamRGxNCK6R0R1RFQDTwAjImJqjjWZmVkjcguFiFgNjAYmAzOBCRExXdJZkkbk9bxmZtZ8uXZ/HRGTgEl1pp3ewLJD8qzFzMya5m80m5lZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWVyvUezmVmeqj/4a4s/59wWf8aW5TMFMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4z7Pmoh7qPFzFqDXENB0jDgQqACuCoifldn/rHAUcBqYBHwo4h4Lc+azMxKoa2+0cstFCRVAJcA+wA1wBRJEyNiRsFizwADImKFpJ8CvwcOzqsmMyu9tnpwbK/yvKYwEJgdEXMiYiVwEzCycIGI+EdErEhHnwCqcqzHzMyakGfzUU9gXsF4DbBXI8sfCfy9vhmSRgGjAHr37l2q+tqFcryLg/K/k/O7V7PmyfNMQfVMi3oXlL4HDAD+UN/8iBgXEQMiYkBlZWUJSzQzs
0J5ninUAL0KxquA+XUXkrQ3cArw3xHxYY71mJlZE/I8U5gC9JXUR1Jn4BBgYuECknYHrgBGRMTCHGsxM7Mi5HamEBGrJY0GJpN8JPWaiJgu6SxgakRMJGku2hS4RRLA6xExIq+azNoyX0exUsj1ewoRMQmYVGfa6QXDe+f5/GZmtn7czYWZmWXczYXlxs0ZZq2PQ8GshByE1tq1q1DwP6yZWeN8TcHMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDK5hoKkYZJmSZot6aR65neRdHM6/0lJ1XnWY2ZmjcstFCRVAJcA+wP9gUMl9a+z2JHA2xGxA3A+cG5e9ZiZWdPyPFMYCMyOiDkRsRK4CRhZZ5mRwLXp8K3AUEnKsSYzM2uEIiKfDUsHAcMi4qh0/PvAXhExumCZF9NlatLxV9JlFtfZ1ihgVDq6IzArl6Ib1x1Y3ORSVire3y3H+7pllWt/bxsRlU0t1DHHAup7x183gYpZhogYB4wrRVHNJWlqRAwoZw3tifd3y/G+blmf9P2dZ/NRDdCrYLwKmN/QMpI6ApsDb+VYk5mZNSLPUJgC9JXUR1Jn4BBgYp1lJgI/SIcPAh6KvNqzzMysSbk1H0XEakmjgclABXBNREyXdBYwNSImAlcD10uaTXKGcEhe9ZRAWZuv2iHv75bjfd2yPtH7O7cLzWZm1vr4G81mZpZxKJiZWcahAEi6RtLC9HsTtdPOlvS8pGcl3SepR8G8Ien06ZIeKU/VrZukuZJeSPfj1DrzjpcUkrrXmf55SWvS78BYkST9QtKL6d/r2HTamZLeSPf/s5KGp9P3kTQt/d1Mk/TV8lbfOpTqGNJU10AtIiLa/QP4MrAH8GLBtE8VDB8DXJ4ObwHMAHqn41uVu/7W+ADmAt3rmd6L5MMJrxXOJ/mwwkPAJOCgctffWh7ALsCLwMYkHyx5AOgLnAkcX8/yuwM9CtZ9o9yvoTU8SnEMSf/GXwG2AzoDzwH9W/q1+EwBiIhHqfP9iIh4t2B0Ez76Ut13gb9FxOvpcgtbpMj243zgRNb9EuMY4DbA+3v97AQ8ERErImI18AjwjYYWjohnIqL2+0TTga6SurRAna1aiY4hxXQNlDuHQiMknSNpHnAYcHo6uR+wpaSH09Prw8tXYasWwH3pPhwFIGkEyTvT5woXlNST5EB2ecuX2eq9CHxZUjdJGwPD+ehLpaPT5o1rJG1Zz7rfAp6JiA9bqti2Zj2PIT2BeQWr16TTWpRDoRERcUpE9AJuBGr7bOoI7AkcAOwHnCapX5lKbM0GRcQeJL3o/lzSl4FT+Ogfp9AFwC8jYk1LFtgWRMRMkt6H7wfuJWmSWA1cBmwP7Ab8BzivcD1JO6fr/bgl621r1vMYUlS3P3lzKBTnryTvmiBJ73sjYnkkHfc9CnyubJW1UrVNFOmp8+3AfwN9gOckzSXpFuVpSZ8BBgA3pdMPAi6V9PVy1N0aRcTVEbFHRHyZpInj5YhYEBFrImItcCVJ0wUAkqpIfieHR8Qr5am6zSnmGFJM10C5cyg0QFLfgtERwL/T4TuBL0nqmJ6O7wXMbOn6WjNJm0jarHYY2BeYEhFbRUR1RFST/IPsERFvRkSfgum3Aj+LiDvKVX9rI2mr9Gdv4JvAeEnbFCzyDZJmJiRtAdwDnBwRj7d0rW1JM44hxXQNlLs8e0ltNSSNB4YA3SXVAGcAwyXtCKwl+STMTyA5HZd0L/B8Ou+qiHix3g1bQ7YGbk9vndER+GtE3Fvektq02yR1A1YBP4+ItyVdL2k3kuaJuXzUTDQa2IGkSeO0dNq+/kBF40p1DKmva6AWfy3pR6HMz
MzcfGRmZh9xKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYO1G2tfMfnWmjZV0aSPrLCtxDX9x19/2SeZQsPZkPOveB/yQdLqZ4VCw9uVW4Gu1XUFLqgZ6AM9KelDS0+nNZdbprji9KcrdBeP/K+mIdHhPSY+kPV5OrtOFRIMaWi89ozlX0lOSXpL0pQ194WbFcihYuxERS4CngGHppEOAm4H3gW+kvbZ+BThPaR8cTZHUCbiY5MY/ewLXAOeUYL2OETEQGEvSZYJZi3DfR9be1DYh3Zn+/BFJl8X/k3bfvZakD/utgTeL2N6OJHcouz/NkQqSrqg3dL2/pT+nAdVFbM+sJBwK1t7cAfxJ0h7ARhHxdNoMVAnsGRGr0i66u9ZZbzUfP7OunS9gekR8cT3raGq92hvbrMH/p9aC3Hxk7UpELAMeJmmuqb3AvDmwMA2ErwDb1rPqa0B/SV0kbQ4MTafPAiolfRGSZqH0BjVNae56ZrnyOxBrj8aTNM/UfhLpRuAuSVOBZ/mo3/tMRMyTNIGku+OXgWfS6SvTj5helIZFR5I7xTXa5XFz1zPLm7vONjOzjJuPzMws4+YjsxxIugQYVGfyhRHx53LUY1YsNx+ZmVnGzUdmZpZxKJiZWcahYGZmGYeCmZll/j+Z5Ya0yysJKQAAAABJRU5ErkJggg==\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAHo1JREFUeJzt3XmclXXd//HXm2FzSw1GEwYcVDBRH7kQ1g11U7ggJrRYapZZKm1g/NzSNDXNu6zM7XbDpVxR1FRUErdcslsF3IFQRJSRZFNRRGT7/P64rrk8DrMchnPNcWbez8fjPObaz+dcM3O9z/W9zvleigjMzMwAOpS7ADMz++RwKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYE2SNERSTcH4XEl7t3AND0s6qiWfs9wk/V3SD8pdR6lIqpYUkjqWuxZrmEOhnUgP5B9IWibpbUn3SOpV7rrqI+kMSdeXu45yi4j9I+Kalno+SQdKelPSpwumjZT0hqTNm7G9Fn/zYBvOodC+HBgRmwLbAAuAi8pcj32CRMRdwEPAeQCStgAuBX4aEUvLWVtdPtvIj0OhHYqIFcCtQP/aaZK6SPqTpNclLZB0maSNmtpWut75kuanj/MldUnnPSLpW+nw4LTpYHg6vrekZ+vZ3jDgV8DB6VnNcwWzt5X0uKT3JN0nqXvBel+Q9C9J70h6TtKQRmqeK+kESc9Lel/SVZK2Tptr3pP0gKQti9m2pB9KmpmuN0fSjwvmDZFUI+k4SQsl/UfSD5vapwXrZ01mknZI9+dSSYsl3Vyw3H9JmpLOmyLpv+ps46yG9ls9jgH2l7QfSTg8EhETG6lxhKTp6b55WNJO6fTrgN7AXenv8cSC1Q5L/84WSzqlYFsdJJ0k6RVJSyRNqD1rKWh6OlLS6yThZXmICD/awQOYC+ydDm8MXANcWzD/fGAi8GlgM+Au4HfpvCFATQPbOhN4AtgKqAT+BZxVMO+idPhXwCvAOQXzLmig1jOA6+tMezhdvx+wUTr++3ReT2AJMJzkjc4+6XhlI/viCWDrdN2FwNPA7kAXkgPO6cVsGzgA2B4Q8N/AcmCPgv22On2tndJtLAe2TOd/F3i+kd/Zw8BR6fB44JS0hq7A4HT6p4G3ge8DHYFD0/FuTe23Rp73UGAxsKihfZgu1w94P90nnYATgdlA57p/J+l4NRDAFWktnwM+BHZK549Nfy9V6e/hcmB8nXWvBTYBNir3/1RbfZS9AD9a6Bed/IMuA95JD1TzgV3T
eUr/ubcvWP6LwKvp8BAaDoVXgOEF8/YD5qbDQ2sPesC9wFHAE+n4I8A3G6j1DOoPhVMLxn8G3JsO/xK4rs7yk4EfNLIvDisYvw24tGB8DHBHM7d9B/CLgv32AdCxYP5C4AtF/s4e5qNQuBYYB1TVWeb7wFN1pv0fcERT+62R5+0DrAJuaGK5XwMTCsY7AG8AQ+r+naTjtQf2qoJpTwGHpMMzgaEF87ZJ6+hYsO525f5fausPNx+1L1+PiC1I3oWNBh6R9BmSd/gbA9PSZoB3SA7ilUVsswfwWsH4a+k0SA5O/SRtDexGcmDrlTZfDAQeXc/63ywYXg5smg5vC3y7tva0/sEkB5WGLCgY/qCe8aK2LWl/SU9IeiudNxwobJ5ZEhGrG6h7fZxIEt5Ppc01P0qn193/pOM9C8Yb2m8NGUfyuxpe2BRVj489d0SsBebVee76NPZ7vL1gP88E1pCc0dWa18S2bQP5Yk07FBFrgL9JupzkAPc3kgPhzhHxxnpubj7JP/P0dLx3Oo2IWC5pGvAL4MWIWCnpX8CxwCsRsbihEtezhnkk7+aPXs/1Nmjb6bWT24DDgTsjYpWkO0gO3iUVEW8CR6fPOxh4QNKjfLT/C/UmCfX1JulIoBfwNWAqcIWk3SNiZT2Lzwd2LVhX6bq1f0PN+T3+KCIer6eu6mZu09aTzxTaISVGAlsCM9N3eFcA50naKl2mZ3qxsSnjgVMlVaZnAKcBhR8nfYT0rCQdf7jOeH0WANWSiv37vB44UNJ+kiokdU0v8lYVuX5zt92Z5KxrEbBa0v7AviV4znVI+nbB63mb5OC4BphEcjb2XUkdJR1M8gGCu5vxHD2APwJHR8SHwGUk109OaWCVCcABkoZK6gQcR3KN4F/p/AXAdutRwmXA2ZK2TeupTP9OrQU5FNqXuyQtA94FziZpF699h/9LkouET0h6F3gA2LGIbf6W5B3l88ALJBdsf1sw/xGSC9ePNjBen1vSn0skPd1UARExDxhJcjF7Eck7zhMowd93Y9uOiPdIPq0zgeRA/V2Si/VFkXSYpOlNLwnA54En09/fRJLrFq9GxBKSd/XHkRzATwS+1shZWGMuAW6KiMcAImnYPxoYK2nnugtHxCzgeyQfbV4MHEjysefas4rfkbxheEfS8UU8/wXpa7tP0nskF533asbrsA2g9IKOmX3CpM1DV0bEteWuxdoPnymYfQJJ2pik6eXVctdi7YtDwewTJr2u8yZJU9s/y1yOtTNuPjIzs4zPFMzMLNPqvqfQvXv3qK6uLncZZmatyrRp0xZHRJNfSG11oVBdXc3UqVPLXYaZWasiqe433+vl5iMzM8s4FMzMLONQMDOzTKu7pmBm1pRVq1ZRU1PDihUryl1Ki+vatStVVVV06tSpWes7FMyszampqWGzzTajurqapPPW9iEiWLJkCTU1NfTp06dZ23DzkZm1OStWrKBbt27tKhAAJNGtW7cNOkPKLRQkXZ3el/bFBuZL0oWSZiu5V+4eedViZu1PewuEWhv6uvM8U/grMKyR+fsDfdPHKODSHGsxM7Mi5HZNISIeLbhbUn1Gktw4Pkj68N9C0jYR8Z+8ajKz9qn6pHtKur25vz+gyWU23XRT3n33XcaOHctDDz2EJLp27cqECRPo06cPS5cuZcyYMTz+eHKjuUGDBnHRRRex+eabM3fuXPr06cOFF17ImDFjABg9ejQDBgzgiCOOKOlrqaucF5p78vH7rdak09YJBUmjSM4m6N27d/Of8YzNm79us59zaXmfuxzPW87nbo/72695XftNgPk5f/Jo/jONz4+13HzZOcx/ZQbP33sNHTp0oGb+Ajb54HWY/w5HHn0Cu3x2e6597FYATv/TpRx12EHcMu4PsGA+W3X/NBf8+Q/8+MC96Ny5E7y/KN/Xkyrnheb6Gr7q7bI1IsZFxICIGFBZWcy95M3Myu8/Cxazzdbd6dAhOdRW9diaLbf4FLNffZ1pL8zk12M/
uvX3af9vFFOfn8Erc5P3ypXdtmTooIFcc8tdLVpzOUOhhuQm37WqSG/4bmbWFnznwH246/5H2W2fQzjuN3/mmRf/DcCMl19lt513pKKiIlu2oqKC3XbekekvzcmmnTT6h5x7+fWsWbOmxWouZyhMBA5PP4X0BWCpryeYWVtS1WNrZj16O787eQwdOoihB/+EBx97koigvg8JRcTHmlD69O7JwN125sbb/95iNed2TUHSeGAI0F1SDXA60AkgIi4DJgHDSW4Wvxz4YV61mJmVS5cundn/q4PY/6uD2LqyG3dMfphfHHkoz7w4i7Vr12ZNS2vXruW5GS+xU9+Pf+nsV8f8iINGnciX92qZT+3ndqYQEYdGxDYR0SkiqiLiqoi4LA0EIvHziNg+InaNCPeHbWZtytMvzGT+m8kF4rVr1/L8jJfZtmobdujTm9132ZHfXnBltuxvL7iSPXb9LDv0+fiHaT67Qx/6992Oux94rEVqdjcXZtbmzT2mR+k21mP3JhdZvXo1XTp3ZuHitzj6hLP4cOUqAAbutjOjjzgYgKv+dDpjfn0OOwwaQQR8cc9duepPp9e7vVOOOZLd9zu0dK+hEQ4FM7MSmz5rDttXVzHsK4MY9pVB9S6z5Raf4vqLzq53XnWvHrz40C3Z+Od27sfammlFBdKGciiYmZXQZdfeyoVXj+f83xxf7lKaxaFgZlZCPzn8IH5y+EHlLqPZ3EuqmZllHApmZpZxKJiZWcahYGZmGV9oNrO2b9yQ0m6vtpfWRtTMX8DPT/k9M16aw9oIvrb3l/jjqWOZ8dIc5i9YxPChg5NNnXsZm26yMcf/5PDS1thMPlMwMyuxiOCbRx/P14cN4eXH7+Slx25n2fvLOeWc/+XZ6bOY9NA/S/Zcpe4sz2cKZmYl9tA/n6Jrl8788OCRQNID6nlnHMe2ex1Ap44diQj++dSznDw66fJtxktzGHLQ0bz+xpuMPeq7HHNk8u3l62+7hwuvvomVK1ex1+67cMk1E6ioqGDTTTfl2GOPZfLkyZx77rkMHjy4ZLX7TMHMrMSmvzSHPXfd6WPTPrXZplRX9eDUXxzFwSP25dn7b+LgkfsB8O/Zc5l8w8U8dc+1/ObP41i1ahUzX57DzRPv4/E7rubZ+2+ioqKCG264AYD333+fXXbZhSeffLKkgQA+UzAzK7mka+x1+8ZuaPoBQwfTpUtnunTpzFbdt2TBord48J9PMe2FmXx++PcB+GDFh2zVpz+QnHl861vfyqV2h4KZWYnt3G97bpv04MemvfveMubNX0BFh3UbaLp06ZwNV1RUsHrNGiLgB98+kN+dPOajBdO+j7p27fqxG/SUkpuPzMxKbOiXBrL8gxVce8vdQHIx+Lgzz+OI7xzI1pXdeG/Z8qa3MXggt979AAsXvwXAW28v5bXXXsu1bvCZgpm1B6MeLt22iuipVBK3X3kuP/vV7zjr/CtYG8Hwrw7if04azfvLP+D3F/+F3fY5JLvQXJ/+/bbjtyf+jH0P/RlrYy2dOnbk4nF/Ydttty3da6mHQ8HMLAe9en6Gu665YJ3pXbp0Zsqk6xtcr7DL7INH7pddjAayQFq2bFnpCq3DzUdmZpZxKJiZWcahYGZtUBAR5S6iLDb0dTsUzKzN6bp0DkveX93ugiEiWLJkCV27dm32Nnyh2czanKqnz6GGX7Jo8+2Adb8stkGWzkx+vrOwtNtdn+duRNeuXamqqmr2UzgUzKzN6bTyHfo8cXI+G6/tIfWML+Sz/WKeO0duPjIzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs0yuoSBpmKRZkmZLOqme+b0l/UPSM5KelzQ8z3rMzKxxuYWCpArgYmB/oD9wqKT+dRY7FZgQEbsDhwCX5FWPmZk1Lc8zhYHA7IiYExErgZuAkXWWCeBT6fDmwPwc6zEzsybkGQo9gXkF4zXptEJnAN+TVANMAsbUtyFJoyRN
lTR10aJFedRqZmbkGwr19Vdbt3PzQ4G/RkQVMBy4TtI6NUXEuIgYEBEDKisrcyjVzMwg31CoAXoVjFexbvPQkcAEgIj4P6Ar0D3HmszMrBF5hsIUoK+kPpI6k1xInlhnmdeBoQCSdiIJBbcPmZmVSW6hEBGrgdHAZGAmyaeMpks6U9KIdLHjgKMlPQeMB46I9nb/PDOzT5Bc77wWEZNILiAXTjutYHgGMCjPGszMrHj+RrOZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmYZh4KZmWUcCmZmlnEomJlZxqFgZmaZXENB0jBJsyTNlnRSA8t8R9IMSdMl3ZhnPWZm1riOeW1YUgVwMbAPUANMkTQxImYULNMXOBkYFBFvS9oqr3rMzKxpeZ4pDARmR8SciFgJ3ASMrLPM0cDFEfE2QEQszLEeMzNrQp6h0BOYVzBek04r1A/oJ+lxSU9IGpZjPWZm1oTcmo8A1TMt6nn+vsAQoAp4TNIuEfHOxzYkjQJGAfTu3bv0lZqZGZDvmUIN0KtgvAqYX88yd0bEqoh4FZhFEhIfExHjImJARAyorKzMrWAzs/auqFCQ9G1Jm6XDp0r6m6Q9mlhtCtBXUh9JnYFDgIl1lrkD+Eq63e4kzUlz1ucFmJlZ6RR7pvDriHhP0mBgP+Aa4NLGVoiI1cBoYDIwE5gQEdMlnSlpRLrYZGCJpBnAP4ATImJJc16ImZltuGKvKaxJfx4AXBoRd0o6o6mVImISMKnOtNMKhgM4Nn2YmVmZFXum8Iaky4HvAJMkdVmPdc3MrJUo9sD+HZKmnmHpJ4M+DZyQW1VmZlYWRYVCRCwHFgKD00mrgZfzKsrMzMqj2E8fnQ78kqRLCoBOwPV5FWVmZuVRbPPRN4ARwPsAETEf2CyvoszMrDyKDYWV6SeFAkDSJvmVZGZm5VJsKExIP320haSjgQeAK/Iry8zMyqGo7ylExJ8k7QO8C+wInBYR9+damZmZtbgmQyG9L8LkiNgbcBCYmbVhTTYfRcQaYLmkzVugHjMzK6Niu7lYAbwg6X7STyABRMQxuVRlZmZlUWwo3JM+zMysDSv2QvM1affX/dJJsyJiVX5lmZlZORQVCpKGkHSXPZfkjmq9JP0gIh7NrzQzM2tpxTYfnQvsGxGzACT1A8YDe+ZVmJmZtbxiv7zWqTYQACLiJZL+j8zMrA0p9kxhqqSrgOvS8cOAafmUZGZm5VJsKPwU+DlwDMk1hUeBS/IqyszMyqPYUOgIXBARf4bsW85dcqvKzMzKothrCg8CGxWMb0TSKZ6ZmbUhxYZC14hYVjuSDm+cT0lmZlYuxYbC+5L2qB2RNAD4IJ+SzMysXIq9pjAWuEXSfJIb7fQADs6tKjMzK4tGzxQkfV7SZyJiCvBZ4GZgNXAv8GoL1GdmZi2oqeajy4GV6fAXgV8BFwNvA+NyrMvMzMqgqeajioh4Kx0+GBgXEbcBt0l6Nt/SzMyspTV1plAhqTY4hgIPFcwr9nqEmZm1Ek0d2McDj0haTPJpo8cAJO0ALM25NjMza2GNhkJEnC3pQWAb4L6IiHRWB2BM3sWZmVnLarIJKCKeqGfaS/mUY2Zm5VTsl9fMzKwdcCiYmVnGoWBmZplcQ0HSMEmzJM2WdFIjyx0kKdI+lczMrExyC4X0ngsXA/sD/YFDJfWvZ7nNSG7e82RetZiZWXHyPFMYCMyOiDkRsRK4CRhZz3JnAX8AVuRYi5mZFSHPUOgJzCsYr0mnZSTtDvSKiLsb25CkUZKmSpq6aNGi0ldqZmZAvqGgeqZFNlPqAJwHHNfUhiJiXEQMiIgBlZWVJSzRzMwK5RkKNUCvgvEq
YH7B+GbALsDDkuYCXwAm+mKzmVn55BkKU4C+kvpI6gwcAkysnRkRSyOie0RUR0Q18AQwIiKm5liTmZk1IrdQiIjVwGhgMjATmBAR0yWdKWlEXs9rZmbNl2v31xExCZhUZ9ppDSw7JM9azMysaf5Gs5mZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVnGoWBmZhmHgpmZZRwKZmaWcSiYmVkm13s0m5nlqXrFjS3+nHNb/Blbls8UzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzyzgUzMws41AwM7OMQ8HMzDIOBTMzy+TaIZ6kYcAFQAVwZUT8vs78Y4GjgNXAIuBHEfFanjWVizvuMmtb2ur/dG6hIKkCuBjYB6gBpkiaGBEzChZ7BhgQEcsl/RT4A3BwXjWZWem11YNje5Vn89FAYHZEzImIlcBNwMjCBSLiHxGxPB19AqjKsR4zM2tCns1HPYF5BeM1wF6NLH8k8Pf6ZkgaBYwC6N27d6nqaxfK8S4Oyv9Ozu9ezZonzzMF1TMt6l1Q+h4wAPhjffMjYlxEDIiIAZWVlSUs0czMCuV5plAD9CoYrwLm111I0t7AKcB/R8SHOdZjZmZNyPNMYQrQV1IfSZ2BQ4CJhQtI2h24HBgREQtzrMXMzIqQ25lCRKyWNBqYTPKR1KsjYrqkM4GpETGRpLloU+AWSQCvR8SIvGoya8t8HcVKIdfvKUTEJGBSnWmnFQzvnefzm5nZ+vE3ms3MLJPrmYK1b27OMGt9HApmJeQgtNbOzUdmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpZxKJiZWcahYGZmGYeCmZllHApmZpbpWO4CWlL1ihtb/Dnntvgzmpk1n88UzMws41AwM7OMQ8HMzDIOBTMzy+QaCpKGSZolabakk+qZ30XSzen8JyVV51mPmZk1LrdQkFQBXAzsD/QHDpXUv85iRwJvR8QOwHnAOXnVY2ZmTcvzTGEgMDsi5kTESuAmYGSdZUYC16TDtwJDJSnHmszMrBGKiHw2LB0EDIuIo9Lx7wN7RcTogmVeTJepScdfSZdZXGdbo4BR6eiOwKxcim5cd2Bxk0tZqXh/txzv65ZVrv29bURUNrVQnl9eq+8df90EKmYZImIcMK4URTWXpKkRMaCcNbQn3t8tx/u6ZX3S93eezUc1QK+C8SpgfkPLSOoIbA68lWNNZmbWiDxDYQrQV1IfSZ2BQ4CJdZaZCPwgHT4IeCjyas8yM7Mm5dZ8FBGrJY0GJgMVwNURMV3SmcDUiJgIXAVcJ2k2yRnCIXnVUwJlbb5qh7y/W473dcv6RO/v3C40m5lZ6+NvNJuZWcahYGZmGYcCIOlqSQvT703UTjtL0vOSnpV0n6QeBfOGpNOnS3qkPFW3bpLmSnoh3Y9T68w7XlJI6l5n+uclrUm/A2NFkvQLSS+mf69j02lnSHoj3f/PShqeTt9H0rT0dzNN0lfLW33rUKpjSFNdA7WIiGj3D+DLwB7AiwXTPlUwfAxwWTq8BTAD6J2Ob1Xu+lvjg+T+Q93rmd6L5MMJrxXOJ/mwwkPAJOCgctffWh7ALsCLwMYkHyx5AOgLnAEcX8/yuwM9CtZ9o9yvoTU8SnEMSf/GXwG2AzoDzwH9W/q1+EwBiIhHqfP9iIh4t2B0Ez76Ut13gb9FxOvpcgtbpMj24zzgRNb9EuMY4DbA+3v97AQ8ERHLI2I18AjwjYYWjohnIqL2+0TTga6SurRAna1aiY4hxXQNlDuHQiMknS1pHnAYcFo6uR+wpaSH09Prw8tXYasWwH3pPhwFIGkE
yTvT5woXlNST5EB2WcuX2eq9CHxZUjdJGwPD+ehLpaPT5o2rJW1Zz7rfAp6JiA9bqti2Zj2PIT2BeQWr16TTWpRDoRERcUpE9AJuAGr7bOoI7AkcAOwH/FpSvzKV2JoNiog9SHrR/bmkLwOn8NE/TqHzgV9GxJqWLLAtiIiZJL0P3w/cS9IksRq4FNge2A34D3Bu4XqSdk7X+3FL1tvWrOcxpKhuf/LmUCjOjSTvmiBJ73sj4v1IOu57FPhc2SprpWqbKNJT59uB/wb6AM9JmkvSLcrTkj4DDABuSqcfBFwi6evlqLs1ioirImKPiPgySRPHyxGxICLWRMRa4AqSpgsAJFWR/E4Oj4hXylN1m1PMMaSYroFy51BogKS+BaMjgH+nw3cCX5LUMT0d3wuY2dL1tWaSNpG0We0wsC8wJSK2iojqiKgm+QfZIyLejIg+BdNvBX4WEXeUq/7WRtJW6c/ewDeB8ZK2KVjkGyTNTEjaArgHODkiHm/pWtuSZhxDiukaKHd59pLaakgaDwwBukuqAU4HhkvaEVhL8kmYn0ByOi7pXuD5dN6VEfFivRu2hmwN3J7eOqMjcGNE3Fvektq02yR1A1YBP4+ItyVdJ2k3kuaJuXzUTDQa2IGkSePX6bR9/YGKxpXqGFJf10At/lrSj0KZmZm5+cjMzD7iUDAzs4xDwczMMg4FMzPLOBTMzCzjUDAzs4xDwdqNtK+Z/epMGyvpkkbWWVbiGv7qrr/tk8yhYO3JeNa9D/gh6XQzw6Fg7cutwNdqu4KWVA30AJ6V9KCkp9Oby6zTXXF6U5S7C8b/V9IR6fCekh5Je7ycXKcLiQY1tF56RnOOpKckvSTpSxv6ws2K5VCwdiMilgBPAcPSSYcANwMfAN9Ie239CnCu0j44miKpE3ARyY1/9gSuBs4uwXodI2IgMJakywSzFuG+j6y9qW1CujP9+SOSLov/J+2+ey1JH/ZbA28Wsb0dSe5Qdn+aIxUkXVFv6Hp/S39OA6qL2J5ZSTgUrL25A/izpD2AjSLi6bQZqBLYMyJWpV10d62z3mo+fmZdO1/A9Ij44nrW0dR6tTe2WYP/T60FufnI2pWIWAY8TNJcU3uBeXNgYRoIXwG2rWfV14D+krpI2hwYmk6fBVRK+iIkzULpDWqa0tz1zHLldyDWHo0naZ6p/STSDcBdkqYCz/JRv/eZiJgnaQJJd8cvA8+k01emHzG9MA2LjiR3imu0y+PmrmeWN3edbWZmGTcfmZlZxs1HZjmQdDEwqM7kCyLiL+Wox6xYbj4yM7OMm4/MzCzjUDAzs4xDwczMMg4FMzPL/H9qUIay+rkP6gAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] @@ -628,7 +628,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Jsons below the mean are 90.48% of all jsons\n" + "Jsons below the mean are 80.93% of all jsons\n" ] } ], @@ -638,6 +638,13 @@ "total = bellow_mean_count + above_mean_count\n", "print(\"Jsons below the mean are {0:.2f}% of all jsons\".format(bellow_mean_count/total * 100))" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I idenfified lots of falses positives for \"valid_json\", my false positives were all small values, like a number passing on as a 'valid json', it did not make too much of a difference in the overall analysis, but made me think, are there more false positives? how can I eliminate them? " + ] } ], "metadata": { From b77dccf91a843055989922f55ef85a4952d241d6 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Wed, 17 Apr 2019 19:40:36 -0300 Subject: [PATCH 19/23] Add new notebookt 'isJson_Occurrence_of_operation_symbols_domains.ipynb' --- .../isJson_Value_Distribution.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb index 3ff820d..f7b718a 100644 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Value_Distribution.ipynb @@ -109,7 +109,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 1.5s\n" + "[########################################] | 100% Completed | 1.7s\n" ] } ], @@ -142,7 +142,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 4, From 2be179c944896ce9ff1362ae47508de92074fe7f Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Wed, 17 Apr 2019 19:41:14 -0300 Subject: [PATCH 20/23] Clean run of the dataPrep with all columns --- .../isJson_dataPrep.ipynb | 724 ++++++++++++------ 1 file changed, 501 
insertions(+), 223 deletions(-) diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb index f0dbbc4..7577bfa 100644 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_dataPrep.ipynb @@ -52,7 +52,7 @@ "# client\n", "\n", "#Create folder to save/read new data\n", - "DIR = 'sample0_prep/'\n", + "DIR = 'sample_0_prep/'\n", "FILE_NAME = 's0'\n", "\n", "if not os.path.exists(DIR):\n", @@ -106,8 +106,12 @@ { "data": { "text/plain": [ - "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location',\n", - " 'operation'],\n", + "Index(['argument_0', 'argument_1', 'argument_2', 'argument_3', 'argument_4',\n", + " 'argument_5', 'argument_6', 'argument_7', 'argument_8', 'arguments',\n", + " 'arguments_n_keys', 'call_stack', 'crawl_id', 'file_name', 'func_name',\n", + " 'in_iframe', 'location', 'operation', 'script_col', 'script_line',\n", + " 'script_loc_eval', 'script_url', 'symbol', 'time_stamp', 'value',\n", + " 'value_1000', 'value_len'],\n", " dtype='object')" ] }, @@ -117,10 +121,10 @@ } ], "source": [ - "#Original sample \n", + "#Original sample sample_0.parquet'\n", "df = dd.read_parquet('sample_0.parquet', \n", - " engine='pyarrow', \n", - " columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location', 'operation'])\n", + " engine='pyarrow', )\n", + "# columns=['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location', 'operation'])\n", "\n", "# df.astype({'value_1000': str, 'value': str,'value_len': int,'symbol': int,'script_url': str})\n", "df.columns" @@ -147,8 +151,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 58.7s\n", - "1356.9776628910975 0 4496861 26310.62140481331 11292867\n" + "[########################################] | 100% Completed | 1min 19.3s\n", + "MEAN: 
1356.9776628910975,\n", + "MIN: 0,\n", + "MAX: 4496861,\n", + "std: 26310.62140481331,\n", + "LEN: 11292867\n" ] } ], @@ -160,7 +168,7 @@ " df_std = df['value_len'].std()\n", " df_len = df['value_len'].count()\n", " (df_mean, df_min, df_max, df_std, df_len) = dd.compute(df_mean, df_min, df_max, df_std, df_len);\n", - " print(df_mean, df_min, df_max, df_std, df_len)" + " print(\"MEAN: {},\\nMIN: {},\\nMAX: {},\\nstd: {},\\nLEN: {}\".format(df_mean, df_min, df_max, df_std, df_len))" ] }, { @@ -227,7 +235,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 6min 23.0s\n" + "[########################################] | 100% Completed | 7min 22.3s\n" ] } ], @@ -355,12 +363,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Notebook name: s0_domains_isjson\n" + "Notebook name: s0_domains_isJson\n" ] } ], "source": [ - "FILE_NAME += '_isjson'\n", + "FILE_NAME += '_isJson'\n", "print('Notebook name: ', FILE_NAME)" ] }, @@ -387,7 +395,7 @@ "metadata": {}, "outputs": [], "source": [ - "df['is_json'] = df['value'].apply(is_json, meta=False)" + "df['is_json'] = df['value'].apply(is_json, meta='O')" ] }, { @@ -399,7 +407,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 4min 21.6s\n" + "[########################################] | 100% Completed | 5min 12.2s\n" ] } ], @@ -492,8 +500,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Add Column: value_md5\n", - "Include new columns called \"value_md5\" that is the md5 of value column" + "# Add json keys" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extract the top level keys, sort them and add as a list into another column named 'json_keys'\n", + "Will be using \"https://github.com/rnd0101/json_schema_inferencer\" to guess the json schema and save it into another column called \"json_schema\"" ] }, { @@ -505,12 +520,12 @@ "name": 
"stdout", "output_type": "stream", "text": [ - "Notebook name: s0_domains_isjson_md5\n" + "Notebook name: s0_domains_isJson_jsonKeys\n" ] } ], "source": [ - "FILE_NAME += '_md5'\n", + "FILE_NAME += '_jsonKeys'\n", "print('Notebook name: ', FILE_NAME)" ] }, @@ -520,41 +535,43 @@ "metadata": {}, "outputs": [], "source": [ - "def md5(value):\n", - " return hashlib.md5(value.encode('utf-8')).hexdigest()" + " def jsonKeys(r):\n", + " if(r['is_json']):\n", + " try:\n", + " dct = json.loads(r['value'])\n", + " keys = list(dct.keys())\n", + " keys.sort()\n", + " return str(keys)\n", + " except ValueError as e:\n", + " return ''\n", + " else:\n", + " return ''" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, - "outputs": [], - "source": [ - "df['value_md5'] = df['value'].apply(md5, meta='O')" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 2min 45.9s\n" + "[########################################] | 100% Completed | 8min 32.7s\n" ] } ], "source": [ - "#save\n", + "df['json_keys'] = df.apply(jsonKeys,axis=1, meta='O')\n", "save_parquet(df=df, name=FILE_NAME)" ] }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "execution_count": 19, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -578,198 +595,282 @@ " \n", " \n", " value_1000\n", - " value_md5\n", + " is_json\n", + " json_keys\n", " \n", " \n", " \n", " \n", " 0\n", " fXDcab74\n", - " 7df64196939a8b6ff11482ed6df4b25a\n", + " False\n", + " \n", " \n", " \n", " 1\n", " fXDcab74\n", - " 7df64196939a8b6ff11482ed6df4b25a\n", + " False\n", + " \n", " \n", " \n", " 2\n", " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", - " bc0aac3569031babbd73e069947a4b12\n", + " False\n", + " \n", " \n", " \n", " 3\n", " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", - " bc0aac3569031babbd73e069947a4b12\n", + " 
False\n", + " \n", " \n", " \n", " 4\n", " _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17...\n", - " 324dd29b8c6438bc700ac2d85e33f12d\n", + " False\n", + " \n", " \n", " \n", "\n", "" ], "text/plain": [ - " value_1000 \\\n", - "0 fXDcab74 \n", - "1 fXDcab74 \n", - "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", - "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", - "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... \n", - "\n", - " value_md5 \n", - "0 7df64196939a8b6ff11482ed6df4b25a \n", - "1 7df64196939a8b6ff11482ed6df4b25a \n", - "2 bc0aac3569031babbd73e069947a4b12 \n", - "3 bc0aac3569031babbd73e069947a4b12 \n", - "4 324dd29b8c6438bc700ac2d85e33f12d " + " value_1000 is_json json_keys\n", + "0 fXDcab74 False \n", + "1 fXDcab74 False \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... False \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... False \n", + "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... False " ] }, - "execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#read\n", + "#read \n", "df = read_parquet(FILE_NAME)\n", - "df[['value_1000', 'value_md5']].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Saving other possible usefull filtered samples to future analyses" + "df[['value_1000', 'is_json', 'json_keys']].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## value_len > df_mean\n", - "1356 is the value_len mean\n", - "\n", - "To filter the data into something that is more interesting to this task I decided to only work with values that are at above the mean.\n", - "\n", - "All values above the mean count up to 499805 rows. That is just 4,42% of the whole sample, and a lot easier to work on. 
" + "# Add Column: keys_md5\n", + "Include new columns called \"keys_md5\" that is the md5 of json_keys column" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Notebook name: s0_domains_isjson_md5_above_mean\n" + "Notebook name: s0_domains_isJson_jsonKeys_md5\n" ] } ], "source": [ - "name = FILE_NAME + '_above_mean'\n", - "print('Notebook name: ', name)" + "FILE_NAME += '_md5'\n", + "print('Notebook name: ', FILE_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "def md5(value):\n", + " if (value == ''):\n", + " return ''\n", + " else:\n", + " return hashlib.md5(value.encode('utf-8')).hexdigest()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, + "outputs": [], + "source": [ + "df['keys_md5'] = df['json_keys'].apply(md5, meta='O')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 50.5s\n", - "Npartition: 245\n", - "[########################################] | 100% Completed | 1min 38.3s\n" + "[########################################] | 100% Completed | 3min 49.6s\n" ] } ], "source": [ - "#Save\n", - "save_parquet(df= df[df['value_len'] > df_mean], name= name, recalculate_partition=True)" + "#save\n", + "save_parquet(df=df, name=FILE_NAME)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": { - "scrolled": false + "scrolled": true }, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_1000keys_md5
0fXDcab74
1fXDcab74
2Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...
3Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...
4_ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17...
\n", + "
" + ], "text/plain": [ - "Index(['value_1000', 'value', 'value_len', 'symbol', 'script_url', 'location',\n", - " 'operation', 'location_domain', 'script_domain', 'is_json',\n", - " 'value_md5'],\n", - " dtype='object')" + " value_1000 keys_md5\n", + "0 fXDcab74 \n", + "1 fXDcab74 \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", + "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... " ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#Read\n", - "df = read_parquet(name)\n", - "df.columns" + "#read\n", + "df = read_parquet(FILE_NAME)\n", + "df[['value_1000', 'keys_md5']].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Filter to parquet containing only JSON " + "# TLD\n", + "Include new columns called \"script_tld\" that is the the TLD for the script_domain" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Notebook name: s0_domains_isjson_md5_JSON_ONLY\n" + "Notebook name: s0_domains_isJson_jsonKeys_md5_TLD\n" ] } ], "source": [ - "name = FILE_NAME + '_JSON_ONLY'\n", - "print('Notebook name: ', name)" + "FILE_NAME += '_TLD'\n", + "print('Notebook name: ', FILE_NAME)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "def extractTLD(domain):\n", + " return domain.split('.')[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "df['script_tld'] = df['script_domain'].apply(extractTLD, meta='O')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 28.9s\n", - "Npartition: 233\n", - 
"[########################################] | 100% Completed | 1min 5.0s\n" + "[########################################] | 100% Completed | 3min 59.4s\n" ] } ], "source": [ - "save_parquet(df=df[df['is_json'] == True], name=name, recalculate_partition=True)" + "#save\n", + "save_parquet(df=df, name=FILE_NAME)" ] }, { "cell_type": "code", - "execution_count": 26, - "metadata": {}, + "execution_count": 29, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -792,144 +893,194 @@ " \n", " \n", " \n", - " value_1000\n", - " is_json\n", + " script_domain\n", + " script_tld\n", " \n", " \n", " \n", " \n", " 0\n", - " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " True\n", + " vk.com\n", + " com\n", " \n", " \n", " 1\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " True\n", + " vk.com\n", + " com\n", " \n", " \n", " 2\n", - " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " True\n", + " vk.com\n", + " com\n", " \n", " \n", " 3\n", - " {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...\n", - " True\n", + " baidustatic.com\n", + " com\n", " \n", " \n", " 4\n", - " {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...\n", - " True\n", + " google.com\n", + " com\n", " \n", " \n", "\n", "" ], "text/plain": [ - " value_1000 is_json\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... True\n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", - "3 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... True\n", - "4 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... 
True" + " script_domain script_tld\n", + "0 vk.com com\n", + "1 vk.com com\n", + "2 vk.com com\n", + "3 baidustatic.com com\n", + "4 google.com com" ] }, - "execution_count": 26, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#read all_json_above_mean\n", - "df = read_parquet(name)\n", - "df[['value_1000', 'is_json']].head()" + "#read\n", + "df = read_parquet(FILE_NAME)\n", + "df[['script_domain', 'script_tld']].head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Add json keys and schema columns" + "# Saving other possible usefull filtered samples to future analyses" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Extract the top level keys, sort them and add as a list into another column named 'json_keys'\n", - "Will be using \"https://github.com/rnd0101/json_schema_inferencer\" to guess the json schema and save it into another column called \"json_schema\"" + "## value_len > df_mean\n", + "1356 is the value_len mean\n", + "\n", + "To filter the data into something that is more interesting to this task I decided to only work with values that are at above the mean.\n", + "\n", + "All values above the mean count up to 499805 rows. That is just 4,42% of the whole sample, and a lot easier to work on. 
" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Notebook name: s0_domains_isjson_md5_JSON_ONLY_schema_keys\n" + "Notebook name: s0_domains_isJson_jsonKeys_md5_TLD_above_mean\n" ] } ], "source": [ - "name += '_schema_keys'\n", + "name = FILE_NAME + '_above_mean'\n", "print('Notebook name: ', name)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 2min 23.6s\n" + ] + } + ], "source": [ - "from json_schema_inferencer.guess_json_schema import guess_schema\n", - "\n", - "def jsonSchema(myjson):\n", - " try:\n", - " dct = json.loads(myjson)\n", - " value = guess_schema(dct)\n", - " l = list(value['properties'])\n", - " l.sort()\n", - " return l\n", - " except ValueError as e:\n", - " return list()\n", - " \n", - "def jsonKeys(myjson):\n", - " try:\n", - " dct = json.loads(myjson)\n", - " keys = list(dct.keys())\n", - " keys.sort()\n", - " return keys\n", - " except ValueError as e:\n", - " return list()" + "#Save\n", + "save_parquet(df= df[df['value_len'] > df_mean], name= name)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['argument_0', 'argument_1', 'argument_2', 'argument_3', 'argument_4',\n", + " 'argument_5', 'argument_6', 'argument_7', 'argument_8', 'arguments',\n", + " 'arguments_n_keys', 'call_stack', 'crawl_id', 'file_name', 'func_name',\n", + " 'in_iframe', 'location', 'operation', 'script_col', 'script_line',\n", + " 'script_loc_eval', 'script_url', 'symbol', 'time_stamp', 'value',\n", + " 'value_1000', 'value_len', 'location_domain', 'script_domain',\n", + " 'is_json', 'json_keys', 'keys_md5', 
'script_tld'],\n", + " dtype='object')" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Read\n", + "df = read_parquet(name)\n", + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filter to parquet containing only JSON " + ] + }, + { + "cell_type": "code", + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 4min 18.1s\n" + "Notebook name: s0_domains_isJson_jsonKeys_md5_TLD_JSON_ONLY\n" ] } ], "source": [ - "df['json_keys'] = df.value.apply(jsonKeys, meta='O')\n", - "df['json_schema'] = df.value.apply(jsonSchema, meta='O')\n", - "save_parquet(df=df, name=name)\n" + "name = FILE_NAME + '_JSON_ONLY'\n", + "print('Notebook name: ', name)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 34, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 1min 20.0s\n" + ] + } + ], + "source": [ + "save_parquet(df=df[df['is_json'] == True], name=name)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -953,77 +1104,57 @@ " \n", " \n", " value_1000\n", - " json_keys\n", - " json_schema\n", + " is_json\n", " \n", " \n", " \n", " \n", " 0\n", " {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site...\n", - " [im-settings]\n", - " [im-settings]\n", + " True\n", " \n", " \n", " 1\n", " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c]\n", - " [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c]\n", + " True\n", " \n", " \n", " 2\n", " {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c...\n", - " [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c]\n", - " [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c]\n", + " 
True\n", " \n", " \n", " 3\n", " {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...\n", - " [LastSearch, LastSearch_e, dueljs_channel_comm...\n", - " [LastSearch, LastSearch_e, dueljs_channel_comm...\n", + " True\n", " \n", " \n", " 4\n", " {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279...\n", - " [LastSearch, LastSearch_e, dueljs_channel_comm...\n", - " [LastSearch, LastSearch_e, dueljs_channel_comm...\n", + " True\n", " \n", " \n", "\n", "" ], "text/plain": [ - " value_1000 \\\n", - "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... \n", - "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... \n", - "3 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... \n", - "4 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... \n", - "\n", - " json_keys \\\n", - "0 [im-settings] \n", - "1 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", - "2 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", - "3 [LastSearch, LastSearch_e, dueljs_channel_comm... \n", - "4 [LastSearch, LastSearch_e, dueljs_channel_comm... \n", - "\n", - " json_schema \n", - "0 [im-settings] \n", - "1 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", - "2 [APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c] \n", - "3 [LastSearch, LastSearch_e, dueljs_channel_comm... \n", - "4 [LastSearch, LastSearch_e, dueljs_channel_comm... " + " value_1000 is_json\n", + "0 {\"im-settings\":\"{\\\"val\\\":{\\\"settings\\\":{\\\"Site... True\n", + "1 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", + "2 {\"APLUS_S_CORE_0.17.12_20171214163401_2ee09a0c... True\n", + "3 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... True\n", + "4 {\"dueljs_channel_comm\":\"[{\\\"id\\\":4734405521279... 
True" ] }, - "execution_count": 30, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#read \n", + "#read all_json_above_mean\n", "df = read_parquet(name)\n", - "df[['value_1000', 'json_keys', 'json_schema']].head()" + "df[['value_1000', 'is_json']].head()" ] }, { @@ -1035,14 +1166,14 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Notebook name: s0_domains_isjson_md5_nonJSON_ONLY\n" + "Notebook name: s0_domains_isJson_jsonKeys_md5_TLD_nonJSON_ONLY\n" ] } ], @@ -1054,16 +1185,16 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[########################################] | 100% Completed | 1min 54.5s\n", - "Npartition: 116\n", - "[########################################] | 100% Completed | 1min 13.1s\n" + "[########################################] | 100% Completed | 4min 34.1s\n", + "Npartition: 285\n", + "[########################################] | 100% Completed | 2min 11.3s\n" ] } ], @@ -1073,17 +1204,9 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 38, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/dataframe/core.py:4494: UserWarning: Insufficient elements for `head`. 5 elements requested, only 0 elements available. 
Try passing larger `npartitions` to `head`.\n", - " warnings.warn(msg.format(n, len(r)))\n" - ] - }, { "data": { "text/html": [ @@ -1105,33 +1228,195 @@ " \n", " \n", " \n", - " value_1000\n", + " argument_0\n", + " argument_1\n", + " argument_2\n", + " argument_3\n", + " argument_4\n", + " argument_5\n", + " argument_6\n", + " argument_7\n", + " argument_8\n", + " arguments\n", + " ...\n", + " time_stamp\n", " value\n", + " value_1000\n", " value_len\n", - " symbol\n", - " script_url\n", - " location\n", - " operation\n", " location_domain\n", " script_domain\n", " is_json\n", - " value_md5\n", " json_keys\n", - " json_schema\n", + " keys_md5\n", + " script_tld\n", " \n", " \n", " \n", + " \n", + " 0\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " {}\n", + " ...\n", + " 2017-12-16 19:02:31.406\n", + " fXDcab74\n", + " fXDcab74\n", + " 8\n", + " vk.com\n", + " vk.com\n", + " False\n", + " \n", + " \n", + " com\n", + " \n", + " \n", + " 1\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " {}\n", + " ...\n", + " 2017-12-16 19:02:31.407\n", + " fXDcab74\n", + " fXDcab74\n", + " 8\n", + " vk.com\n", + " vk.com\n", + " False\n", + " \n", + " \n", + " com\n", + " \n", + " \n", + " 2\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " {}\n", + " ...\n", + " 2017-12-16 19:02:31.659\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", + " 68\n", + " vk.com\n", + " vk.com\n", + " False\n", + " \n", + " \n", + " com\n", + " \n", + " \n", + " 3\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " {}\n", + " ...\n", + " 2017-12-16 00:24:09.355\n", + " Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko...\n", + " Mozilla/5.0 
(X11; Linux x86_64; rv:52.0) Gecko...\n", + " 68\n", + " baidu.com\n", + " baidustatic.com\n", + " False\n", + " \n", + " \n", + " com\n", + " \n", + " \n", + " 4\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " {}\n", + " ...\n", + " 2017-12-16 01:24:30.372\n", + " _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17...\n", + " _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17...\n", + " 288\n", + " serienjunkies.org\n", + " google.com\n", + " False\n", + " \n", + " \n", + " com\n", + " \n", " \n", "\n", + "

5 rows × 33 columns

\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [value_1000, value, value_len, symbol, script_url, location, operation, location_domain, script_domain, is_json, value_md5, json_keys, json_schema]\n", - "Index: []" + " argument_0 argument_1 argument_2 argument_3 argument_4 argument_5 \\\n", + "0 None None None None None None \n", + "1 None None None None None None \n", + "2 None None None None None None \n", + "3 None None None None None None \n", + "4 None None None None None None \n", + "\n", + " argument_6 argument_7 argument_8 arguments ... time_stamp \\\n", + "0 None None None {} ... 2017-12-16 19:02:31.406 \n", + "1 None None None {} ... 2017-12-16 19:02:31.407 \n", + "2 None None None {} ... 2017-12-16 19:02:31.659 \n", + "3 None None None {} ... 2017-12-16 00:24:09.355 \n", + "4 None None None {} ... 2017-12-16 01:24:30.372 \n", + "\n", + " value \\\n", + "0 fXDcab74 \n", + "1 fXDcab74 \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... \n", + "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... \n", + "\n", + " value_1000 value_len \\\n", + "0 fXDcab74 8 \n", + "1 fXDcab74 8 \n", + "2 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... 68 \n", + "3 Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko... 68 \n", + "4 _ga=GA1.2.1529583939.1513387469; _gid=GA1.2.17... 
288 \n", + "\n", + " location_domain script_domain is_json json_keys keys_md5 script_tld \n", + "0 vk.com vk.com False com \n", + "1 vk.com vk.com False com \n", + "2 vk.com vk.com False com \n", + "3 baidu.com baidustatic.com False com \n", + "4 serienjunkies.org google.com False com \n", + "\n", + "[5 rows x 33 columns]" ] }, - "execution_count": 35, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1141,13 +1426,6 @@ "df = read_parquet(name)\n", "df.head()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From f30f68ab2093d57dc2d32ee763b9cfb2ba373c1e Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 22 Apr 2019 00:08:51 -0300 Subject: [PATCH 21/23] Add isJson_Identify_Source.ipynb --- .../isJson_Identify_Source.ipynb | 350 +++++ ...urrence_of_operation_symbols_domains.ipynb | 1279 +++++++++++++++++ 2 files changed, 1629 insertions(+) create mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_Identify_Source.ipynb create mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_Occurrence_of_operation_symbols_domains.ipynb diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Identify_Source.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Identify_Source.ipynb new file mode 100644 index 0000000..23bae7d --- /dev/null +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Identify_Source.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start Dask" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n" + ] + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.diagnostics import ProgressBar\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Objective\n", + "\n", + "The objective of this notebook is to answer: \n", + " - \"The JSON values are always from the same location or related domains?\" \n", + "\n", + "To answer this we will use the sample data set produced by the notebook \"isJson_dataPrep.ipynb\":\n", + "- 's0_domains_isJson_jsonKeys_md5_TLD_JSON_ONLY.parquet'\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Findings\n", + "To answer the question \"The JSON values are always from the same location or related domains?\" \n", + "NO not aways, but usually. 83.09% of the JSONs are produced by a single script domain. \n", + "\n", + "---\n", + "\n", + "About 71% of the JSONs are seen more than once across the data set, that means that they MAY have different origins.\n", + "- Most JSON are from a single script domain. \n", + "- Almost 17% of the JSONs have multiple origins[1], mostly they have 2 to 3 origins, very few have more than this. \n", + "- They may be related for 40% of them have the same TLD[2]. \n", + "- Some of the ones that have multiple script domains have the same location domain (41%) calling different scripts but producing the same JSON[3]. \n", + "- They may have some similarities in usage, 99% of them have a single simbol across the different domains[4]\n", + "\n", + "---\n", + " For further investigation: \n", + " 1. Are this jsons any different? Are they big/small jsons? I may be that they have the same top keys but are in reality very different? \n", + " 2. Do the scripts domains that produces the same json have any relation between them? 
How can I relate domains?\n", + " 3. What does it mean to different scripts get the same JSON for a single Location? \n", + " 4. Are they used for the same purpose? can we really say that based on the symbol? " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DIR = 'sample_0_prep/'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['symbol', 'location_domain', 'script_domain', 'json_keys', 'keys_md5',\n", + " 'script_tld', 'value_len'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet(DIR + 's0_domains_isJson_jsonKeys_md5_TLD_JSON_ONLY.parquet',\n", + " engine='pyarrow',\n", + " columns=['symbol', 'location_domain', 'script_domain', 'json_keys', 'keys_md5', 'script_tld', 'value_len'])\n", + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# The JSON values are always from the same location or related domains?\n", + "How many locations one JSON has?\n", + "All bigger json have the same locations?\n", + "what is \"related domains\"?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 1.8s\n", + "The total number of different keys_md5 is 14374\n" + ] + } + ], + "source": [ + "with ProgressBar():\n", + " group_by_keys_md5 = df.compute().groupby(['keys_md5'])\n", + " group_by_keys_md5_number_of_different_keys = len(group_by_keys_md5)\n", + " print(\"The total number of different {} is {}\".format('keys_md5', group_by_keys_md5_number_of_different_keys))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "agg = 
group_by_keys_md5.agg(['nunique'])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are a total of 10222(71.11%) JSONs that appear in multiple rows\n" + ] + } + ], + "source": [ + "\n", + "json_multiple_appearances = agg['symbol'][group_by_keys_md5['symbol'].count() > 1]\n", + "json_multiple_appearances_len = len(json_multiple_appearances)\n", + "agg_len = len(agg['symbol'])\n", + "print('There are a total of {0}({1:0.2f}%) JSONs that appear in multiple rows'.format(\n", + " json_multiple_appearances_len, \n", + " json_multiple_appearances_len*100/agg_len))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_multiple(agg, column, title=''):\n", + " agg_len = len(agg[column])\n", + " x = agg[agg[column]['nunique'] > 1]\n", + " x_len = len(x)\n", + " print(title + '{0} ({1:0.2f}%) multiple {2},\\n{3} ({4:0.2f}%) unique {2}'.format(\n", + " x_len,\n", + " x_len*100/agg_len,\n", + " column, \n", + " agg_len - x_len,\n", + " (agg_len - x_len) * 100 / agg_len\n", + " ))\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### JSONs origin: script domain\n", + "\n", + "Plot that shows that most JSONs are originated from a single script domain" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SCRIPT DOMAIN data: from the total of json\n", + "2430 (16.91%) multiple script_domain,\n", + "11944 (83.09%) unique script_domain\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAEnCAYAAABSTgMJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAGfFJREFUeJzt3X2UXXV97/H3hzwYxKCYjAvIBCZARENBqUNoi61cDSUEm7S31pVUriAPubqIcrVaI9dyIbVWsGK5y7hKpC5dKIaI1gwajYraXiwPGZGLTUJuxvCQQ0odAghUISR87x97T9ycnMnsk5yZfc5vPq+1zsp++J19vrNz5jO/89v77K2IwMzM0nJI1QWYmVnrOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcLe2JekZSce1uu1YkXRMXteEUdr+mZJqo7HtEV73HZK+O9ava81xuCdG0o8kXZxPXy7pgTxgapJurmv7Vkl3S/pPSTslfVlSd2H9BZJC0ofqnleTdOZo/ywR8bKI2NbqtiORdJGk+yU9Lek/JH1L0tRmtxMRD+d17Snxmj35vp54YFWPnYj4ckT8YdV12P453BMl6XzgvwHzIuJlQC9wW2H924CbgOuA6cBJwHPA7ZKOKGzqceDDkg4fw9orCzhJbwI+DiyJiKnAa4E1B7Cdtg9pS5vDPV2nAesj4ucAEfFoRKwCkCTgU8DH8l7YryPiUeBi4Bng/YXtbAbuqFu2l6S5kvolPZX3cq8driBJl0gakPS4pD5JRxfWhaRLJW0FthaWnZBPT5N0a/46GyR9TNLtdc8favsFSSvzHvfTku6SdHwT++2OiPhpvt8ej4gvRsTT+bYPlfQpSQ9J+qWk2/NlQz3viyQ9DPygvjeef6r62/zT0i8lrZX0yvx1/yX/98n8k9bvNth/h+Y/2xOSNuW1Fte/Nn+NJyVtlLSwsO4Lkj4r6dv59n8s6UhJf59v735JpxbaL5f083z/bZL0J4V1FzTY9++WtDXf1sr8PWYVcrin607gnZI+JKm3btz3ROAY4KvFJ0TEC8DXgLPqtvVXwPsLQVR0HXBdRBwOHM8wvVxJbwb+Fng7cBTwELC6rtkfA6cDcxpsYiXwn8CRwPn5Y3+WAFcBRwADwN8UavmmpOXDPO8u4GxJV0k6Q9JL6tb/HfAG4PeAVwJ/CbxQWP8mst7+2cNs/53AhcDRwG7gf+fL/yD/9xX5UM4dDZ77v8j28fH59vfuA0mTgFuB7wKvAt4LfFnSiYXnvx34KNkntefI/mjfk8/fAhT/MP8c+H3g5WT78UuSjhrmZwJ4K9kfm9flrzPcz29jJSL8SOgB/Ai4OJ9+B/B9slDcCSzPl78RCGBKg+e/G9iaT18A3J5PrwGuzqdrwJn59L+Q/fJPH6GufwSuKcy/DHge6MnnA3hz3XMCOAGYkLc9sbDuY0O1Fdvm018AbiisWwDc38Q+PIcsKJ8k+yRzbV7DIcCvgdc1eE5PXsNxDZZNLPzffKKwfg6wK9/2i9oOU9c2YH5hfilQy6d/H3gUOKSw/ivAlYV98rnCuvcCmwvzJwNP7ue17wUW1b8vCvv+jYX5NUPvNT+qe7jnnrDIhlzmAa8gC+0Vks4GHsubNOqJHVVYX3QF8B5JR9Ytvwh4NXB/Plzy1mHKOZqstz5U2zNkf3BmFNpsH+a5XcDEuvXDtR3yaGH6V2R/TEqJiG9HxB+R9cwXkYXZxWQ93ClkvdrhjFRXcf1DwKR8u2Uc3eD5L1oX2aev4vri/v2PwvSvG8zv3UeS3inp3nyI50ngt0ao84D3t40Oh/s4EBHPR8RXgfvIfkm3kPW+/6zYTtIhwJ9SOPBa2Mb9wNeBy+uWb42IJWRDAVcDt0g6rEEZO4BjC691GDANeKS4uWF+hEGyIYzuwrKZw7RtmYh4ISJuA35Att8eA54lGxYZ9mkjbLZY9zFkn0geK/E8gH9v8PwhO4C
Z+f9hcX1x/5Yi6Vjgc8AyYFpEvAL4N8Dj6B3E4Z6o/KDXuZKmSjpE0jlkZ8TcFdln5w8CH5X05/mBuiOBG4DDgU8Ps9mrgHeRfRIYep3zJHXlPcYn88WNTv27CXiXpNfn49gfz2t5cKSfJbJTCb8OXCnppZJeQzZ23XKSFklaLOkIZeaSjaPfmf+MnweulXS0pAmSfrfBuPz+nCdpjqSXAiuAW/Kfb5Bs7H5/5+qvAT6S19ZNNrQy5C6y4be/lDRJ2amqf8S+xzXKOIzsj80ggKR3kf1xsw7icE9TAE+R9bIfJgvda4D3RMTtABFxM9mpku8n6zluAg4FzoiInQ03GvEAcCPZL/+Q+cBGSc+QHVxdHBHPNnjubWQHZr9G1gM9HljcxM+0jOzg3qN5DV8hOyjYtPyMkcuHWf0EcAnZGTtPAV8CPhkRX87XfxD4GbCB7DTRq2nu9+hGsvHvR8mGeN4HEBG/Ijvo++N8KOR3Gjz3KrKhlgfIDpzeOLQiInYBC8mOFzwGfBZ4Z/6JqykRsYnsbKo7yIZuTgZ+3Ox2rFrKOnGWCkn3ACsi4htV1zKaJF0NHBkRI5010zYk/Qj4UkTcUHUtlj733BMi6SSy0/B+WnUtrSbpNZJOKQyVXAT8U9V1mbUrf4suEXlP9jzgwxHx0EjtO9BUsqGYo4FfkA0brK20IrM25mEZM7MEeVjGzCxBlQ3LTJ8+PXp6eqp6eTOzjvSTn/zksYjoGqldZeHe09NDf39/VS9vZtaRJJU6puZhGTOzBDnczcwS5HA3M0tQW53n/vzzz1Or1Xj22X2+vd42pkyZQnd3N5MmTaq6FDOzYbVVuNdqNaZOnUpPTw/teCOXiGDnzp3UajVmzZpVdTlmZsNqq2GZZ599lmnTprVlsANIYtq0aW39ycLMDNos3IG2DfYh7V6fmRm0YbibmdnBa6sx93o9y7/V0u09+IlzS7X7zne+w2WXXcaePXu4+OKLWb58uHspm5m1p7YO9yrs2bOHSy+9lO9973t0d3dz2mmnsXDhQubMmVN1aWYv0urOz3hXtvPXKTwsU+fuu+/mhBNO4LjjjmPy5MksXryYtWt9ZVkz6ywO9zqPPPIIM2f+5h7E3d3dPPJI0/cYNjOrlMO9TqPr2/sMGTPrNKXCXdJ8SVskDUja5+iipGMk/VDSTyXdJ2lB60sdG93d3Wzfvn3vfK1W4+ijj66wIjOz5o0Y7pImACvJ7qo+B1giqf7o4keBNRFxKtkd7T/b6kLHymmnncbWrVt54IEH2LVrF6tXr2bhwoVVl2Vm1pQyZ8vMBQYiYhuApNXAImBToU0Ah+fTLwd2tKK4Ko5eT5w4kc985jOcffbZ7NmzhwsvvJCTTjppzOswMzsYZcJ9BrC9MF8DTq9rcyXwXUnvBQ4D5rWkuoosWLCABQs6dmTJzKzUmHujo4n1Rx2XAF+IiG5gAXCjpH22LWmppH5J/YODg81Xa2ZmpZQJ9xowszDfzb7DLhcBawAi4g5gCjC9fkMRsSoieiOit6trxFsAmpnZASoT7huA2ZJmSZpMdsC0r67Nw8BbACS9lizcD6hr3uhUxHbS7vWZmUGJcI+I3cAyYD2wmeysmI2SVkgaOo3kL4BLJP1f4CvABXEAKThlyhR27tzZtgE6dD33KVOmVF2Kmdl+lbq2TESsA9bVLbuiML0JOONgi+nu7qZWq9HO4/FDd2IyM2tnbXXhsEmTJvkOR2ZmLeDLD5iZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCHO5mZglyuJuZJcjhbmaWIIe7mVmCSoW7pPmStkgakLS8wfpPS7o3f/w/SU+2vlQzMytrxJt1SJoArATOIrtZ9gZJffndlwCIiPcX2r8XOHUUajUzs5LK9NznAgMRsS0idgGrgUX7ab+E7D6qZmZWkTLhPgPYXpiv5cv2IelYYBbwg2HWL5XUL6m
/ne+TambW6cqEuxosi2HaLgZuiYg9jVZGxKqI6I2I3q6urrI1mplZk8qEew2YWZjvBnYM03YxHpIxM6tcmXDfAMyWNEvSZLIA76tvJOlE4AjgjtaWaGZmzRox3CNiN7AMWA9sBtZExEZJKyQtLDRdAqyOiOGGbMzMbIyMeCokQESsA9bVLbuibv7K1pVlZmYHw99QNTNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MElQq3CXNl7RF0oCk5cO0ebukTZI2SrqptWWamVkzRrwTk6QJwErgLLKbZW+Q1BcRmwptZgMfAc6IiCckvWq0CjYzs5GV6bnPBQYiYltE7AJWA4vq2lwCrIyIJwAi4hetLdPMzJpRJtxnANsL87V8WdGrgVdL+rGkOyXNb7QhSUsl9UvqHxwcPLCKzcxsRGXCXQ2WRd38RGA2cCawBLhB0iv2eVLEqojojYjerq6uZms1M7OSyoR7DZhZmO8GdjRoszYino+IB4AtZGFvZmYVKBPuG4DZkmZJmgwsBvrq2nwD+C8AkqaTDdNsa2WhZmZW3ojhHhG7gWXAemAzsCYiNkpaIWlh3mw9sFPSJuCHwIciYudoFW1mZvs34qmQABGxDlhXt+yKwnQAH8gfZmZWMX9D1cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS1CpcJc0X9IWSQOSljdYf4GkQUn35o+LW1+qmZmVNeKdmCRNAFYCZ5HdCHuDpL6I2FTX9OaIWDYKNZqZWZPK9NznAgMRsS0idgGrgUWjW5aZmR2MMuE+A9hemK/ly+r9qaT7JN0iaWajDUlaKqlfUv/g4OABlGtmZmWUCXc1WBZ187cCPRFxCvB94IuNNhQRqyKiNyJ6u7q6mqvUzMxKKxPuNaDYE+8GdhQbRMTOiHgun/0c8IbWlGdmZgeiTLhvAGZLmiVpMrAY6Cs2kHRUYXYhsLl1JZqZWbNGPFsmInZLWgasByYAn4+IjZJWAP0R0Qe8T9JCYDfwOHDBKNZsZmYjGDHcASJiHbCubtkVhemPAB9pbWlmZnag/A1VM7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQQ53M7MEOdzNzBLkcDczS5DD3cwsQaXCXdJ8SVskDUhavp92b5MUknpbV6KZmTVrxHCXNAFYCZwDzAGWSJrToN1U4H3AXa0u0szMmlOm5z4XGIiIbRGxC1gNLGrQ7q+Ba4BnW1ifmZkdgDLhPgPYXpiv5cv2knQqMDMivrm/DUlaKqlfUv/g4GDTxZqZWTllwl0NlsXeldIhwKeBvxhpQxGxKiJ6I6K3q6urfJVmZtaUMuFeA2YW5ruBHYX5qcBvAT+S9CDwO0CfD6qamVWnTLhvAGZLmiVpMrAY6BtaGRG/jIjpEdETET3AncDCiOgflYrNzGxEI4Z7ROwGlgHrgc3AmojYKGmFpIWjXaCZmTVvYplGEbEOWFe37Iph2p558GWZmdnB8DdUzcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBJUKd0nzJW2RNCBpeYP175b0M0n3Srpd0pzWl2pmZmWNGO6SJgArgXOAOcCSBuF9U0ScHBGvB64Brm15pWZmVlqZnvtcYCAitkXELmA1sKjYICKeKsweBkTrSjQzs2aVuYfqDGB7Yb4GnF7fSNKlwAeAycCbG21I0lJgKcAxxxzTbK1mZlZSmZ67Gizbp2ceESsj4njgw8BHG20oIlZFRG9
E9HZ1dTVXqZmZlVYm3GvAzMJ8N7BjP+1XA398MEWZmdnBKRPuG4DZkmZJmgwsBvqKDSTNLsyeC2xtXYlmZtasEcfcI2K3pGXAemAC8PmI2ChpBdAfEX3AMknzgOeBJ4DzR7NoMzPbvzIHVImIdcC6umVXFKYva3FdZmZ2EPwNVTOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEEOdzOzBDnczcwS5HA3M0uQw93MLEGlwl3SfElbJA1IWt5g/QckbZJ0n6TbJB3b+lLNzKysEcNd0gRgJXAOMAdYImlOXbOfAr0RcQpwC3BNqws1M7PyyvTc5wIDEbEtInYBq4FFxQYR8cOI+FU+eyfQ3doyzcysGWXCfQawvTBfy5cN5yLg241WSFoqqV9S/+DgYPkqzcysKWXCXQ2WRcOG0nlAL/DJRusjYlVE9EZEb1dXV/kqzcysKRNLtKkBMwvz3cCO+kaS5gH/E3hTRDzXmvLMzOxAlOm5bwBmS5olaTKwGOgrNpB0KnA9sDAiftH6Ms3MrBkjhntE7AaWAeuBzcCaiNgoaYWkhXmzTwIvA74q6V5JfcNszszMxkCZYRkiYh2wrm7ZFYXpeS2uy8zMDoK/oWpmliCHu5lZgkoNy4xnPcu/VXUJSXnwE+dWXYLZuOCeu5lZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZghzuZmYJcribmSXI4W5mliCHu5lZgkqFu6T5krZIGpC0vMH6P5B0j6Tdkt7W+jLNzKwZI4a7pAnASuAcYA6wRNKcumYPAxcAN7W6QDMza16Z67nPBQYiYhuApNXAImDTUIOIeDBf98Io1GhmZk0qMywzA9hemK/ly5omaamkfkn9g4ODB7IJMzMroUy4q8GyOJAXi4hVEdEbEb1dXV0HsgkzMyuhTLjXgJmF+W5gx+iUY2ZmrVAm3DcAsyXNkjQZWAz0jW5ZZmZ2MEYM94jYDSwD1gObgTURsVHSCkkLASSdJqkG/BlwvaSNo1m0mZntX5mzZYiIdcC6umVXFKY3kA3XmJlZG/A3VM3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswQ53M3MEuRwNzNLkMPdzCxBDnczswSVCndJ8yVtkTQgaXmD9S+RdHO+/i5JPa0u1MzMyhsx3CVNAFYC5wBzgCWS5tQ1uwh4IiJOAD4NXN3qQs3MrLwyPfe5wEBEbIuIXcBqYFFdm0XAF/PpW4C3SFLryjQzs2aUuYfqDGB7Yb4GnD5cm4jYLemXwDTgsWIjSUuBpfnsM5K2HEjR1tB06vZ3O5I/041Hfm+21rFlGpUJ90Y98DiANkTEKmBVide0Jknqj4jequswq+f3ZjXKDMvUgJmF+W5gx3BtJE0EXg483ooCzcyseWXCfQMwW9IsSZOBxUBfXZs+4Px8+m3ADyJin567mZmNjRGHZfIx9GXAemAC8PmI2ChpBdAfEX3APwI3Shog67EvHs2irSEPd1m78nuzAnIH28wsPf6GqplZghzuZmYJcribmSXI4W5mliCHe4eTdKykefn0oZKmVl2TGfi9WTWHeweTdAnZtXyuzxd1A9+oriKzjN+b1XO4d7ZLgTOApwAiYivwqkorMsv4vVkxh3tney6/Uiew99IP/uKCtQO/NyvmcO9s/yzpcuBQSWcBXwVurbgmM/B7s3L+hmoHk3QI2Y1S/pDsypzrgRt8XR+rmt+b1XO4m5klqMz13K3NSPoZ+xm/jIhTxrAcs7383mwf7rl3IEn7vRNLRDw0VrWYFfm92T4c7h1O0pFk97kNYENEPFpxSWbWBny2TAeTdDFwN/BfyW6ScqekC6utygwkPS3pqbrHdkn/JOm4qusbD9xz72D5DcZ/LyJ25vPTgH+NiBO
rrczGO0lXkd2O8yays2UWA0cCW4D3RMSZ1VU3Prjn3tlqwNOF+aeB7RXVYlY0PyKuj4inI+KpiFgFLIiIm4Ejqi5uPPDZMp3tEeAuSWvJxtwXAXdL+gBARFxbZXE2rr0g6e1k15eBbNhwiIcLxoDDvbP9PH8MWZv/66vvWdXeAVwHfJYszO8EzpN0KLCsysLGC4+5m5klyD33DiTp7yPif0i6lQYfcSNiYQVlme0lqQu4BOihkDMR4bO5xojDvTPdmP/7d5VWYTa8tcD/Ab4P7Km4lnHJ4d6BIuIn+eTrI+K64jpJlwH/PPZVmb3ISyPiw1UXMZ75VMjOdn6DZReMdRFmDXxT0oKqixjPfEC1A0laAvw58Eayj75DpgJ7ImJeJYWZ5SQ9DRwGPAc8T/ZFpoiIwystbBzxsExn+lfg34HpwKcKy58G7qukIrOCiPDpuBVzz93MWkbSayLifkm/3Wh9RNwz1jWNVw73DpR/5G30H+ePvlYpSasiYqmkHxYW732vRsSbKyhrXHK4m1nL5Zce+E5EPCXpr4DfBv7aPfex43DvYJKOabQ8Ih4e61rMiiTdFxGnSHoj8HGyY0OXR8TpFZc2bviAamf7VmF6CjCL7JKqJ1VTjtleQ19cOhf4h4hYK+nKCusZdxzuHSwiTi7O5wex/ntF5ZgVPSLpemAecLWkl+Dv1YwpD8skRtI9EdHwTAWzsSLppcB84GcRsVXSUcDJEfHdiksbNxzuHWzouu25Q4A3AK+MiLMrKsnM2oSHZTrbVH5zmtlu4Fbga9WVY2btwj33DibpNOByXnxZ1YiIUyoryszagsO9g+U3yP4g8G/AC0PLI+Khyooys7bgYZnONhgRt1ZdhJm1H/fcO5iktwBLgNvIrr4HQER8vbKizKwtuOfe2d4FvAaYxG+GZQJwuJuNcw73zva6+i8ymZmBvzHW6e6UNKfqIsys/XjMvYNJ2gwcDzxANuY+dMlfnwppNs453DuYpGMbLfepkGbmcDczS5DH3M3MEuRwNzNLkMPdzCxBDnczswT9f6MTkJLV/X8VAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "multiple_script_domain = get_multiple(agg, 'script_domain', 'SCRIPT DOMAIN data: from the total of json\\n')\n", + "pd.DataFrame([[len(multiple_script_domain)/agg_len], \n", + " [(agg_len - len(multiple_script_domain))/agg_len]], \n", + " ['multiple', 'single']).plot(kind='bar', title='JSONs origin: Script domain')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2430.000000\n", + "mean 2.483128\n", + "std 1.213823\n", + "min 2.000000\n", + "25% 2.000000\n", + "50% 2.000000\n", + "75% 3.000000\n", + "max 34.000000\n", + "Name: nunique, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "multiple_script_domain.script_domain['nunique'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7 (0.29%) multiple symbol,\n", + "2423 (99.71%) unique symbol\n" + ] + } + ], + "source": [ + "# Out of the multiple_script_domain\n", + "multiple_script_domain_symbol = get_multiple(multiple_script_domain, 'symbol')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1413 (58.15%) multiple script_tld,\n", + "1017 (41.85%) unique script_tld\n" + ] + } + ], + "source": [ + "# Out of the multiple_script_domain\n", + "multiple_script_domain_location_tld = get_multiple(multiple_script_domain, 'script_tld')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "98 (4.03%) multiple location_domain,\n", + "2332 (95.97%) unique location_domain\n" + ] + } + ], + "source": [ + "# Out of 
the multiple_script_domain\n", + "multiple_script_domain_location_tld = get_multiple(multiple_script_domain, 'location_domain')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Occurrence_of_operation_symbols_domains.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Occurrence_of_operation_symbols_domains.ipynb new file mode 100644 index 0000000..cd64c26 --- /dev/null +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Occurrence_of_operation_symbols_domains.ipynb @@ -0,0 +1,1279 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start Dask" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n" + ] + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.diagnostics import ProgressBar\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook uses the parquet produced by the \"isJson_dataPrep.ipynb\":\n", + "- 's0_domains_isJson_jsonKeys_md5_TLD.parquet'\n", + "\t- It contains all the original 10% sample with extra columns.\n", + "\n", + "# Objective\n", + "Show and compare, between samples, the presence and occurrence of each operation/symbol/domain/TLD. \n", + "\n", + "Each graph is drawn twice to show the difference between the whole data and the data filtered to only the rows whose value_len is above the mean.\n", + "\n", + "# Overview\n", + "### Operation\n", + "The most used operation across the entire sample is GET. \n", + "99.67% of the valid JSONs have GET as their operation. When filtered to value_len above the mean, 100% of the valid JSONs are GET. \n", + "\n", + "### Symbols\n", + "The difference in the unique symbol counts between the whole sample and the filtered one is really big. The one thing I can say is that 'window.localStorage' is the one that produces the most JSONs (65%) and 'window.document.cookie' is the one responsible for 34% of the non-JSON values; anything else may require further investigation and understanding. \n", + "\n", + "### Domain\n", + "'Baidu' has the most occurrences for valid JSON values (15%) but it's only in the 5th position when it comes to the values above the mean (5.9%).\n", + "'Google.Analytics' is the top one for the non-JSON values, both across all values and among the values above the mean.\n", + "\n", + "\n", + "### TLD\n", + "The TLD is more balanced between the non-JSON and JSON values, and the top ones remain for the filtered data. 
\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DIR = 'sample_0_prep/'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 6.8s\n" + ] + } + ], + "source": [ + "columns=['operation', 'symbol', 'script_domain', 'is_json', 'keys_md5', 'script_tld', 'value_len']\n", + "df = dd.read_parquet(DIR + 's0_domains_isJson_jsonKeys_md5_TLD.parquet',\n", + " engine='pyarrow',\n", + " columns=columns)\n", + "with ProgressBar():\n", + " mean = df['value_len'].mean().compute()\n", + "\n", + "df_a = df[df.value_len > mean]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Support code\n", + "This section is where the support code is placed. \n", + "Some of the code here is where the math actually happens; the other sections use it. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 7.1s\n", + "[########################################] | 100% Completed | 7.3s\n" + ] + } + ], + "source": [ + "with ProgressBar():\n", + " df_json = df[df.is_json == True].compute()\n", + " df_other = df[df.is_json == False].compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 7.1s\n", + "[########################################] | 100% Completed | 6.8s\n" + ] + } + ], + "source": [ + "with ProgressBar():\n", + " df_a_json = df_a[df_a.is_json == True].compute()\n", + " df_a_other = df_a[df_a.is_json == False].compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": 
{}, + "outputs": [], + "source": [ + "def calcUniquePercentual(df, column):\n", + " v = df[column].value_counts()\n", + " l = df[column].count()\n", + " return v/l" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def plotUsageComparation(df_json, df_other, column):\n", + " nonjsons = calcUniquePercentual(df_other, column=column)\n", + " jsons = calcUniquePercentual(df_json, column=column)\n", + " p1 = pd.DataFrame({'json': jsons,'other':nonjsons}).sort_values('json', ascending=False)\n", + " p1.plot(kind='bar')\n", + " return p1" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def plotTopUsageComparation(df_json, df_other, column, top):\n", + " nonjsons = calcUniquePercentual(df_other, column=column)\n", + " jsons = calcUniquePercentual(df_json, column=column)\n", + " \n", + " p1 = pd.DataFrame({'json': jsons,'other':nonjsons})\n", + " top_json = p1.sort_values('json', ascending=False).head(top)\n", + " top_other = p1.sort_values('other', ascending=False).head(top)\n", + " tops = pd.concat([top_json, top_other]).drop_duplicates()\n", + " tops.plot(kind='bar')\n", + " return tops" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "def plotUniqueValuesComparation(df_json, df_other, column):\n", + " nonjsons = calcUniquePercentual(df_other, column=column)\n", + " jsons = calcUniquePercentual(df_json, column=column)\n", + " \n", + " #Value counts\n", + " count_nonjson = len(nonjsons)\n", + " count_json = len(jsons)\n", + " p1 = pd.DataFrame([count_json, count_nonjson], \n", + " index= [ 'Json', 'Other' ], \n", + " columns=['Value Counts'])\n", + " p1.plot(kind='bar')\n", + " print(\"There are {} unique {} present on the non-json dataset and {} on the JSONs\".format(count_nonjson,\n", + " column,\n", + " count_json))\n", + " return p1" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "# OPERATION:\n", + "\n", + "The operation columns can have 3 different values \n", + " - GET\n", + " - SET\n", + " - CALL\n", + "\n", + "We can see below that pretty much all[1] JSONs have the operation GET when the whole sample is analysed and ALL JSONs have GET when we filter the sample to values above the mean. \n", + "The GET operation is the most common among the non-json values as well. \n", + "\n", + "---\n", + " For futher investigation: \n", + "1. Are the JSONs that have SET as operation really JSON? Are they false positives? Why are they different? " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Full sample:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEh1JREFUeJzt3X+QVeV9x/H3NwjSqMEE11ZZCExKVNwkK6yAoQOY6ChqAY1NtCapEyOTSdQ6/qhEWsfaZPLD1MTOaBpNNDWpqDGNMpaOk4mKvy2gkAEdHWK0bmASJEIVawTn2z/ulazryp5dLnt3H96vmZ2955znnvPdubuffe5zz3lOZCaSpLK8q9kFSJIaz3CXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFWivZh34gAMOyPHjxzfr8JI0JK1cufLFzGzprV3Twn38+PGsWLGiWYeXpCEpIp6v0s5hGUkqkOEuSQUy3CWpQE0bc5ekKrZt20ZnZyevvfZas0sZUCNHjqS1tZXhw4f36/m9hntE3ACcBPwuM9t62B7A1cAJwKvAmZn5eL+qkaRuOjs72W+//Rg/fjy1uClfZrJp0yY6OzuZMGFCv/ZRZVjmh8DxO9k+B5hY/1oAfLdflUhSD1577TVGjx69xwQ7QEQwevToXXq30mu4Z+b9wO930mQecFPWPArsHxEH9bsiSepmTwr2N+3qz9yID1THAC90We6sr5MkNUkjPlDt6d9LjzdmjYgF1IZuGDduXAMOXd34hf85oMd77usnDujxpD1Fo/+Wq/6tfvSjH+Xhhx9u6LF3p0b03DuBsV2WW4H1PTXMzOsysyMzO1paer16VpIGjaEU7NCYcF8CfDZqpgNbMnNDA/YrSYPGvvvuy4YNG5g5cybt7e20tbXxwAMPALB48WI+9KEP0dbWxiWXXPKW5yxatIiPfOQjTJ8+nd/+9rcDVm+v4R4Ri4FHgEMiojMizoqIL0TEF+pNlgLPAuuA64Ev7rZqJamJbr75Zo477jhWrVrF6tWraW9vZ/369VxyySXcc889rFq1iuXLl3PHHXcAsHXrVqZPn87q1auZOXMm119//YDV2uuYe2ae3sv2BL7UsIokaZA68sgj+dznPse2bduYP38+7e3t3HPPPcyeP
Zs3h5rPOOMM7r//fubPn8+IESM46aSTAJgyZQo///nPB6xWpx+QpIpmzpzJ/fffz5gxY/jMZz7DTTfdRK1/27Phw4fvOKVx2LBhbN++faBKNdwlqarnn3+eAw88kLPPPpuzzjqLxx9/nGnTprFs2TJefPFF3njjDRYvXsysWbOaXapzy0gaWpp1mnFEcN9993HllVcyfPhw9t13X2666SYOOuggvva1r3H00UeTmZxwwgnMmzevKTW+pd6dvaXYnTo6OnIgb9bhee7S0PTUU09x2GGHNbWGTZs2MXnyZJ5/vtJ9Mhqmp589IlZmZkdvz3VYRpJ2Yv369Rx11FFcdNFFzS6lTxyWkaSdOPjgg3nmmWeaXUaf2XOXpAIZ7pJUIMNdkgpkuEtSgfxAVdLQcvmoBu9vS7+etnnzZm6++Wa++MXadFr33Xcf3/rWt7jrrrsaWV2/2XOXpH7YvHkz1157bcP21+ipCQx3Sargqquuoq2tjba2Nr7zne+wcOFCfvWrX9He3s7FF18MwCuvvMKpp57KoYceyhlnnLFj3pmVK1cya9YspkyZwnHHHceGDbVZ0WfPns2ll17KrFmzuPrqqxtar8MyktSLlStXcuONN/LYY4+RmUybNo0f//jHrFmzhlWrVgG1YZknnniCtWvXcvDBBzNjxgweeughpk2bxrnnnsudd95JS0sLt956K4sWLeKGG24Aau8Ali1b1vCaDXdJ6sWDDz7IySefzD777APAKaecsuNGHV1NnTqV1tZWANrb23nuuefYf//9WbNmDcceeywAb7zxBgcddNCO53zqU5/aLTUb7pLUi6pzcO299947Hr85xW9mcvjhh/PII4/0+Jw3/2E0mmPuktSLmTNncscdd/Dqq6+ydetWfvaznzFjxgxefvnlXp97yCGHsHHjxh3hvm3bNtauXbu7S7bnLmmI6eepi7ti8uTJnHnmmUydOhWAz3/+80yZMoUZM2bQ1tbGnDlzOPHEnmeCHTFiBLfffjvnnXceW7ZsYfv27Zx//vkcfvjhu7Vmp/zdTZzyV2qMwTDlb7M45a8k6S0Md0kqkOEuadBr1vBxM+3qz2y4SxrURo4cyaZNm/aogM9MNm3axMiRI/u9D8+WkTSotba20tnZycaNG5tdyoAaOXLkjgui+sNwlzSoDR8+nAkTJjS7jCHHYRlJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgSqFe0QcHxFPR8S6iFjYw/ZxEXFvRDwREb+MiBMaX6okqapewz0ihgHXAHOAScDpETGpW7O/B27LzCOA04BrG12oJKm6Kj33qcC6zHw2M18HbgHmdWuTwHvqj0cB6xtXoiSpr6qE+xjghS7LnfV1XV0OfDoiOoGlwLk97SgiFkTEiohYsadNAiRJA6lKuEcP67rPvXk68MPMbAVOAH4UEW/bd2Zel5kdmdnR0tLS92olSZVUCfdOYGyX5VbePuxyFnAbQGY+AowEDmhEgZKkvqsS7suBiRExISJGUPvAdEm3Nv8DfBwgIg6jFu6Ou0hSk/Qa7pm5HTgHuBt4itpZMWsj4oqImFtvdiFwdkSsBhYDZ+aedNsUSRpkKt2sIzOXUvugtOu6y7o8fhKY0djSJEn95RWqklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpALtVaVRRBwPXA0MA76fmV/voc0ngcuBBFZn5l83sM6h5/JRA3y8LQN7PEmDWq/hHhHDgGuAY4FOYHlELMnMJ7u0mQh8GZiRmS9FxIG7q2BJUu+qDMtMBdZl5rOZ+TpwCzCvW5uzgWsy8yWAzPxdY8uUJPVFlXAfA
7zQZbmzvq6rDwIfjIiHIuLR+jCOJKlJqoy5Rw/rsof9TARmA63AAxHRlpmb37KjiAXAAoBx48b1uVhJUjVVeu6dwNguy63A+h7a3JmZ2zLz18DT1ML+LTLzuszsyMyOlpaW/tYsSepFlXBfDkyMiAkRMQI4DVjSrc0dwNEAEXEAtWGaZxtZqCSpul7DPTO3A+cAdwNPAbdl5tqIuCIi5tab3Q1siogngXuBizNz0+4qWpK0c5XOc8/MpcDSbusu6/I4gQvqX5KkJvMKVUkqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKlClcI+I4yPi6YhYFxELd9Lu1IjIiOhoXImSpL7qNdwjYhhwDTAHmAScHhGTemi3H3Ae8Fiji5Qk9U2VnvtUYF1mPpuZrwO3APN6aPdPwDeB1xpYnySpH6qE+xjghS7LnfV1O0TEEcDYzLxrZzuKiAURsSIiVmzcuLHPxUqSqqkS7tHDutyxMeJdwLeBC3vbUWZel5kdmdnR0tJSvUpJUp9UCfdOYGyX5VZgfZfl/YA24L6IeA6YDizxQ1VJap4q4b4cmBgREyJiBHAasOTNjZm5JTMPyMzxmTkeeBSYm5krdkvFkqRe9RrumbkdOAe4G3gKuC0z10bEFRExd3cXKEnqu72qNMrMpcDSbusue4e2s3e9LEnSrvAKVUkqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kq0F7NLkCSGuryUQN8vC0De7yK7LlLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklSgSuEeEcdHxNMRsS4iFvaw/YKIeDIifhkRv4iI9ze+VElSVb2Ge0QMA64B5gCTgNMjYlK3Zk8AHZn5YeB24JuNLlSSVF2VnvtUYF1mPpuZrwO3APO6NsjMezPz1frio0BrY8uUJPVFlXAfA7zQZbmzvu6dnAX8V08bImJBRKyIiBUbN26sXqUkqU+qhHv0sC57bBjxaaADuLKn7Zl5XWZ2ZGZHS0tL9SolSX1SZeKwTmBsl+VWYH33RhFxDLAImJWZf2hMeZKk/qjSc18OTIyICRExAjgNWNK1QUQcAXwPmJuZv2t8mZKkvug13DNzO3AOcDfwFHBbZq6NiCsiYm692ZXAvsBPImJVRCx5h91JkgZApfncM3MpsLTbusu6PD6mwXVJknaBV6hKUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SClTpIiZpj3L5qAE+3paBPZ72CPbcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSpQpXCPiOMj4umIWBcRC3vYvndE3Frf/lhEjG90oZKk6noN94gYBlwDzAEmAadHxKRuzc4CXsrMPwe+DXyj0YVKkqqr0nOfCqzLzGcz83XgFmBetzbzgH+rP74d+HhEROPKlCT1xV4V2owBXuiy3AlMe6c2mbk9IrYAo4EXuzaKiAXAgvriKxHxdH+KHgoCDqDbz79b/aP/SxvI125oK/31e3+VRlXCvafKsx9tyMzrgOsqHHPIi4gVmdnR7DrUd752Q5uvX02VYZlOYGyX5VZg/Tu1iYi9gFHA7xtRoCSp76qE+3JgYkRMiIgRwGnAkm5tlgB/U
398KnBPZr6t5y5JGhi9DsvUx9DPAe4GhgE3ZObaiLgCWJGZS4AfAD+KiHXUeuyn7c6ih4g9YvipUL52Q5uvHxB2sCWpPF6hKkkFMtwlqUCGuyQVyHCXqM2PVGWdNFQY7g0SEROqrNOg9UjFddKQUOUKVVXzU2Byt3W3A1OaUIsqiog/ozZ9xp9ExBH88Wrr9wDvblphqiQiLtjZ9sy8aqBqGWwM910UEYcChwOjIuKULpveA4xsTlXqg+OAM6lded01CP4XuLQZBalP9mt2AYOV57nvooiYB8wH5vLWK3dfBm7JzIebUpj6JCI+kZk/bXYdUqMY7g0SEUdlpmO0Q1R9eOarwMGZOad+z4KjMvMHTS5NOxER/7Kz7Zl53kDVMtg4LNM4myLiF8CfZmZbRHwYmJuZX2l2YarkxvrXovryM8Ct1KbW0OC1stkFDFb23BskIpYBFwPfy8wj6uvWZGZbcytTFRGxPDOPjIgnurx+qzKzvdm1Sf1hz71x3p2Z/93tBlTbm1WM+mxrRIymfh+CiJgObGluSaoqIlqAS6jdCnTHiQyZ+bGmFdVkhnvjvBgRH+CP4XAqsKG5JakPLqD2gfgHIuIhoIXa9NUaGv6d2jDaicAXqE1BvrGpFTWZ4d44X6I21eihEfEb4NfAGc0tSX3wAWo3gR8LfILarST9+xg6RmfmDyLibzNzGbCsPlS6x/KXt3HmA0uBe6ld+bsVOCYiVmbmqqZWpir+ITN/EhHvBY4B/hn4Lm+/X7AGp2317xsi4kRqd4trbWI9Tef0A43TQe3t4HuB/andCHw2cH1E/F0T61I1b9S/nwj8a2beCYxoYj3qm69ExCjgQuAi4PvA+c0tqbkM98YZDUzOzIsy80JqYd8CzKR2BaQGt99ExPeATwJL65OG+fcxdPwVtbP/1mTm0cCxwMlNrqmp/OVtnHHA612WtwHvz8z/A/7QnJLUB5+kdivJ4zNzM/A+aqe2amj4cP11AyAzfw8c0cR6ms4x98a5GXg0Iu6sL/8lsDgi9gGebF5ZqiIzXwX+o8vyBjzbaSh5V0S8NzNfAoiI97GH55sXMTVQREwB/oLazIIPZuaKJpck7REi4rPAl6nNxJrU3ol9NTN/1NTCmshwl1SE+nxAH6PWufpFZu7R75gNd0kqkB+oSlKBDHdJKpDhLkkFMtwlqUD/D8IypErrsxOtAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "p1 = plotUsageComparation(df_json, df_other, 'operation')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Above the mean sample:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAECCAYAAAAFL5eMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEUZJREFUeJzt3X+M1/V9wPHnq/zwWkHd4MzE00I6quK1IlwBSwOYagTtZFrT6lw3EytpWjXGlkm1Mc5l61Y72y7RbbhqZxtQZ1ckysKSWrX+LEdFCxoNtTivkPa8VqZYK7jX/vieeJ6H97njy33v3vd8JCb3+Xzf970Xhnvyuc99vp9vZCaSpLK8p9EDSJLqz7hLUoGMuyQVyLhLUoGMuyQVyLhLUoGMuyQVyLhLUoGMuyQVaGyjvvDkyZNz6tSpjfrykjQibdy48cXMbO5vXcPiPnXqVNrb2xv15SVpRIqI56us87SMJBXIuEtSgYy7JBWoYefcJamK3bt309HRwWuvvdboUYZUU1MTLS0tjBs3blCfb9wlDWsdHR1MnDiRqVOnEhGNHmdIZCZdXV10dHQwbdq0QT1Hv6dlIuLmiPh1RGzex+MREf8UEVsj4smImDWoSSSpD6+99hqTJk0aNWEHiAgmTZq0Xz+tVDnn/h1g8bs8vgSY3v3fMuCfBz2NJPVhNIX9Tfv7Z+437pn5APCbd1myFLg1ax4FDouII/ZrKknSfqnHOfcjgRd6bHd079vRe2FELKN2dM/RRx9dhy994E1dcU+jR6hk29+f0egRpCFR7+/Jqt87H/3oR3n44Yfr+rUPpHrEva+fHfp81+3MXAmsBGhra/OduevpmkMbPUE11+xs9ATSoIyksEN9rnPvAI7qsd0CbK/D80rSsDFhwgR27NjBggULmDlzJq2trfz4xz8GYPXq1XzoQx+itbWVK6644m2fc9VVV3HCCScwb948fvWrXw3ZvPWI+1rgL7qvmpkH7MzMd5ySkaSRbtWqVZx22mls2rSJJ554gpkzZ7J9+3auuOIK7r33XjZt2sSGDRtYs2YNALt27WLevHk88cQTLFiwgJtuumnIZq1yKeRq4BHgmIjoiIgLI+JzEfG57iXrgOeArcBNwOcP2LSS1EAf+chHuOWWW7jmmmv42c9+xsSJE9mwYQOLFi2iubmZsWPHcv755/PAAw8AMH78eD7xiU8AMHv2bLZt2zZks/Z7zj0zz+vn8QS+ULeJJGmYWrBgAQ888AD33HMPn/nMZ1i+fDmHHHLIPtePGzdu7yWNY8aMYc+ePUM1qveWkaSqnn/+eQ4//HAuuugiLrzwQn76058yd+5c7r//fl588UXeeOMNVq9ezcKFCxs9qrcfkDSyNOqy34jgvvvu47rrrmPcuHFMmDCBW2+9lSOOOIKvfvWrnHzyyWQmp59+OkuXLm3IjG+bt3ZWZei1tbXlSHizjhFznXvTnzV6hGq8FFID9PTTT3Pcccc1dIauri5mzZrF889Xep+Muunrzx4RGzOzrb/P9bSMJL2L7du3c9JJJ/GlL32p0aMMiKdlJOldTJkyhWeffbbR
YwyYR+6SVCDjLkkFMu6SVCDjLkkF8heqkkaWet8BdZCX57700kusWrWKz3++dseV++67j69//evcfffd9Zxu0Dxyl6RBeOmll7jxxhvr9nz1vjWBcZekCq6//npaW1tpbW3lm9/8JitWrODnP/85M2fOZPny5QC88sornHPOORx77LGcf/75vPki0Y0bN7Jw4UJmz57Naaedxo4dtRvnLlq0iCuvvJKFCxfyrW99q67zelpGkvqxceNGbrnlFh577DEyk7lz5/K9732PzZs3s2nTJqB2Wubxxx9ny5YtTJkyhfnz5/PQQw8xd+5cLrnkEu666y6am5u5/fbbueqqq7j55puB2k8A999/f91nNu6S1I8HH3yQs846i4MPPhiAs88+e+8bdfQ0Z84cWlpaAJg5cybbtm3jsMMOY/PmzZx66qkAvPHGGxxxxFtvM/3pT3/6gMxs3CWpH1XvwXXQQQft/fjNW/xmJscffzyPPPJIn5/z5j8Y9eY5d0nqx4IFC1izZg2vvvoqu3bt4gc/+AHz58/n5Zdf7vdzjznmGDo7O/fGfffu3WzZsuVAj+yRu6QRpgF3Fp01axYXXHABc+bMAeCzn/0ss2fPZv78+bS2trJkyRLOOKPvWxGPHz+eO++8k0svvZSdO3eyZ88eLrvsMo4//vgDOrO3/O2Ht/ytM2/5qwEaDrf8bRRv+StJehvjLkkFMu6Shr1GnT5upP39Mxt3ScNaU1MTXV1doyrwmUlXVxdNTU2Dfg6vlpE0rLW0tNDR0UFnZ2ejRxlSTU1Ne18QNRjGXdKwNm7cOKZNm9boMUYcT8tIUoGMuyQVyLhLUoGMuyQVyLhLUoGMuyQVyLhLUoGMuyQVqFLcI2JxRDwTEVsjYkUfjx8dET+KiMcj4smIOL3+o0qSquo37hExBrgBWALMAM6LiBm9ln0FuCMzTwTOBW6s96CSpOqqHLnPAbZm5nOZ+TpwG7C015oEDun++FBge/1GlCQNVJV7yxwJvNBjuwOY22vNNcB/R8QlwMHAKXWZTpI0KFWO3KOPfb3vvXke8J3MbAFOB74bEe947ohYFhHtEdE+2u7wJklDqUrcO4Cjemy38M7TLhcCdwBk5iNAEzC59xNl5srMbMvMtubm5sFNLEnqV5W4bwCmR8S0iBhP7Rema3ut+R/g4wARcRy1uHtoLkkN0m/cM3MPcDGwHnia2lUxWyLi2og4s3vZF4GLIuIJYDVwQY6mt02RpGGm0pt1ZOY6YF2vfVf3+PgpYH59R5MkDZavUJWkAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAlWKe0QsjohnImJrRKzYx5pPRcRTEbElIlbVd0xJ0kCM7W9BRIwBbgBOBTqADRGxNjOf6rFmOvBlYH5m/jYiDj9QA0uS+lflyH0OsDUzn8vM14HbgKW91lwE3JCZvwXIzF/Xd0xJ0kBUifuRwAs9tju69/X0QeCDEfFQRDwaEYv7eqKIWBYR7RHR3tnZObiJJUn9qhL36GNf9toeC0wHFgHnAf8WEYe945MyV2ZmW2a2NTc3D3RWSVJFVeLeARzVY7sF2N7Hmrsyc3dm/gJ4hlrsJUkNUCXuG4DpETEtIsYD5wJre61ZA5wMEBGTqZ2mea6eg0qSqus37pm5B7gYWA88DdyRmVsi4tqIOLN72XqgKyKeAn4ELM/MrgM1tCTp3fV7KSRAZq4D1vXad3WPjxO4vPs/SVKD+QpVSSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZek
Ahl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAhl3SSqQcZekAlWKe0QsjohnImJrRKx4l3XnRERGRFv9RpQkDVS/cY+IMcANwBJgBnBeRMzoY91E4FLgsXoPKUkamCpH7nOArZn5XGa+DtwGLO1j3d8AXwNeq+N8kqRBqBL3I4EXemx3dO/bKyJOBI7KzLvrOJskaZCqxD362Jd7H4x4D/AN4Iv9PlHEsohoj4j2zs7O6lNKkgakStw7gKN6bLcA23tsTwRagfsiYhswD1jb1y9VM3NlZrZlZltzc/Pgp5Ykvasqcd8ATI+IaRExHjgXWPvmg5m5MzMnZ+bUzJwKPAqcmZntB2RiSVK/+o17Zu4BLgbWA08Dd2Tmloi4NiLOPNADSpIGbmyVRZm5DljXa9/V+1i7aP/HkiTtD1+hKkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFMu6SVCDjLkkFqhT3iFgcEc9ExNaIWNHH45dHxFMR8WRE/DAi3l//USVJVfUb94gYA9wALAFmAOdFxIxeyx4H2jLzw8CdwNfqPagkqboqR+5zgK2Z+Vxmvg7cBiztuSAzf5SZr3ZvPgq01HdMSdJAVIn7kcALPbY7uvfty4XAf/X1QEQsi4j2iGjv7OysPqUkaUCqxD362Jd9Loz4c6ANuK6vxzNzZWa2ZWZbc3Nz9SklSQMytsKaDuCoHtstwPbeiyLiFOAqYGFm/r4+40mSBqPKkfsGYHpETIuI8cC5wNqeCyLiROBfgTMz89f1H1OSNBD9xj0z9wAXA+uBp4E7MnNLRFwbEWd2L7sOmAD8R0Rsioi1+3g6SdIQqHJahsxcB6zrte/qHh+fUue5JEn7wVeoSlKBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFci4S1KBjLskFahS3CNicUQ8ExFbI2JFH48fFBG3dz/+WERMrfegkqTq+o17RIwBbgCWADOA8yJiRq9lFwK/zcw/Br4B/EO9B5UkVVflyH0OsDUzn8vM14HbgKW91iwF/r374zuBj0dE1G9MSdJAjK2w5kjghR7bHcDcfa3JzD0RsROYBLzYc1FELAOWdW++EhHPDGZovVPAZHr9/x6W/tp/80ehkfF3c+R4f5VFVeLe13djDmINmbkSWFnha2qAIqI9M9saPYfUm383G6PKaZkO4Kge2y3A9n2tiYixwKHAb+oxoCRp4KrEfQMwPSKmRcR44Fxgba81a4G/7P74HODezHzHkbskaWj0e1qm+xz6xcB6YAxwc2ZuiYhrgfbMXAt8G/huRGyldsR+7oEcWn3ydJeGK/9uNkB4gC1J5fEVqpJUIOMuSQUy7pJUIOMuqe4i4qAq+3TgGPcRKiKmVdknNcgjFffpAKnyClUNT98HZvXadycwuwGzSABExB9Rux3JeyPiRN569fohwPsaNtgoZNxHmIg4FjgeODQizu7x0CFAU2OmkvY6DbiA2ivZr++x/3+BKxsx0Gjlde4jTEQsBf4UOJO3v1L4ZeC2zHy4IYNJPUTEJzPz+42eYzQz7iNURJyUmZ7D1LDUfXrmb4Epmbmk+z0gTsrMbzd4tFHDX6iOXF0R8cOI2AwQER+OiK80eiip2y3UblkypXv7WeCyxo0z+hj3kesm4MvAboDMfBLv6aPhY3Jm3gH8H9TuUQW8
0diRRhfjPnK9LzN/0mvfnoZMIr3TroiYRPf7OkTEPGBnY0caXbxaZuR6MSI+wFvfPOcAOxo7krTX5dR+4f+BiHgIaKZ2O3ANEeM+cn2B2q1Uj42IXwK/AM5v7EjSXh8AllB7E59PUntrTnszhLxaZoSKiMu7P3wvtdNru6j92LsxMzc1bDAJiIgnM/PDEfEx4O+AfwSuzMze77+sA8Rz7iNXG/A54A+Aw6i98fgi4KaI+KsGziXBW788PQP4l8y8CxjfwHlGHY/cR6iIWA98MjNf6d6eQO32A2dRO3qf0cj5NLpFxN3AL4FTqN0S43fATzLzhIYONop45D5yHQ283mN7N/D+zPwd8PvGjCTt9Slq17kvzsyXgD8Eljd2pNHFX3CMXKuARyPiru7tPwFWR8TBwFONG0uCzHwV+M8e2zvwaq4h5WmZESwiZgMfo3bnvQczs73BI0kaJoy7JBXIc+6SVCDjLkkFMu6SVCDjLkkF+n9G2kb/7zCg+wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "p2 = plotUsageComparation(df_a_json, df_a_other, 'operation')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonotherjsonother
get0.9967190.6167281.00.999047
set0.0032810.131672NaN0.000953
callNaN0.251601NaNNaN
\n", + "
" + ], + "text/plain": [ + " json other json other\n", + "get 0.996719 0.616728 1.0 0.999047\n", + "set 0.003281 0.131672 NaN 0.000953\n", + "call NaN 0.251601 NaN NaN" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEDCAYAAADOc0QpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFhZJREFUeJzt3X2QVfWd5/H3NzzIjqBGbXfB1kBlHUWa2EoLKFsNzmgh6sro9EaFdbDKh0o56rqJLIzsptjsTJkZXUenSmejiUyMC+o4K1LKllsVA8SHuDTajCDRQgfXHiiDJLCK6wSo7/5xW9JpWvr05dKXPr5fVZT3nPs79366Ln4499fnITITSVK5fKneASRJtWe5S1IJWe6SVEKWuySVkOUuSSVkuUtSCVnuklRClrsklZDlLkklNLReb3ziiSfm2LFj6/X2kjQorVu37sPMbOhrXN3KfezYsbS3t9fr7SVpUIqI94qMc1pGkkrIcpekErLcJamE6jbnLklF7Nmzh87OTj799NN6RxlQI0aMoLGxkWHDhlW1veUu6YjW2dnJqFGjGDt2LBFR7zgDIjPZsWMHnZ2djBs3rqrX6HNaJiIeiYhfRMSGz3k+IuKvImJzRPx9RJxTVRJJ6sWnn37KCSec8IUpdoCI4IQTTjikbytF5tz/Brj4IM/PAk7r+nMT8NdVp5GkXnyRiv0zh/oz91numbkG+OVBhswGHs2KnwHHRcToQ0olSToktZhzPxl4v9tyZ9e6bT0HRsRNVPbuOfXUU2vw1keuiT+cWNV2b8x7o8ZJpHIZu/C5mr7elu9eWmjc+eefz8svv1zT9z6calHuvX136PWu25n5EPAQQEtLy4DembvavxBFP/ha2XTG+Kq2G//zTTVOIqm7wVTsUJvj3DuBU7otNwJba/C6knTEGDlyJNu2baO1tZXm5maampr46U9/CsCyZcuYOHEiTU1NLFiw4Le2WbRoEWeddRZTp07lgw8+GLC8tSj3FcAfdR01MxXYlZkHTMlI0mC3dOlSZs6cSUdHB+vXr6e5uZmtW7eyYMECXnjhBTo6Oli7di3Lly8HYPfu3UydOpX169fT2trKww8/PGBZixwKuQx4BTg9Ijoj4vqI+EZEfKNryErgXWAz8DBw82FLK0l1dO6557JkyRIWL17MG2+8wahRo1i7di0zZsygoaGBoUOHMnfuXNasWQPA8OHDueyyywCYNGkSW7ZsGbCsfc65Z+Y1fTyfwB/XLJEkHaFaW1tZs2YNzz33HNdeey3z58/nmGOO+dzxw4YN239I45AhQ9i7d+9ARfXaMpJU1HvvvcdJJ53EjTfeyPXXX89rr73GlClTWL16NR9++CH79u1j2bJlTJ8+vd5RvfyApMFloI9g+0xEsGrVKu6++26GDRvGyJEjefTRRxk9ejR33XUXF1xwAZnJJZdcwuzZs+uSsTvLXZL6sGPHDo4//njmzZvHvHnzDnh+zpw5zJkz54D1H3/88f7HbW1ttLW1Hdac3TktI0kHsXXrVs477zzuuOOOekfpF/fcJekgxowZw9tvv13vGP3mnrsklZDlLkklZLlLUglZ7pJUQv5CVdLgsvjYGr/erqo227lzJ0uXLuXmmytXXFm1ahX33HMPzz77bC3TVc09d0mqws6dO3nwwQdr9nq1vjSB5S5JBdx77700NTXR
1NTEfffdx8KFC3nnnXdobm5m/vz5QOWkpba2Ns444wzmzp1L5dJbsG7dOqZPn86kSZOYOXMm27ZVLpw7Y8YM7rzzTqZPn879999f07xOy0hSH9atW8eSJUt49dVXyUymTJnCY489xoYNG+jo6AAq0zKvv/46GzduZMyYMUybNo2XXnqJKVOmcOutt/LMM8/Q0NDAE088waJFi3jkkUeAyjeA1atX1zyz5S5JfXjxxRe54oorOProowG48sor99+oo7vJkyfT2NgIQHNzM1u2bOG4445jw4YNXHTRRQDs27eP0aN/c5vpq6666rBkttwlqQ+fTa/05aijjtr/+LNL/GYmEyZM4JVXXul1m8/+wag159wlqQ+tra0sX76cTz75hN27d/P0008zbdo0Pvrooz63Pf3009m+ffv+ct+zZw8bN2483JHdc5c0yFR56OKhOOecc7juuuuYPHkyADfccAOTJk1i2rRpNDU1MWvWLC69tPdLEQ8fPpynnnqK2267jV27drF3715uv/12JkyYcFgzR9GvG7XW0tKS7e3tA/Z+Yxc+V9V21V47euIPJ1a13ZN3VXc41Pifb6pqO+lIt2nTJsaPH1/vGHXR288eEesys6WvbZ2WkaQSstwlqYQsd0kqIctdkkrIcpekErLcJamEPM5d0qBS7WHGn+eNeW8UGnf++efz8ssv1/S9Dyf33CWpgMFU7GC5S1IhI0eOZNu2bbS2ttLc3ExTU9P+i4ctW7aMiRMn0tTUxIIFC35rm0WLFnHWWWcxdepUPvjggwHLa7lLUkFLly5l5syZdHR0sH79epqbm9m6dSsLFizghRdeoKOjg7Vr17J8+XIAdu/ezdSpU1m/fj2tra08/PDDA5bVcpekgs4991yWLFnC4sWLeeONNxg1ahRr165lxowZNDQ0MHToUObOncuaNWuAynVlLrvsMgAmTZrEli1bBiyr5S5JBbW2trJmzRpOPvlkrr32Wh599NGDXg542LBhRATwm0sADxTLXZIKeu+99zjppJO48cYbuf7663nttdeYMmUKq1ev5sMPP2Tfvn0sW7aM6dOn1zuqh0JKGlyKHrpYaxHBqlWruPvuuxk2bBgjR47k0UcfZfTo0dx1111ccMEFZCaXXHIJs2fPrkvG7gqVe0RcDNwPDAG+n5nf7fH8qcAPgeO6xizMzJU1zipJdbFjxw6OP/545s2bx7x58w54fs6cOcyZM+eA9R9//PH+x21tbbS1tR3WnN31OS0TEUOAB4BZwJnANRFxZo9h/xF4MjPPBq4GHqx1UEmqh61bt3Leeedxxx131DtKvxTZc58MbM7MdwEi4nFgNvBmtzEJHNP1+Fhgay1DSlK9jBkzhrfffrveMfqtSLmfDLzfbbkTmNJjzGLgf0XErcDRwIU1SSdJqkqRo2Wil3U9j/25BvibzGwELgF+FBEHvHZE3BQR7RHRvn379v6nlSQVUqTcO4FTui03cuC0y/XAkwCZ+QowAjix5wtl5kOZ2ZKZLQ0NDdUlliT1qUi5rwVOi4hxETGcyi9MV/QY83+A3weIiPFUyt1dc0mqkz7n3DNzb0TcAjxP5TDHRzJzY0R8B2jPzBXAt4CHI+LfU5myuS4PdtqWJFVp0xnja/p643++qartdu7cydKlS7n55psBWLVqFffccw/PPvtsLeNVrdBx7l3HrK/sse7b3R6/CUyrbTRJOnLt3LmTBx98cH+5H6q9e/cydGjtziv18gOSVMC9995LU1MTTU1N3HfffSxcuJB33nmH5uZm5s+fD1ROWmpra+OMM85g7ty5+687s27dOqZPn86kSZOYOXMm27ZtA2DGjBnceeedTJ8+nfvvv7+meb38gCT1Yd26dSxZsoRXX32VzGTKlCk89thjbNiwgY6ODqAyLfP666+zceNGxowZw7Rp03jppZeYMmUKt956K8888wwNDQ088cQTLFq0iEceeQSofANYvXp1zTNb7pLUhxdffJErrriC
o48+GoArr7xy/406ups8eTKNjY0ANDc3s2XLFo477jg2bNjARRddBMC+ffsYPXr0/m2uuuqqw5LZcpekPhQ9PuSoo47a//izS/xmJhMmTOCVV17pdZvP/sGoNefcJakPra2tLF++nE8++YTdu3fz9NNPM23aND766KM+tz399NPZvn37/nLfs2cPGzduPNyR3XOXNLhUe+jioTjnnHO47rrrmDx5MgA33HADkyZNYtq0aTQ1NTFr1iwuvfTSXrcdPnw4Tz31FLfddhu7du1i79693H777UyYMOGwZo56HY7e0tKS7e3tA/Z+Yxc+V9V2W77b+wfWl4k/nFjVdk/eVd2dWurxF14aCJs2bWL8+Noe2z5Y9PazR8S6zGzpa1unZSSphCx3SSohy13SEe+LeDWTQ/2ZLXdJR7QRI0awY8eOL1TBZyY7duxgxIgRVb+GR8tIOqI1NjbS2dnJF+0eECNGjNh/QlQ1LHdJR7Rhw4Yxbty4escYdJyWkaQSstwlqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKyHKXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKqNANsiPiYuB+YAjw/cz8bi9jvg4sBhJYn5lzapizfhYfW912406tbQ5J6oc+yz0ihgAPABcBncDaiFiRmW92G3Ma8CfAtMz8VUScdLgCS5L6VmRaZjKwOTPfzcxfA48Ds3uMuRF4IDN/BZCZv6htTElSfxQp95OB97std3at6+53gd+NiJci4mdd0zgHiIibIqI9Itq3b99eXWJJUp+KlHv0si57LA8FTgNmANcA34+I4w7YKPOhzGzJzJaGhob+ZpUkFVSk3DuBU7otNwJbexnzTGbuycx/AN6iUvaSpDooUu5rgdMiYlxEDAeuBlb0GLMcuAAgIk6kMk3zbi2DSpKK67PcM3MvcAvwPLAJeDIzN0bEdyLi8q5hzwM7IuJN4CfA/MzccbhCS5IOrtBx7pm5EljZY923uz1O4JtdfyRJdeYZqpJUQpa7JJWQ5S5JJWS5S1IJWe6SVEKWuySVkOUuSSVkuUtSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJWS5S1IJWe6SVEKWuySVkOUuSSVkuUtSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJWS5S1IJWe6SVEKWuySVkOUuSSVkuUtSCRUq94i4OCLeiojNEbHwIOPaIiIjoqV2ESVJ/dVnuUfEEOABYBZwJnBNRJzZy7hRwG3Aq7UOKUnqnyJ77pOBzZn5bmb+GngcmN3LuP8C/AXwaQ3zSZKqUKTcTwbe77bc2bVuv4g4GzglM5+tYTZJUpWKlHv0si73PxnxJeAvgW/1+UIRN0VEe0S0b9++vXhKSVK/FCn3TuCUbsuNwNZuy6OAJmBVRGwBpgIrevulamY+lJktmdnS0NBQfWpJ0kEVKfe1wGkRMS4ihgNXAys+ezIzd2XmiZk5NjPHAj8DLs/M9sOSWJLUpz7LPTP3ArcAzwObgCczc2NEfCciLj/cASVJ/Te0yKDMXAms7LHu258zdsahx5IkHQrPUJWkErLcJamELHdJKiHLXZJKyHKXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKyHKXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekEhpa7wCS1KvFx1a53a7a5hik3HOXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqoULlHhEXR8RbEbE5Ihb28vw3I+LNiPj7iPhxRHyl9lElSUX1We4RMQR4AJgFnAlcExFn9hj2OtCSmV8DngL+otZBJUnFFdlznwxszsx3M/PXwOPA7O4DMvMnmflJ1+LPgMbaxpQk9UeR
cj8ZeL/bcmfXus9zPfA/e3siIm6KiPaIaN++fXvxlJKkfilS7tHLuux1YMS/BVqAu3t7PjMfysyWzGxpaGgonlKS1C9FLhzWCZzSbbkR2NpzUERcCCwCpmfmP9UmniSpGkX23NcCp0XEuIgYDlwNrOg+ICLOBr4HXJ6Zv6h9TElSf/RZ7pm5F7gFeB7YBDyZmRsj4jsRcXnXsLuBkcDfRkRHRKz4nJeTJA2AQtdzz8yVwMoe677d7fGFNc4lSToEnqEqSSVkuUtSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJVQoZOYpEFr8bFVbLOr9jmkAeaeuySVkOUuSSVkuUtSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJWS5S1IJWe6SVEKWuySVkOUuSSVkuUtSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJWS5S1IJFSr3iLg4It6KiM0RsbCX54+KiCe6nn81IsbWOqgkqbg+yz0ihgAPALOAM4FrIuLMHsOuB36Vmf8S+Evgz2sdVJJUXJE998nA5sx8NzN/DTwOzO4xZjbww67HTwG/HxFRu5iSpP4YWmDMycD73ZY7gSmfNyYz90bELuAE4MPugyLiJuCmrsWPI+KtakIPpOr/hdpwIj1+/iJ6fiUqzH9La+c/R1WfnY4Q5f/8vlJkUJFy7601sooxZOZDwEMF3nPQi4j2zGypdw71n5/d4ObnV1FkWqYTOKXbciOw9fPGRMRQ4Fjgl7UIKEnqvyLlvhY4LSLGRcRw4GpgRY8xK4B5XY/bgBcy84A9d0nSwOhzWqZrDv0W4HlgCPBIZm6MiO8A7Zm5AvgB8KOI2Exlj/3qwxl6kPhCTD+VlJ/d4ObnB4Q72JJUPp6hKkklZLlLUglZ7pJUQpa7ROX6SEXWSYOF5V4jETGuyDodsV4puE4aFIqcoapi/g44p8e6p4BJdciigiLiX1C5fMY/i4iz+c3Z1scAv1O3YCokIr55sOcz896BynKksdwPUUScAUwAjo2IK7s9dQwwoj6p1A8zgeuonHndvQj+L3BnPQKpX0bVO8CRyuPcD1FEzAb+ALic3z5z9yPg8cx8uS7B1C8R8YeZ+Xf1ziHViuVeIxFxXmY6RztIdU3P/BkwJjNndd2z4LzM/EGdo+kgIuKvDvZ8Zt42UFmONE7L1M6OiPgx8M8zsykivgZcnpl/Wu9gKmRJ159FXctvA09QubSGjlzr6h3gSOWee41ExGpgPvC9zDy7a92GzGyqbzIVERFrM/PciHi92+fXkZnN9c4mVcM999r5ncz83z1uQLW3XmHUb7sj4gS67kMQEVOBXfWNpKIiogFYQOV+N/sPZMjM36tbqDqz3Gvnw4j4Kr8phzZgW30jqR++SeUX4l+NiJeABiqXr9bg8N+pTKNdCnyDyiXIt9c1UZ1Z7rXzx1QuNXpGRPwj8A/A3PpGUj98lcpN4E8B/pDKrST9/2PwOCEzfxAR/y4zVwOru6ZKv7D8y1s7fwCsBH5C5czf3cCFEbEuMzvqmkxF/KfM/NuI+DJwIfBfgb/mwPsF68i0p+u/2yLiUip3i2usY5668/IDtdNC5evgl4HjqNwIfAbwcET8hzrmUjH7uv57KfDfMvMZYHgd86h//jQijgW+BdwBfB+4vb6R6styr50TgHMy847M/BaVsm8AWqmcAakj2z9GxPeArwMruy4a5v8fg8e/oXL034bMvAC4CLiizpnqyr+8tXMq8Otuy3uAr2Tm/wP+qT6R1A9fp3IryYszcydwPJVDWzU4fK3rcwMgM38JnF3HPHXnnHvtLAV+FhHPdC3/a2BZRBwNvFm/WCoiMz8B/ke35W14tNNg8qWI+HJm/gogIo7nC95vnsRUQxExCfhXVK4s+GJmttc5kvSFEBF/BPwJlSuxJpVvYn+WmT+qa7A6stwllULX9YB+j8rO1Y8z8wv9jdlyl6QS8heqklRC
lrsklZDlLkklZLlLUgn9fwMN6ewEpG0IAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "p3 = pd.concat([p1, p2], axis=1, sort=False).drop_duplicates()\n", + "p3.plot(kind='bar')\n", + "p3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SYMBOLS\n", + "There is a pretty big difference[1] in the number of unique values that appear in the whole sample and in the filtered sample. \n", + "For the whole sample there are as many as 245 different symbols among the non-JSON values, but this is drastically reduced to 2 symbols[2] for the filtered sample (only value_len above the mean), most being 'window.document.cookie' (99%). \n", + "For the valid JSONs there are only 12 symbols in total, reduced to 5 symbols[3]. \n", + "\n", + "---\n", + " For further investigation: \n", + "1. Why is the difference so big? Does it have any meaning?\n", + "2. What is the meaning of the 2 symbols in the non-JSON values? Are they special? Why only 2? Why 'window.document.cookie'?\n", + "3. Why these 5 symbols? What do they do? 
what do they represent?\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Full Sample:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 245 unique symbol present on the non-json dataset and 12 on the JSONs\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEOCAYAAACHE9xHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAE8NJREFUeJzt3X9wXWWdx/H3FxqoClYshQVaCDhFobaEEll2caVLcalF5cfKjw7ajnWtzoDKLhXbsjP+WMXKoLLsrJ3BUSlM15atMjCIbKFTBpzxV1MjtFS0owVCawmIFQRqW777R04wtGmTJrm9yZP3a+bOOee5zznnmyb95OS5zz03MhNJUrkOqHcBkqTaMuglqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhRtR7wIADj/88GxsbKx3GZI0pLS0tDyTmWN66jcogr6xsZHVq1fXuwxJGlIi4vHe9HPoRpIK12PQR8S4iFgVEesjYl1EfKpq/1xEPBURrdVjepd95kfEhoh4LCLOreUXIEnau94M3ewArs7MNRFxKNASEfdVz309M2/o2jkiTgYuAyYARwP3R8SJmblzIAuXJPVOj0GfmZuBzdX68xGxHjhmL7ucDyzNzG3A7yJiA3A68ON9KWz79u20tbXx8ssv78tuqqGRI0cyduxYGhoa6l2KpH2wTy/GRkQjcCrwU+BM4MqImAmspuOq/zk6fgn8pMtubXTziyEi5gBzAI499tjdztXW1sahhx5KY2MjEbEvZaoGMpNnn32WtrY2jj/++HqXI2kf9PrF2Ig4BPgecFVm/glYBLwFaKLjiv+rnV272X23TzfJzJszszkzm8eM2X120Msvv8zo0aMN+UEiIhg9erR/YUlDUK+CPiIa6Aj5JZn5fYDM3JKZOzPzFeCbdAzPQMcV/Lguu48FNvWlOEN+cPH7IQ1NvZl1E8C3gPWZ+bUu7Ud16XYhsLZavwu4LCIOjojjgfHAzwauZEnSvujNGP2ZwIeARyKitWpbAMyIiCY6hmU2Ah8DyMx1EXE78CgdM3auGIgZN43zftDfQ7zGxoXn7fX5KVOmMH/+fM4996+zQ2+88UZ+/etf841vfGOP+x1yyCG88MILA1LjrbfeyvXXX09mkpnMnj2buXPnDsixO1133XUsWLBgQI+p/W+g/38Mdz3lw1DT4xV9Zv4oMyMzJ2VmU/W4JzM/lJkTq/b3V7NzOvf5Uma+JTPfmpk/rO2XUBszZsxg6dKlr2lbunQpM2bM2C/n/+EPf8iNN97IihUrWLduHWvWrGHUqFEDfp7rrrtuwI8paXDxnbF78IEPfIC7776bbdu2AbBx40Y2bdrEO9/5Tl544QWmTp3K5MmTmThxInfeeedu+z/wwAO8973vfXX7yiuv5JZbbgGgpaWFs846i9NOO41zzz2XzZs377b/l7/8ZW644QaOPvpooGNq40c/+lEAWltbOeOMM5g0aRIXXnghzz33HNDxV0jnrSSeeeYZOu8fdMstt3DRRRcxbdo0xo8fzzXXXAPAvHnzeOmll2hqauLyyy/nz3/+
M+eddx6nnHIKb3/721m2bNkA/EtKqjeDfg9Gjx7N6aefzr333gt0XM1feumlRAQjR47kjjvuYM2aNaxatYqrr76azN0mFnVr+/btfOITn2D58uW0tLQwe/Zsrr322t36rV27ltNOO63bY8ycOZOvfOUrPPzww0ycOJHPf/7zPZ63tbWVZcuW8cgjj7Bs2TKefPJJFi5cyOte9zpaW1tZsmQJ9957L0cffTS//OUvWbt2LdOmTevV1yRpcDPo96Lr8E3XYZvMZMGCBUyaNIlzzjmHp556ii1btvTqmI899hhr167l3e9+N01NTXzxi1+kra2t1zVt3bqVP/7xj5x11lkAzJo1iwcffLDH/aZOncqoUaMYOXIkJ598Mo8/vvu9kCZOnMj999/PZz7zGR566KGaDBVJ2v8M+r244IILWLlyJWvWrOGll15i8uTJACxZsoT29nZaWlpobW3lyCOP3G1++YgRI3jllVde3e58PjOZMGECra2ttLa28sgjj7BixYrdzj1hwgRaWlr2qd6u59y1noMPPvjV9QMPPJAdO3bstv+JJ55IS0sLEydOZP78+XzhC1/Yp/NLGpwM+r045JBDmDJlCrNnz37Ni7Bbt27liCOOoKGhgVWrVnV7dXzcccfx6KOPsm3bNrZu3crKlSsBeOtb30p7ezs//nHHHSG2b9/OunXrdtt//vz5XHPNNfz+978HYNu2bdx0002MGjWKww47jIceegiA22677dWr+8bGxld/OSxfvrxXX2NDQwPbt28HYNOmTbz+9a/ngx/8IHPnzmXNmjW9OoakwW1Q3I++N+o13WnGjBlcdNFFr5mBc/nll/O+972P5uZmmpqaeNvb3rbbfuPGjeOSSy5h0qRJjB8/nlNPPRWAgw46iOXLl/PJT36SrVu3smPHDq666iomTJjwmv2nT5/Oli1bOOecc8hMIoLZs2cDsHjxYj7+8Y/z4osvcsIJJ/Cd73wHgLlz53LJJZdw2223cfbZZ/fq65szZw6TJk1i8uTJzJw5k09/+tMccMABNDQ0sGjRoj79m0kaXKK3LyLWUnNzc+76wSPr16/npJNOqlNF2hO/L4OT8+gH1lCZRx8RLZnZ3FM/h24kqXAGvSQVblAH/WAYVtJf+f2QhqZBG/QjR47k2WefNVwGic770Y8cObLepUjaR4N21s3YsWNpa2ujvb293qWo0vkJU5KGlkEb9A0NDX6SkSQNgEE7dCNJGhgGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klS4HoM+IsZFxKqIWB8R6yLiU1X7myPivoj4TbU8rGqPiLgpIjZExMMRMbnWX4Qkac96c0W/A7g6M08CzgCuiIiTgXnAyswcD6ystgHeA4yvHnOARQNetSSp13oM+szcnJlrqvXngfXAMcD5wOKq22Lggmr9fODW7PAT4E0RcdSAVy5J6pV9GqOPiEbgVOCnwJGZuRk6fhkAR1TdjgGe7LJbW9UmSaqDXgd9RBwCfA+4KjP/tLeu3bRlN8ebExGrI2J1e3t7b8uQJO2jXgV9RDTQEfJLMvP7VfOWziGZavl01d4GjOuy+1hg067HzMybM7M5M5vHjBnT1/olST3ozaybAL4FrM/Mr3V56i5gVrU+C7izS/vMavbNGcDWziEeSdL+N6IXfc4EPgQ8EhGtVdsCYCFwe0R8BHgCuLh67h5gOrABeBH48IBWLEnaJz0GfWb+iO7H3QGmdtM/gSv6WZckaYD4zlhJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQV
zqCXpMIZ9JJUOINekgpn0EtS4Qx6SSpcj0EfEd+OiKcjYm2Xts9FxFMR0Vo9pnd5bn5EbIiIxyLi3FoVLknqnd5c0d8CTOum/euZ2VQ97gGIiJOBy4AJ1T7fiIgDB6pYSdK+6zHoM/NB4A+9PN75wNLM3JaZvwM2AKf3oz5JUj/1Z4z+yoh4uBraOaxqOwZ4skuftqpNklQnfQ36RcBbgCZgM/DVqj266ZvdHSAi5kTE6ohY3d7e3scyJEk96VPQZ+aWzNyZma8A3+SvwzNtwLguXccCm/ZwjJszszkzm8eMGdOXMiRJvdCnoI+Io7psXgh0zsi5C7gsIg6OiOOB8cDP+leiJKk/RvTUISK+C0wBDo+INuCzwJSIaKJjWGYj8DGAzFwXEbcDjwI7gCsyc2dtSpck9UaPQZ+ZM7pp/tZe+n8J+FJ/ipIkDRzfGStJhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcD0GfUR8OyKejoi1XdreHBH3RcRvquVhVXtExE0RsSEiHo6IybUsXpLUs95c0d8CTNulbR6wMjPHAyurbYD3AOOrxxxg0cCUKUnqqx6DPjMfBP6wS/P5wOJqfTFwQZf2W7PDT4A3RcRRA1WsJGnf9XWM/sjM3AxQLY+o2o8BnuzSr61q201EzImI1RGxur29vY9lSJJ6MtAvxkY3bdldx8y8OTObM7N5zJgxA1yGJKlTX4N+S+eQTLV8umpvA8Z16TcW2NT38iRJ/dXXoL8LmFWtzwLu7NI+s5p9cwawtXOIR5JUHyN66hAR3wWmAIdHRBvwWWAhcHtEfAR4Ari46n4PMB3YALwIfLgGNUuS9kGPQZ+ZM/bw1NRu+iZwRX+LkiQNHN8ZK0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhRvRn50jYiPwPLAT2JGZzRHxZmAZ0AhsBC7JzOf6V6Ykqa8G4or+HzOzKTObq+15wMrMHA+srLYlSXVSi6Gb84HF1fpi4IIanEOS1Ev9DfoEVkRES0TMqdqOzMzNANXyiO52jIg5EbE6Ila3t7f3swxJ0p70a4weODMzN0XEEcB9EfGr3u6YmTcDNwM0NzdnP+uQJO1Bv67oM3NTtXwauAM4HdgSEUcBVMun+1ukJKnv+hz0EfGGiDi0cx34J2AtcBcwq+o2C7izv0VKkvquP0M3RwJ3RETncf4nM++NiJ8Dt0fER4AngIv7X6Ykqa/6HPSZ+VvglG7anwWm9qcoSdLA8Z2xklQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCjei3gUMJY3zflDvEoqyceF59S5BGha8opekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFq1nQR8S0iHgsIjZExLxanUeStHc1CfqIOBD4b+A9wMnAjIg4uRbnkiTtXa2u6E8HNmTmbzPzL8BS4PwanUuStBe1CvpjgCe7bLdVbZKk/axWt0CIbtryNR0i5gBzqs0XIuKxGtUyHB0OPFPvInoSX6l3BaoDfzYH1nG96VSroG8DxnXZHgts6toh
M28Gbq7R+Ye1iFidmc31rkPalT+b9VGroZufA+Mj4viIOAi4DLirRueSJO1FTa7oM3NHRFwJ/B9wIPDtzFxXi3NJkvauZrcpzsx7gHtqdXztlUNiGqz82ayDyMyee0mShixvgSBJhTPoJalwBr2kmomIAyLi7+tdx3DnGP0QFxFjgI8CjXR5cT0zZ9erJqmriPhxZv5dvesYzvxw8KHvTuAh4H5gZ51rkbqzIiL+Gfh+emVZF17RD3ER0ZqZTfWuQ9qTiHgeeAMdFyIv0XGLlMzMN9a1sGHEMfqh7+6ImF7vIqQ9ycxDM/OAzGzIzDdW24b8fuQV/RDX5WrpL8D2qtmrJQ0aERHA5cDxmfkfETEOOCozf1bn0oYNg15STUXEIuAV4OzMPCkiDgNWZOY76lzasOGLsQWIiPcD76o2H8jMu+tZj7SLv83MyRHxC4DMfK662aH2E8foh7iIWAh8Cni0enyqapMGi+3Vx4smvDol+JX6ljS8OHQzxEXEw0BTZr5SbR8I/CIzJ9W3MqlDRFwOXApMBhYDHwD+PTP/t66FDSMO3ZThTcAfqvVR9SxE2lVmLomIFmAqHVMrL8jM9XUua1gx6Ie+LwO/iIhVdPwnehcwv74lSbv5DfAnqsyJiGMz84n6ljR8OHRTgIg4CngHHUH/08z8fZ1Lkl4VEZ8APgtsoeNNU51vmHJ4cT8x6Ie4iDgTaM3MP0fEB+kYB/3PzHy8zqVJAETEBjpm3jxb71qGK2fdDH2LgBcj4hTg08DjwK31LUl6jSeBrfUuYjhzjH7o25GZGRHnAzdl5rciYla9i5Ii4t+q1d8CD0TED4Btnc9n5tfqUtgwZNAPfc9HxHzgQ8A/VNMrG+pckwRwaLV8onocVD2gmlOv/cMx+iEuIv4GmAH8PDN/VN1HZEpm3lbn0iQAIuLiXefMd9em2jHoh6jqZmad37yollmtbwM2ANdm5so6lCe9KiLWZObkntpUOw7dDFGZeeienquGb94OLKmW0n4XEe8BpgPHRMRNXZ56I7CjPlUNTwZ9gTJzJ/DLiPiveteiYW0TsBq4GPg1HX9x7qRjPv2/1rGuYcehG0k1ERENwJeAfwE20jGsOA74DrAgM7fveW8NJOfRS6qV64HDgOMyc3JmngqcQMf9mG6oa2XDjFf0kmoiIn4DnLjrB4JXryH9KjPH16ey4ccrekm1kruGfNW4E+fR71cGvaRaeTQiZu7aWN2T6Vd1qGfYcuhGUk1ExDHA94GXgBY6ruLfAbwOuDAzn6pjecOKQS+ppiLibGACHbNu1vkmvv3PoJekwjlGL0mFM+glqXAGvSQVzqCXpMIZ9JJUuP8HBP+YLYpPQ7EAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "p1 = plotUniqueValuesComparation(df_json, df_other, 'symbol')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonother
window.localStorage0.6530110.020276
window.sessionStorage0.3077430.009565
HTMLCanvasElement.style0.0208590.000780
window.document.cookieNaN0.342406
window.navigator.userAgentNaN0.149935
window.Storage.getItemNaN0.101712
\n", + "
" + ], + "text/plain": [ + " json other\n", + "window.localStorage 0.653011 0.020276\n", + "window.sessionStorage 0.307743 0.009565\n", + "HTMLCanvasElement.style 0.020859 0.000780\n", + "window.document.cookie NaN 0.342406\n", + "window.navigator.userAgent NaN 0.149935\n", + "window.Storage.getItem NaN 0.101712" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAGBCAYAAACdC/H6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XuYXFWV/vHvSwgG5eIIQZEAiRhQiBBISJAo4AUBQRgFFAQH5PZTQVAEAZlBRB3vKDCgwEAEFRBxgHBRHATCHZNAuAREuWobhBC5RBgggfX7Y59KKp1KutJ9uk+d3e/neeqx69SpzirpXn3O3muvrYjAzMzyskLVAZiZWfmc3M3MMuTkbmaWISd3M7MMObmbmWXIyd3MLENO7mZmGXJyNzPLkJO7mVmGVqzqH15zzTVj5MiRVf3zZma1NGPGjKcjYnhP51WW3EeOHMn06dOr+ufNzGpJ0uPtnOdhGTOzDDm5m5llyMndzCxDlY25m5m1Y/78+XR1dfHSSy9VHcqAGjZsGCNGjGDo0KG9er+Tu5l1tK6uLlZddVVGjhyJpKrDGRARwdy5c+nq6mLUqFG9+h4eljGzjvbSSy+xxhprDJrEDiCJNdZYo093K07uZtbxBlNib+jrZ3ZyNzPLUO3G3Ecee1Wv3vfYt3cuORIzq0Jvc8DStJsbtt56a2699dZS/+3+5Ct3M7M21Cmxg5O7mVlbVlllFZ544gm22WYbxo4dy5gxY7jpppsAuPDCC3nXu97FmDFjOOaYYxZ7z/HHH89mm23GVlttxZNPPjlg8Tq5m5m16YILLmCHHXZg5syZ3H333YwdO5bZs2dzzDHHcN111zFz5kymTZvGZZddBsALL7zAVlttxd13380222zD2WefPWCxOrmbmbVpyy23ZPLkyZx44once++9rLrqqkybNo3tttuO4cOHs+KKK7LPPvtw4403ArDSSiuxyy67ADBu3Dgee+yxAYvVyd3MrE3bbLMNN954I+ussw6f+tSnOP/884mIpZ4/dOjQhSWNQ4YMYcGCBQMVqpO7mVm7Hn/8cdZaay0OPvhgDjzwQO68804mTpzI1KlTefrpp3n11Ve58MIL2XbbbasOtX6lkGY2uFVV1iyJG264ge9973sMHTqUVVZZhfPPP5+1116bb33rW7zvfe8jIvjwhz/MbrvtVkmMi8W7rFuK/jR+/PjozWYdrnM3G1weeOAB3vnOd1Yaw9y5c9liiy14/PG29skoTavPLmlGRIzv6b0eljEzW4bZs2fz7ne/m6OOOqrqUJaLh2XMzJbhrW99K3/605+qDmO5+crdzCxDTu5mZhlycjczy1BbyV3SjpIelPSQpGOXcs7HJd0vaZakC8oN08zMlkePE6qShgCnA9sDXcA0SVMi4v6mc0YDxwGTIuIZSWv1V8BmNsiduHrJ3++5Xr3t2Wef5YILLuBzn/scADfccAPf//73ufLKK8uMrtfauXKfADwUEY9ExCvARUD3Cv2DgdMj4hmAiHiq3DDNzDrLs88+yxlnnFHa9yu7NU
E7yX0d4K9Nz7uKY802BDaUdIuk2yXt2OobSTpE0nRJ0+fMmdO7iM3MKnDyySczZswYxowZw49+9COOPfZYHn74YcaOHcvRRx8NwD//+U/22GMP3vGOd7DPPvss7DszY8YMtt12W8aNG8cOO+zAE088AcB2223HV77yFbbddltOOeWUUuNtp8691UZ+3Ze1rgiMBrYDRgA3SRoTEc8u9qaIs4CzIK1QXe5ozcwqMGPGDCZPnswdd9xBRDBx4kR+/vOfc9999zFz5kwgDcvcddddzJo1i7e+9a1MmjSJW265hYkTJ/L5z3+eyy+/nOHDh/PLX/6S448/nnPPPRdIdwBTp04tPeZ2knsXsG7T8xHA7Bbn3B4R84FHJT1ISvbTSonSzKxCN998Mx/96Ed5wxveAMDHPvaxhRt1NJswYQIjRowAYOzYsTz22GO88Y1v5L777mP77bcH4NVXX2Xttdde+J5PfOIT/RJzO8l9GjBa0ijgb8BewCe7nXMZsDfwU0lrkoZpHikzUDOzqrTbg+t1r3vdwq8bLX4jgk022YTbbrut5XsafzDK1uOYe0QsAA4DrgEeAC6OiFmSTpK0a3HaNcBcSfcD1wNHR8TcfonYzGyAbbPNNlx22WW8+OKLvPDCC1x66aVMmjSJefPm9fjejTbaiDlz5ixM7vPnz2fWrFn9HXJ7vWUi4mrg6m7HTmj6OoAji4eZWf/pZeliX2yxxRbsv//+TJgwAYCDDjqIcePGMWnSJMaMGcNOO+3Ezju37jy70korcckll3D44Yfz3HPPsWDBAr7whS+wySab9GvMbvlrZh2tE1r+VsUtf83MbDFO7mZmGXJyN7OOV9XwcZX6+pmd3M2sow0bNoy5c+cOqgQfEcydO5dhw4b1+nt4JyYz62gjRoygq6uLwdayZNiwYQsXRPWGk7uZdbShQ4cyatSoqsOoHQ/LmJllyMndzCxDTu5mZhlycjczy5CTu5lZhpzczcwy5ORuZpYhJ3czsww5uZuZZcjJ3cwsQ07uZmYZcnI3M8uQk7uZWYac3M3MMuTkbmaWISd3M7MMtZXcJe0o6UFJD0k6tsXr+0uaI2lm8Tio/FDNzKxdPe7EJGkIcDqwPdAFTJM0JSLu73bqLyPisH6I0czMllM7V+4TgIci4pGIeAW4CNitf8MyM7O+aCe5rwP8tel5V3Gsu90l3SPpEknrtvpGkg6RNF3S9MG22a2Z2UBqJ7mrxbHo9vwKYGREbApcC5zX6htFxFkRMT4ixg8fPnz5IjUzs7a1k9y7gOYr8RHA7OYTImJuRLxcPD0bGFdOeGZm1hvtJPdpwGhJoyStBOwFTGk+QdLaTU93BR4oL0QzM1tePVbLRMQCSYcB1wBDgHMjYpakk4DpETEFOFzSrsAC4B/A/v0Ys5mZ9aDH5A4QEVcDV3c7dkLT18cBx5UbmpmZ9ZZXqJqZZcjJ3cwsQ07uZmYZcnI3M8uQk7uZWYac3M3MMuTkbmaWISd3M7MMObmbmWXIyd3MLENO7mZmGXJyNzPLkJO7mVmGnNzNzDLk5G5mliEndzOzDDm5m5llyMndzCxDTu5mZhlycjczy5CTu5lZhpzczcwy1FZyl7SjpAclPSTp2GWct4ekkDS+vBDNzGx59ZjcJQ0BTgd2AjYG9pa0cYvzVgUOB+4oO0gzM1s+7Vy5TwAeiohHIuIV4CJgtxbnfR34LvBSifGZmVkvtJPc1wH+2vS8qzi2kKTNgXUj4splfSNJh0iaLmn6nDlzljtYMzNrTzvJXS2OxcIXpRWAHwJf6ukbRcRZETE+IsYPHz68/SjNzGy5tJPcu4B1m56PAGY3PV8VGAPcIOkxYCtgiidVzcyq005ynwaMljRK0krAXsCUxosR8VxErBkRIyNiJHA7sGtETO+XiM3MrEc9JveIWAAcBlwDPABcHBGzJJ0kadf+DtDMzJbfiu2cFBFXA1d3O3bCUs7dru9hmZlZX3iFqplZhpzczcwy5O
RuZpYhJ3czsww5uZuZZcjJ3cwsQ07uZmYZcnI3M8uQk7uZWYac3M3MMuTkbmaWISd3M7MMObmbmWWora6QZtaPTly9l+97rtw4LCu+cjczy5CTu5lZhpzczcwy5ORuZpYhJ3czsww5uZuZZcjJ3cwsQ07uZmYZcnI3M8tQW8ld0o6SHpT0kKRjW7z+GUn3Spop6WZJG5cfqpmZtavH9gOShgCnA9sDXcA0SVMi4v6m0y6IiJ8U5+8KnAzs2A/xZm/ksVf16n2PfXvnkiMxszpr58p9AvBQRDwSEa8AFwG7NZ8QEc83PX0DEOWFaGZmy6udxmHrAH9tet4FTOx+kqRDgSOBlYD3t/pGkg4BDgFYb731ljdWMzNrUztX7mpxbIkr84g4PSI2AI4B/r3VN4qIsyJifESMHz58+PJFamZmbWsnuXcB6zY9HwHMXsb5FwH/2pegzMysb9pJ7tOA0ZJGSVoJ2AuY0nyCpNFNT3cG/lxeiGZmtrx6HHOPiAWSDgOuAYYA50bELEknAdMjYgpwmKQPAvOBZ4D9+jNoMzNbtrZ2YoqIq4Grux07oenrI0qOy8zM+sArVM3MMuTkbmaWISd3M7MMObmbmWXIyd3MLENO7mZmGXJyNzPLkJO7mVmGnNzNzDLk5G5mliEndzOzDDm5m5llyMndzCxDTu5mZhlycjczy5CTu5lZhpzczcwy5ORuZpYhJ3czsww5uZuZZcjJ3cwsQ07uZmYZcnI3M8tQW8ld0o6SHpT0kKRjW7x+pKT7Jd0j6feS1i8/VDMza1ePyV3SEOB0YCdgY2BvSRt3O+0uYHxEbApcAny37EDNzKx97Vy5TwAeiohHIuIV4CJgt+YTIuL6iHixeHo7MKLcMM3MbHm0k9zXAf7a9LyrOLY0BwK/afWCpEMkTZc0fc6cOe1HaWZmy6Wd5K4Wx6LlidK+wHjge61ej4izImJ8RIwfPnx4+1GamdlyWbGNc7qAdZuejwBmdz9J0geB44FtI+LlcsIzM7PeaOfKfRowWtIoSSsBewFTmk+QtDlwJrBrRDxVfphmZrY8ekzuEbEAOAy4BngAuDgiZkk6SdKuxWnfA1YBfiVppqQpS/l2ZmY2ANoZliEirgau7nbshKavP1hyXGZm1gdeoWpmliEndzOzDDm5m5llyMndzCxDTu5mZhlycjczy5CTu5lZhpzczcwy5ORuZpYhJ3czswy11X7AzKzXTly9l+97rtw4BhlfuZuZZcjJ3cwsQ07uZmYZcnI3M8uQk7uZWYac3M3MMuTkbmaWISd3M7MMObmbmWXIyd3MLENO7mZmGWoruUvaUdKDkh6SdGyL17eRdKekBZL2KD9MMzNbHj0md0lDgNOBnYCNgb0lbdzttL8A+wMXlB2gmZktv3a6Qk4AHoqIRwAkXQTsBtzfOCEiHitee60fYjQzs+XUTnJfB/hr0/MuYGJv/jFJhwCHAKy33nq9+RZmZp2lQ1satzPmrhbHojf/WEScFRHjI2L88OHDe/MtzMysDe0k9y5g3abnI4DZ/ROOmZmVoZ3kPg0YLWmUpJWAvYAp/RuWmZn1RY/JPSIWAIcB1wAPABdHxCxJJ0naFUDSlpK6gD2BMyXN6s+gzcxs2draQzUirgau7nbshKavp5GGa8zMrAN4haqZWYac3M3MMuTkbmaWISd3M7MMObmbmWXIyd3MLENO7mZmGXJyNzPLkJO7mVmGnNzNzDLk5G5mliEndzOzDDm5m5llyMndzCxDTu5mZhlycjczy5CTu5lZhpzczcwy5ORuZpYhJ3czswy1tUF2Fk5cvZfve67cOAa5kcde1av3PfbtnUuOxCxvvnI3M8uQk7uZWYbaGpaRtCNwCjAE+O+I+Ha3118HnA+MA+YCn4iIx8oN1ZbJw05m1qTHK3dJQ4DTgZ2AjYG9JW3c7bQDgWci4u3AD4HvlB2omZm1r51hmQnAQxHxSES8AlwE7NbtnN2A84qvLwE+IEnlhWlmZs
tDEbHsE6Q9gB0j4qDi+aeAiRFxWNM59xXndBXPHy7Oebrb9zoEOKR4uhHwYFkfpA1rAk/3eFZ9+fPVV86fDfz5yrZ+RAzv6aR2xtxbXYF3/4vQzjlExFnAWW38m6WTND0ixlfxbw8Ef776yvmzgT9fVdoZlukC1m16PgKYvbRzJK0IrA78o4wAzcxs+bWT3KcBoyWNkrQSsBcwpds5U4D9iq/3AK6LnsZ7zMys3/Q4LBMRCyQdBlxDKoU8NyJmSToJmB4RU4BzgJ9Jeoh0xb5XfwbdS5UMBw0gf776yvmzgT9fJXqcUDUzs/rxClUzsww5uZuZZcjJ3cwsQ07uZmYZcnKvKSX7SjqheL6epAlVx1U2SStL2qjqOPqDpPUlfbD4emVJq1YdU1kk7dnOsbqStKmkXSV9rPGoOqbusk3ukt4s6RxJvymebyzpwKrjKtEZwLuBvYvn80gN3rIh6SPATOC3xfOxkrqvsaglSQeT+jCdWRwaAVxWXUSlO67NY7Uj6VzgXGB34CPFY5dKg2oh552YfgpMBo4vnv8J+CWpJj8HEyNiC0l3AUTEM8Uis5ycSGpcdwNARMyUNLK6cEp1KOmz3QEQEX+WtFa1IfWdpJ2ADwPrSDq16aXVgAXVRFW6rSKie2fcjpPtlTuwZkRcDLwGaTEW8Gq1IZVqftGOOQAkDaf4rBlZEBG5Npx/ueiyCixs25HDopPZwHTgJWBG02MKsEOFcZXpthZtzztOzlfuL0hag0XJbysgp0RxKnApsJakb5LaPvx7tSGV7j5JnwSGSBoNHA7cWnFMZZkq6SvAypK2Bz4HXFFxTH0WEXcDd0u6ICLmVx1PPzmPlOD/DrxMapwYEbFptWEtLtsVqpK2AE4DxgD3AcOBPSLinkoDK5GkdwAfIP1w/T4iHqg4pFJJej1pWO1DpM94DfD1iHip0sBKIGkF0iY3zZ/tv3PpySRpEmlYbX3SRWQjAb6tyrjKULRZORK4l6a75Yh4vLKgWsg2ucPCW92NSD9YD+Z0JSHpTS0Oz8vpM1p9Sfoj8EXSkMzC4dCImFtZUCWRdF1EvL/qOHqSbXJfSmnSc8C9EfHUQMdTNkmPkdosP0P64/VG4AngKeDgiJhRXXR9I+kKljH+HBG7DmA4pZJ0cUR8XNK9tN7zoKNu7XtL0h0RMbHqOPqDpDNIv29XkIZlAIiI/6ksqBZyTu5XkUoFry8ObQfcDmwInBQRP6sotFJI+glwaURcUzz/ELAjcDFwSp1/sSRtu6zXI2LqQMVSNklrR8QTktZv9Xqn3dr3lqRvk7rI/g+LJ8A7KwuqJJImtzgcEXHAgAezDDkn9yuAgyLiyeL5m4EfAwcBN0bEmCrj66tWu780jkmaGRFjq4qtLJJ2Aa6OiNyqgJC0U0T8ptuxz0TET6qKqUySrm9xOOownJGLnEshRzYSe+EpYMOI+AeQw7j0PyQdU6xyXF/Sl4FnivLIXJLhXsCfJX1X0jurDqZk/yFpYaKTdAxLbjxfWxHxvhaPLBK7pA0l/b7YO7qxWrXjKtVyTu43SbpS0n6S9gMuB26U9Abg2YpjK8MnWbSq8XJgveLYEODjFcZVmojYF9gceBiYLOk2SYdkskx/V+A/Jb23KGWdUBzLQuYrxM8mrbadD1BU4HXcBkU5D8uItDx4EmnC8Wbg17mUmg0mktYE9gW+ADwAvB04NSJOqzSwPipWpF5Lqig5IKefzSKpTwaOj4jNisq1uyLiXRWH1meSpkXElpLuiojNi2MdNxSa7SKm4hflkuKRnWJF6peBTYBhjeO53PoCSNoV+DSwAfAzYEJEPFXUvz9AWsdQK5LmsXiVzErA24A9JEVErFZNZKVbMyIulnQcLNyuM5cV4k9L2oBFCyT3IFWqdZRsk3uxIvU04J2kX6AhwAsZ/fL8gtQrZxfgM6QNyudUGlH59gB+GBE3Nh+MiBcldVRlQrsiIochpXbkvEL8UNK+qe+Q9DfgUWCfakNaUr
bJHfgv0jjYr4DxwL+RbudzsUZEnCPpiKI0cKqk2pYILsUT3RO7pO9ExDER8fuqgipLcWeyTfH0hoi4ssp4SnYkqZ/MBpJuoVghXm1IpYmI+GAxf7dCRMyTNKrqoLrLeUKViHgIGBIRr0bEZOB9VcdUokbFzxOSdpa0OWmCNSfbtzi204BH0Q+KOvAjgPuLxxHFsSwU9ezbAlsD/w/YJKPWH78GiIgXImJecazjhn9zvnJ/sWiBO1PSd0ljYm+oOKYyfUPS6sCXSMNPq5GWe9eepM+SGmltIKk5IawK3FJNVKX7MDC2UcMv6TzgLuDYSqMqSYsV4htKqvUK8aKX0ybA6t0+32o0zXt1ipyT+6dIdyaHkZLeuqTqmdoratlHF7fxz5HXHQnABcBvgG+xeLKbV6xTyMUbgcbnWb3KQPrBgSxlhbikuq4Q34g0x/VG0gYdDfOAgyuJaBmyLIUskt95RZ10liRdHxG5JfXFFBUJXRHxsqTtgE2B8yOi9usUJO0NfJuU/EQaez8uIi6qNLCS5LxCXNK7I+K2quPoSZbJHUDSNcBHmjdEyEmx8GV1UsXMC43jOfTuaJA0kzQZPpLUEncKsFFEfLjKuMoiaW1gS1JyvyMi/l5xSKWRdG9zTXux7uTeiBjTXB9eJ5JOY9kN7Q4fwHB6lPOwzGPALUp7bjYnv5Mri6hcWxf/e1LTsQCyqXMHXivqoz8G/CgiTlOxrWAmtmRRtcxrZLBZR5ObJF1JqlaDVClzU81XiE+vOoDlkXNyn108ViBNxGUl9yGZwvxi+OLfWDTGObTCeEpTVMZsSVqvAHC4pK0jIotNpEm14B8D3kO6MzkvIhoVJbX82Y2I8wAk7RkRv2p+TdKe1US1dNkOyzQUfUgiIv5ZdSxlKiplvsqiK7+ppFbGuSwUQWmfys8At0XEhUUt8SciovYlg0UVUHO1zBDS8vws+rl3J+k9wN4RcWjVsfSVpDsjYouejlUt2yt3SWNIS9bfVDx/Gvi3iJhVaWDlOZe0fWCjSdinSL08Wm1SUksRcT9p39TG80dJk5C5yLlaBkljgb2BT5BWcXbUZhbLS9JOpBLWdSSd2vTSasCCaqJaumyTO2l58JERcT1AUW1xNovGqutug4hoLu38WjEBmTVJJ0bEiVXHUYJvAXcVfc8XVstUG1LfSdqQtDJ8b2AuacJfmQwjziaNu+9KavbWMI8OXGOS7bCMpLsjYrOejtWVpNuAoyPi5uL5JOD7EfHuaiPrX5I+EhFZTDzmWC0j6TXgJuDAYoU4kh6JDDbGbpA0lHRhvF5EPFh1PEuTc/uBRyT9h6SRxePfSbeGufgMcLqkx5T2U/0v0jLvbBR/sLrLYhGTpI8CL0bElIi4HHhJ0r9WHVcJdgf+Dlwv6WxJHyD98crJjsBM4LeQhp+KqryOkvOV+78AXyPN1gPcCJyYwwIYAEmjIuJRSasBRMTzjWNVx1aWukxc9Uar/t91rf9uRdIqpJ2l9iaV555H2vP3d5UGVgJJM0if6Yamfu73dNpkeM5j7h/svqigKFf61VLOr5tfA1tExPNNxy4BxlUUT2kkvZs0NzJc0pFNL61Gat2cg1Z3zdn8PhbVab8AfiHpTcCepFYStU/uwIKIeC6ty+pc2fwwtXAcSybyVsdqpW7Ni3ppJWAV0s9n8xqF58mnbex0SScDp5MWn32exSfpakvSCsA9jRYDRT+gM4tHDu6T9ElgiKTRpIquWyuOaQnZJfe6lSv1Qq2aF/VGU3/6n0bE41XH008+D/wHqZoE0hVtx22y3BsR8ZqkuyWtFxF/qTqefvB54HjgZeBCUmuMr1caUQvZjblL2gwYS1qWf0LTS/OA6yPimUoCK1ldmhf1RVFWdxSpt8zCC5GcthLMlaTrSJVAf2Dx9h/ZbALe6bJL7g2ShkbE/KJsaQzwt7r2kW4m6WDSRM6fi2ZM55AqFB4H9s+scdjdwE9IwxUL99+MiCyGL7qTdEhEnFV1HG
WQtG2r48VdWa0VHS+7J87nSDXwZ0bESwMf1ZJyHJb5CXBaRMwqlujfRkoMb5J0VERcWG2EfXYE8NPi672BzUgbLG8OnAK8t5qw+sWCiPhx1UEMoM6eoVsOETFV0vqkfQeuVdrUPJfJ8EdI2wY2cskngCeBDUkLJT9VUVyLybHO/b1NLQY+DfypaD06DvhydWGVZkFENLbY24XU33xuRFxLXjtNAVwh6XOS1pb0psaj6qDKoNZ7buZQSQIsvMO8hEWTqOsAl1UXUak2j4hPRsQVxWNfYELRN6djynRzTO7N/du3p/iBymH1X+G1ItkNAz4AXNv02soVxdRf9gOOJlUizCgetWq7ugy/bnGs4/bh7INDgUmkCici4s/AWpVGVJ7hktZrPCm+XrN42jH7R2Q3LAM8K2kX4G+kH64DASStSB7J7wRSghsCTGncpRRjnI9UGVjZIqLjdpTvq0FSygrwckS80qgFL37/cpng+xJws6SHSUNpo4DPFb3qz6s0siY5Jvf/B5wKvAX4QtMV+weAqyqLqiQRcWUxlrlqt8qf6aSxv2wU47RHknp4HFLUFG9U7B1bV9mXshamSvoKsLKk7UkbnmfREygiri5+Ft9BSu5/bJpE/VF1kS0u22qZwUDS1ixZJnh+ZQGVTNIvSUMx/1Zsz7Yyqbf72B7e2vFyL2UtFjIdCHyIlACvAf47Mk04kt7SaUO/2SV31Wyfw96S9DNgA1IDo0aZYOTy+QAkTY+I8c09V3Lp7ClpOOlKfSSL/3E+oKqY+ksxCT4iIu6pOpb+IumqiNi56jia5Tgsk8uEW0/GAxvneiVUeKW4Wg8ASRuQVgXm4HJSa9xraarhz4WkG0h9z1ckXYDMkTQ1Io5c5htrqtMSO2SY3Bv7HA4C95HmFZ6oOpB+9FVSW9V1Jf2CNEG+f6URlef1EXFM1UH0o9WLTqUHAZMj4qtKWwtmQWnbwNERMbm4C1ul0zqyZpfcG4r/w48BNqapCiGjpetrAvdL+gNNV7M5Le+OiP+VdCewFWnc9oiIeLrisMpypaQPR8TVVQfST1YsNiP5OKkPSzYkfZV057wRaWvLocDPSRcfHSPb5E5qN/pLYGfSxhb7AXMqjahcJ1YdwABZh1T2uSKwjSQiotZ7cRaOAL4i6RVSbbRIcyarVRtWaU4iTaLeHBHTJL0N+HPFMZXlo6QV4XcCRMRsSasu+y0DL7sJ1QZJMyJiXHMT/WLMr2XPizqS9GZScyaAP+TQO6eZpHOBTYFZwGvF4chx0tHqQ9IfImJCY+OYor79Nm/WMXAaS/SfkLQzaXPbERXGUypJHwe+B9xAuuo7TdLREZHTKsetImLjqoPoD0XTt32AURHxdUnrAmtHxB8qDq0UkibTomotkz/MF0s6E3hj0WbhAFJPmY6S85X7LqRqhHWB00grAL8WER2312FvFB0Tt29crRdzDNfmUCbYIOkc4AcRcX/VsZRN0o9JdyPvj4h3Km0L+buI2LKHt9aCpN2bng4jDWXMzqVUt1iYtbCGPyL+t+KQlpBtcs+dpHuLhmiN5yu2Gq5yAAAS/0lEQVQAdzcfqztJ25BWNf6dNGncGJfuqNvf3mi6pc+uhr+V4ufz2owKGjpetsMyks4jVVc8Wzz/F9JVYA63hQC/lXQNi7cdza3y4lxS+9R7WTTmnov5koawqIZ/OPl9xmajgfV6PKsGJM1j6f3cvxQRHdHjKdvkDmzaSOwAEfGMpCx2lgeIiKOLW99JpCvasyLi0orDKttfchlGa+FU4FJgLUnfJO0Nm8U2e9AyAf6dVJqcg5NJc3gXkH739iKtOXmQdEGyXWWRNcl2WKYYk96u0VyrWAI9Nadhi9xJOoPUYOsKFq/lz6EUstEh8gOkBPH7iHig4pCsDZLuiIiJ3Y7dHhFbddLQWs5X7j8AbpXUqB7ZE/hmhfGUQtLNEfGeFldGudVJQ2rR/DJp4qohgCySO2n3nptIv4crS9oip20SM/ZaUa
3WyC17NL3WMVfL2V65A0jaGGhM4FyXY9WF1ZOkr5NaKTzMooQQOU84NiaRq46jr4oFWacA7yb9t7sd+CJpD4lxEXFzheEtlHty3wzYhvQf4KaIuLvikEpTNNHqioiXJW1HWuxzfvM8Q91J2hD4MfDmouXvpsCuEfGNikPrM0kPAu+KiI7ZucfykuM2ewBIOoLUgmBN0vZeP5f0+WqjKtWvgVclvR04h7QbzAXVhlS6s4HjKBakFS1j96o0ovLcR5pPyI6kIZKu7fnMepI0TNKhks6QdG7jUXVc3eU85n4gMDEiXgCQ9B3gNtKCphy8FhELJH0U+FFEnCbprqqDKtnrI+IPja3aCguqCqZk3wLuknQfmTV+i4hXJb0oafWIeK7qePrBz4A/AjuQeujsA3TcZHjOyV0s3if71eJYLuZL2pvUEK2xXdvQCuPpD08Xw0+NWvA9yKfF8XnAd8izhh/gJeBeSf8LvNA4mMkK1bdHxJ6SdouI8yRdQGqS1lFyTu6TgTskNWq//5U0fJGLT5O6XX4zIh6VNIrUdjQnhwJnAe+Q9DfgUWDfakMqzdMRcWrVQfSjq8hgz+KlaPStelbSGFIN/8jqwmkt9wnVLYD3kK7Yb4yI3IYtgIWrb9fNdRuzouveChExr+pYyiLpZNJwzBQWH5bJphRS0krAhsXTByNi/rLOr4tiA5JfA+8CfgqsAvxHRJxZZVzdZZfci8VKSxUR/xioWPpTq23MSIu0ar+NmaRlfoaIOHmgYukvkq5vcTibUsiigus84DHSxdW6wH4RcWOFYfVZ0SNnj4i4uOpYepLjsMwM0hhtY3y98ddLxddvqyKofpDzNmYdt/FB2SLifVXH0M9+AHwoIh6EhWWtFwLjKo2qjyLiNUmHAU7uAy0iRlUdwwDJdhuziPha1TH0N0kntDoeEScNdCz9ZGgjsQNExJ8k5TLh/7+SjiLt9NY8WdxRowLZ1rkPAo1tzB7ObRszSRc3ff2dbq/9buAj6hcvND1eBXaiAyfl+mC6pHMkbVc8zibdVefgANJk/42kzzSD1BGyo2Q35r4suSx/zl23HueL/Tdrfi0nkl4HTImIHaqOpQzF5zmUpoIG4HSvyB04g+rKPafELmlDSb8vFsEgaVNJubSMXdYVR65XI68nn/kggM9ExMkR8bGI+GhE/BD4bNVBlUHSUEmHS7qkeBzWiUNO2SZ3SQdIGl11HP0o56X5r5e0uaRxpG6Jm0vaovG86uDKIOleSfcUj1mkXuCnVB1XifZrcWz/gQ6in/yYNDF8RvEYVxzrKNlNqDYZCewraX3SmNhNpOZhMyuNqjw5L81/grQhAqQFIs2lj38f+HD6xS5NXy8AnoyI2v/3K1ZNfxIYJal5o5VVgbnVRFW6Lbv1bL+u2D+io2Sb3CPiBABJKwMHA0cDPwKGVBlXibJdmj8IygQB1gZmNRZmSVpF0iYRcUfFcfXVraSfwzVJ5ZAN84BcSnVflbRBRDwMC1sAv9rDewZcthOqxfjzJNLqsbuAm0lX7lkkwOIH6ixga+AZiqX5EfFYlXGVSdKewG8jYl7x33ML4Os5rDQumrxtEcUvYLE4ZnpO80K5kvQBUnuTR0iTxesDB0TEdZUG1k3Oyf1O0u3uVcBU4PaIeKnaqMqX49L8Bkn3RMSmkt5D6qL4feAr3bc4qyNJMyNibLdj90TEplXFVCZJW5E6sL4TWIl0x/xCDjuFFZVAABuRkvsfASLi5aW+qQLZTqgWV0AfAP4AbE/qUNcRO6SUQdIRklYDXgR+KOlOSR/q6X0107jV3Rn4cURcTkoUOXikqLgYWjyOIF0J5uK/gL1Jay9WBg4in3bbt0XEyxFxT0TcXST126oOqrtsk3vRrW1f0qz9J4AuoKNum/rogIh4nrS/6FqkLpHfrjak0v1N0pmkVbhXF1dMufzMfoY0pPa34jEROKTSiEoWEQ8BQyLi1YiYDNR6LkXSW1pUcG1R9NF5fcXhLSHbCV
VSr+wbgVOBabl0pGvSKJP5MKm3zN3qVjqTgY8DOwLfj4hni3YLR1ccUyki4inyKV1t5cWiK+RMSd8lTbK+oeKY+moHUjnnCNJkceP3bR7wlYpiWqpsx9wh35ajAJImA+uQttfbjDSmeUNE1LoxUyuS1gKGNZ5HxF8qDKcUkkaQhikmkSqebgaOiIiuSgMrSVGC/BRpA5kvAqsDZxRX87UmafeI+HXVcfQk2+QuaVvgfDJrOdpQVFeMBR4prmrXANbJqae7pF1JV0hvJSWK9YA/RsQmlQZWgmKHogtIW7ZBGkLcJyK2ry4qWxZJHwHuiYjHi+cnALsDj5P+MD9aZXzd5ZzcZwCf7N5yNMcr21wVC0PeD1wbEZtLeh+wd0TUfmx6KdUySxyrK0n3smSriOdIDba+ERG1W9BUtNTeKiJelLQLaXHd3sDmwJ6d1hcol8mpVpZoOUp+e4wupij/zMn8IgmsIGmFiLiedLeSg6cl7StpSPHYl3xWcAL8hlSGvE/xuIK0SvzvpN2L6igi4sXi648B50TEjIj4b2B4hXG1lPOE6nRJ57Dotncf8mk52lKGC2CelbQKaWL8F5KeIp8WCweQygV/SLrCvbU4lotJETGp6fm9km6JiEnFH7I6UvHz+CKpzPqMpteGtX5LdXJO7p8ltRw9nEUtR89Y5jtqRNIBpBW3WfRwX4rdgP8jTcjtQ5qUy2Izi2JSeNeq4+hHq0ia2GinIGkCabU41PcP9I9IW1o+DzwQEdMBJG1OB7b+yHbMPXeSTiL1ys61MRqSvgj8KpcKEgBJp7GMtsURcfgAhtNvJG0JnEtK6CIlxIOAWcDOddiDtBVJ65DWldwdEa8Vx9YmDQN3VBVXdsl9KRM5C+WyvLuhqTHaUaRqmVwaoyHpq6Ra938AFwGXRMST1UbVN5IarXAnARuTtmoD2BOYERFfrCSwfiJpdVKeebbqWAabHJP7+sWXhxb/2zzm/mIue1Tm3hitmaRNSauMdwe6IuKDFYfUZ5KuJ20gPb94PhT4Xd07YkraNyJ+LunIVq9HxMmtjtedOnCXt+zG3JtqULtP6Bwr6RYyGbMlzdZn3xit8BSpymIu6ZY4B28l9ThvbKq8SnGs7hqrUFetNIoB1mmJHTJM7k3eIOk9EXEzgKStqf/y54UiYgtJq5LG3bcHzpb0ZES8p+LQSiPps6Qr9uHAJcDBEXF/tVGV5tvAXcUVPMC2wInVhVOOiDiz+PKMiJhTaTD9pC7FDDkn9wOBc4sxP4BnyajUrGiM9l5SUhgP/JU0qZqT9YEv5DRJ3BARkyX9htQwDODYiMhllymAWyU9SppT+J+IeKbqgEo0khrs8pbdmHt3RVtcRcRzVcdSJklXkco7byLPxmgASBoCvJmmC5FOq0qw1oryx72AfwXuBy6KiJ9XG1V5Or2YIdvkXrSH3Z30V7Y5MeQy5p51YzQASYeRhiqeBF4rDkduFU8NnTgpVwZJa5KW6u/TaQmwN+pSzJDzsMzlpF4WM4CO2iGlDK0ao0nKpjFa4QvARnXsQ9IbOSX24o75o6Qr9w2AS4EJlQZVnloUM+R85X5fRIypOo7+MhgaoxWTjdtHRF1XNC5VXSbleqsYb78MuDgiOm6Xor5qKmZ4D2ktRscVM+R85X6rpHdFxL1VB9JPlmiMVtRK5+QR4IZifmHh3VcmtdIjqcGkXB+8LTK9cqxLMUPOV+73A28HHiUlBpHReK2kc0krcZsXaa0YEZ+uLqpyFStUlxARXxvoWPpLp0/K9Zak4cCXgU1YfKOV91cWVEnqUsyQc3Jfv9XxxiKnuismjA8l3RYubIwWHbYDu7VWl0m53pL0O1IZ5FGk/WL3A+ZExDGVBlaSOhQzZJfcJa0WEc9LelOr1yPiH62OW+fJ/OrvTmowKddbkmZExDhJ9zTuliVNjYhtq46tr+qyy1uOY+4XALuQxjGDRZvYUjx/WxVBlWWQNUb7Benqbxearv4qjagkg2CFce
NK9glJOwOzSRtL5+BkUl+gxYoZgI4qZsguuUfELsWXN1OMi0XEHysMqWyNz9eyMdrAh9Ov1oiIcyQdERFTgamSplYdVBnqMinXB98oVod/ibQR+Gqkvvw5qEUxQ3bDMg2S3k+6Knov6Wr9LlKiP6XSwErS2NWmp2N1Jun2iNhK0jXAqaSrv0siYoOKQ+uzukzK2ZLqUsyQbXKHhUvXtwTeR7qt/7+IeEe1UZVD0kzgsG6N0c6ITDZYBig2Ib6JNKbZuPr7WkRMqTSwktRhUq63ivmSg1lyhXjt+zvVpZgh2+Qu6fekLpC3kRLEzRHxVLVRlUfSONJON4s1RouI3DbJzlJdJuV6S9KtpN+7GcCrjeMR8evKghpkck7uPyRNcLwM3EL663pbRPxfpYGVLMfGaJK+CzwSET/pdvyLwFtyKKfLfYWxpJk53UVC/YoZsk3uDcVu5Z8m1du+JSJeV3FIpci5MVqxAG1MFHtUNh1fAbgnh7YSzSWCyzpWV5K+AdwaEVdXHUtZ6rbLW7bJvego+F7S1fvjLKqcua7SwEoi6bcsaozWfNv7g8qCKomkWRGxyfK+Vid1mZTrLUnzSMOiL5PKIhsrxFerNLAS1KWYIbtSyCYrk+pRZ+TYeAoYERE7Vh1EP3lR0ujuTbUkjQZyGVb7LOkK8HCaJuUqjahEEZHzNnu12OUt2yv33Ek6Czgtx8ZoknYiVcd8g3RnAqkW/DjSzkzZ3Opb/dSlmMHJvaYGQWO0McDRQGN8/T7g+3X/Y1a3Sbky5bYZSacXMzi511TujdGWRtL3I+KoquPorbpNytmS6lLM4OReM4O9MZqkv0TEelXH0Vd1mZTrrZw3I6lLMUPOE6q5yroxWhvU8ym1UItJuT4YSb6bkdSimMFX7jUl6Wfk2RiNpd2VkBL73RFR++6CdZmU66scNyOpSzGDk3tN5dwYrdh/s/tdyUIRMWpgI+o/nT4p11s5b0ZSl2IGJ/cay7kxWu7qMinXWzlvRlKXYgaPuddUi8ZoW+bSGE3SMsvlMhm6uJxFk3Id1U2wDDluRtIoZgDmVR1LO5zc6+seUmuFMaQk8aykXBqjTQdmsWjXpe6TxrXfZo+aTMr1VqabkdSqmMHDMjWXY2O0ovvj7qQ/WhcBl0bEP6uNqlx1mZTrrZw3I6lLMYOTe03l3hgNQNIoYG9gN9Jn/M9MSulqMynXF7luRlKXYgYn95qSdDQpoefaGA0ASZsAewGfAr4cERdXHFIp6jIp11uDYDOSji9mcHK3jiPpbaSEvhtprPYi4Mocqi0GywrjnDcjqcsub55QtU70EGnC+HLgeWA94HNSmr+KiJOrC63PajUp1wdDG4kdICL+JGlolQGVqBbFDL5yt44j6avLej0ivjZQsfSXukzK9Vbum5FA5xczOLlbx5F0WET8V9Vx9Ke6TMr1VrFI61DSZ1y4GUlE1L6mvy7FDE7u1nFy6/u9NHWYlLMl1aWYwcndOs5gSO51mZRbXoN5M5JO4wlV60SbSnq+xfFsNlmmJpNyvbBL8b8tNyMZ+HAGL1+5W8eRdFdEbF51HAOh0yfleiv3zUjqwFfuZhVoMSl3LvXvvdIs981IOp6Tu3WiX1UdwABYGTiZDp+U64MDgXMlLbYZSYXxDDoelrGOI+k0lj0pd/gAhmN9kOtmJHXgK3frRNObvv4asMxFTdZ5um9G0rS6OIvNSOrAV+7W0QbT5GpOJP2WRZuRvNo4HhE/qCyoQcZX7tbpfPVRT1lvRlIHK1QdgJll6VZJ76o6iMHMwzLWcSTNY9EV++tZtPglp0VMWRsMm5F0Oid3Mytd7puR1IHH3M2sNI3NSIB5Vccy2PnK3cxKI+nKiNhF0qO02IwkInLZjKTjObmbWely34ykDpzczax0uW9GUgdO7mbWL7wZSbU8oWpmpWuxGcmWOWxGUidexGRm/eEe4BXSZiSbAmMkrVxtSIOLh2XMrN/kuhlJHXhYxsxKNwg2I+l4Tu
5m1h9y34yk43lYxswsQ55QNTPLkJO7mVmGnNzNzDLk5G5mlqH/D10kugapLjbJAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotTopUsageComparation(df_json, df_other, 'symbol', 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Above the mean Sample:" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 2 unique symbol present on the non-json dataset and 5 on the JSONs\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEOCAYAAACpVv3VAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEKNJREFUeJzt3X9s1HWex/HXGxitLsgarHciYDXBX0gtpXLe6QkHKAi6q5yL2+Bi0jsbk/PXKSLFixdzHqIxG9bLaY7crihLFu44jRdUjtWDiImny9Si/FDXGFwrioXVrih0i7zvj04JlMJ8kZl+590+H0nTmel3vvOGlme+fOY7U3N3AQDi6Jf2AACAY0O4ASAYwg0AwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEM6AYOz3ttNO8oqKiGLsGgF4pm83udPfyJNsWJdwVFRXasGFDMXYNAL2SmX2UdFuWSgAgGMINAMEQbgAIpihr3ABKQ3t7u5qbm7V37960R0FOWVmZhg0bpkwm8533QbiBXqy5uVmDBg1SRUWFzCztcfo8d9euXbvU3Nyss88++zvvJ1G4zWybpK8kfStpn7vXfOdHBNBj9u7dS7RLiJlpyJAhamlpOa79HMsR91+5+87jejQAPY5ol5ZCfD94chIAgkl6xO2S1piZS/o3d1/cdQMzq5dUL0kjRowo3IRFUjHvhbRH6FW2LZye9ghIoNA/9/m+7xMmTFBDQ4OmTJly4LZFixbp/fff1xNPPHHE+w0cOFC7d+8uyIzPPPOMHn30Ubm73F11dXWaM2dOQfbdacGCBZo/f35B93k0SY+4L3P3aklXS/o7M7ui6wbuvtjda9y9prw80as2AfRytbW1Wr58+SG3LV++XLW1tT3y+C+99JIWLVqkNWvWaPPmzWpsbNTgwYML/jgLFiwo+D6PJlG43X177vPnkp6TNK6YQwHoHW644QatWrVKbW1tkqRt27Zp+/btuvzyy7V7925NmjRJ1dXVGj16tJ5//vnD7r9u3Tpdc801B67fdtttWrJkiSQpm81q/PjxGjt2rKZMmaJPP/30sPs//PDDeuyxxzR06FBJHafi3XLLLZKkpqYmXXrppaqsrNT111+vL774QlLH/xI637Jj586d6nzfpSVLlmjGjBmaOnWqRo4cqblz50qS5s2bpz179qiqqkqzZs3S119/renTp+viiy/WRRddpBUrVhTgb/JQecNtZt8zs0GdlyVdJWlTwScB0OsMGTJE48aN0+rVqyV1HG3feOONMjOVlZXpueeeU2Njo9auXat77rlH7p5ov+3t7br99tu1cuVKZbNZ1dXV6f777z9su02bNmns2LHd7mP27Nl65JFH9Pbbb2v06NF68MEH8z5uU1OTVqxYoXfeeUcrVqzQxx9/rIULF+q
kk05SU1OTli1bptWrV2vo0KHauHGjNm3apKlTpyb6Mx2LJEfcfyLpNTPbKOlNSS+4++qCTwKgVzp4ueTgZRJ31/z581VZWanJkyfrk08+0Y4dOxLt87333tOmTZt05ZVXqqqqSg899JCam5sTz9Ta2qovv/xS48ePlyTdfPPNevXVV/Peb9KkSRo8eLDKysp04YUX6qOPDn9fqNGjR+vll1/Wfffdp/Xr1xdlaSbvk5Pu/qGkiwv+yAD6hOuuu0533323GhsbtWfPHlVXV0uSli1bppaWFmWzWWUyGVVUVBz2Cs8BAwZo//79B653ft3dNWrUKL3++utHfexRo0Ypm81q4sSJiec9+DG7znPiiSceuNy/f3/t27fvsPufe+65ymazevHFF9XQ0KCrrrpKDzzwQOLHT4LTAQEU1cCBAzVhwgTV1dUd8qRka2urTj/9dGUyGa1du7bbo9ezzjpLW7ZsUVtbm1pbW/XKK69Iks477zy1tLQcCHd7e7s2b9582P0bGho0d+5cffbZZ5KktrY2Pf744xo8eLBOPfVUrV+/XpK0dOnSA0ffFRUVymazkqSVK1cm+jNmMhm1t7dLkrZv366TTz5ZN910k+bMmaPGxsZE+zgWvOQd6EPSOm2ztrZWM2bMOOQMk1mzZunaa69VTU2NqqqqdP755x92v+HDh2vmzJmqrKzUyJEjNWbMGEnSCSecoJUrV+qOO+5Qa2ur9u3bp7vuukujRo065P7Tpk3Tjh07NHnyZLm7zEx1dXWSpKefflq33nqrvvnmG51zzjl66qmnJElz5szRzJkztXTp0sRH6vX19aqsrFR1dbVmz56te++9V/369VMmk9GTTz75nf7OjsaSPhlwLGpqarzUf5EC53EXFudxl6atW7fqggsuSHsMdNHd98XMsknfToSlEgAIhnADQDCEG+jlirEciu+uEN8Pwg30YmVlZdq1axfxLhGd78ddVlZ2XPvhrBKgFxs2bJiam5uP+/2fUTidvwHneBBuoBfLZDLH9ZtWUJpYKgGAYAg3AARDuAEgGMINAMEQbgAIhnADQDCEGwCCIdwAEAzhBoBgCDcABEO4ASAYwg0AwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDBEG4ACIZwA0AwhBsAgkkcbjPrb2ZvmdmqYg4EADi6YznivlPS1mINAgBIJlG4zWyYpOmS/r244wAA8kl6xL1I0lxJ+4s4CwAggbzhNrNrJH3u7tk829Wb2QYz29DS0lKwAQEAh0pyxH2ZpB+Y2TZJyyVNNLNfdt3I3Re7e42715SXlxd4TABAp7zhdvcGdx/m7hWSfizpf939pqJPBgDoFudxA0AwA45lY3dfJ2ldUSYBACTCETcABEO4ASAYwg0AwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDBEG4ACIZwA0AwhBsAgiHcABAM4QaAYAg3AARDuAEgGMINAMEQbgAIhnADQDCEGwCCIdwAEAzhBoBgCDcABEO4ASAYwg0AwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEkzfcZlZmZm+a2UYz22xmD/bEYACA7g1IsE2bpInuvtvMMpJeM7OX3P3/ijwbAKAbecPt7i5pd+5qJvfhxRwKAHBkida4zay/mTVJ+lzSr939jW62qTezDWa2oaWlpdBzAgByEoXb3b919ypJwySNM7OLutlmsbvXuHtNeXl5oecEAOQc01kl7v6lpHWSphZlGgBAXknOKik3s+/nLp8kabKkd4s9GACge0nOKjlD0tNm1l8dof8Pd19V3LEAAEeS5KyStyWN6YFZAAAJ8MpJAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDBEG4ACIZwA0AwhBsAgiHcABAM4QaAYAg3AARDuAEgGMINAMEQbgAIhnADQDCEGwCCIdwAEAzhBoBgCDcABEO4ASAYwg0AwRBuAAi
GcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDB5A23mQ03s7VmttXMNpvZnT0xGACgewMSbLNP0j3u3mhmgyRlzezX7r6lyLMBALqR94jb3T9198bc5a8kbZV0ZrEHAwB075jWuM2sQtIYSW8UYxgAQH5JlkokSWY2UNJ/SbrL3f/QzdfrJdVL0ogRIwo2INAXVcx7Ie0RepVtC6enPUJBJTriNrOMOqK9zN2f7W4bd1/s7jXuXlNeXl7IGQEAB0lyVolJ+rmkre7+0+KPBAA4miRH3JdJ+omkiWbWlPuYVuS5AABHkHeN291fk2Q9MAsAIAFeOQkAwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDBEG4ACIZwA0AwhBsAgiHcABAM4QaAYAg3AARDuAEgGMINAMEQbgAIhnADQDCEGwCCIdwAEAzhBoBgCDcABEO4ASAYwg0AwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDB5A23mf3CzD43s009MRAA4OiSHHEvkTS1yHMAABLKG253f1XS73tgFgBAAgVb4zazejPbYGYbWlpaCrVbAEAXBQu3uy929xp3rykvLy/UbgEAXXBWCQAEQ7gBIJgkpwP+StLrks4zs2Yz+5vijwUAOJIB+TZw99qeGAQAkAxLJQAQDOEGgGAINwAEQ7gBIBjCDQDBEG4ACIZwA0AwhBsAgiHcABAM4QaAYAg3AARDuAEgGMINAMEQbgAIhnADQDCEGwCCIdwAEAzhBoBgCDcABEO4ASAYwg0AwRBuAAiGcANAMIQbAIIh3AAQDOEGgGAINwAEQ7gBIBjCDQDBEG4ACIZwA0AwhBsAgkkUbjObambvmdkHZjav2EMBAI4sb7jNrL+kf5V0taQLJdWa2YXFHgwA0L0kR9zjJH3g7h+6+x8lLZf0w+KOBQA4kiThPlPSxwddb87dBgBIwYAE21g3t/lhG5nVS6rPXd1tZu8dz2A44DRJO9MeIh97JO0JkBJ+PgvnrKQbJgl3s6ThB10fJml7143cfbGkxUkfGMmY2QZ3r0l7DqA7/HymI8lSyW8kjTSzs83sBEk/lvTfxR0LAHAkeY+43X2fmd0m6X8k9Zf0C3ffXPTJAADdSrJUInd/UdKLRZ4F3WP5CaWMn88UmPthzzMCAEoYL3kHgGAINwAEQ7gBJGJm/czsL9KeA6xxlxwzK5d0i6QKHfTksbvXpTUT0MnMXnf3P097jr4u0Vkl6FHPS1ov6WVJ36Y8C9DVGjP7a0nPOkd9qeGIu8SYWZO7V6U9B9AdM/tK0vfUcVCxRx1vieHufkqqg/UxrHGXnlVmNi3tIYDuuPsgd+/n7hl3PyV3nWj3MI64S8xBRzR/lNSeu5kjGpQEMzNJsySd7e7/ZGbDJZ3h7m+mPFqfQrgBJGZmT0raL2miu19gZqdKWuPul6Q8Wp/Ck5MlyMx+IOmK3NV17r4qzXmAg/yZu1eb2VuS5O5f5N58Dj2INe4SY2YLJd0paUvu487cbUApaM/9OkOXDpy+uj/dkfoelkpKjJm9LanK3ffnrveX9Ja7V6Y7GSCZ2SxJN0qqlvS0pBsk/YO7/2eqg/UxLJWUpu9L+n3u8uA0BwEO5u7LzCwraZI6TgW8zt23pjxWn0O4S8/Dkt4ys7Xq+IdxhaSGdEcCDvFbSX9Qrh9mNsLdf5fuSH0LSyUlyMzOkHSJOsL9hrt/lvJIgCTJzG6X9I+SdqjjRTidL8BhKa8HEe4SY2aXSWpy96/N7CZ1rCX+zN0/Snk0QGb2gTrOLNmV9ix9GWeVlJ4nJX1jZhdLulfSR5KeSXck4ICPJbWmPURfxxp36dnn7m5mP5T0uLv/3MxuTnso9G1mdnfu4oeS1pnZC5LaOr/u7j9NZbA+inCXnq/MrEHSTyT9Ze50wEzKMwGDcp9/l/s4Ifch5c7pRs9hjbvEmNmfSqqV9Bt3fy33XhAT3H1pyqM
BMrMfdT1nu7vbUFyEu0Tk3lyq85thuc+eu9wm6QNJ97v7KymMB0iSzKzR3avz3YbiYqmkRLj7oCN9LbdccpGkZbnPQI8ys6slTZN0ppk9ftCXTpG0L52p+i7CHYC7fytpo5n9S9qzoM/aLmmDpB9Jel8d/xv8Vh3nc/99inP1SSyVAMjLzDKS/lnS30rapo4lvOGSnpI0393bj3xvFBrncQNI4lFJp0o6y92r3X2MpHPU8V46j6U6WR/EETeAvMzst5LO7foLgnPPv7zr7iPTmaxv4ogbQBLe3W91zz3/wtFfDyPcAJLYYmazu96Yez+dd1OYp09jqQRAXmZ2pqRnJe2RlFXHUfYlkk6SdL27f5LieH0O4QaQmJlNlDRKHWeVbOYFYekg3AAQDGvcABAM4QaAYAg3AARDuAEgGMINAMH8P/RhZ+yV/TDFAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "p2 = plotUniqueValuesComparation(df_a_json, df_a_other, 'symbol')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonother
window.localStorage0.822601NaN
window.sessionStorage0.171297NaN
HTMLCanvasElement.ownerDocument0.004006NaN
window.name0.0020760.000182
HTMLCanvasElement.style0.000021NaN
window.document.cookieNaN0.999818
\n", + "
" + ], + "text/plain": [ + " json other\n", + "window.localStorage 0.822601 NaN\n", + "window.sessionStorage 0.171297 NaN\n", + "HTMLCanvasElement.ownerDocument 0.004006 NaN\n", + "window.name 0.002076 0.000182\n", + "HTMLCanvasElement.style 0.000021 NaN\n", + "window.document.cookie NaN 0.999818" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAGxCAYAAACZcfZXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XmcXFWZ//HPF0gIOyMEBQIkIoshsoZFgoALyjYwKi4Irgg/V0AFQXEQcVcGFxQVB1AcAREHQUBhBBJ2TQKBBBBBFokghMiOSALP749zK6lUOulKcrvPrdPf9+vVr9S9dbt5iq5+6tyzPEcRgZmZlWW53AGYmVn9nNzNzArk5G5mViAndzOzAjm5m5kVyMndzKxATu5mZgVycjczK5CTu5lZgVbI9R9ee+21Y/To0bn+82ZmPWnq1KmPRsTI/q7LltxHjx7NlClTcv3nzcx6kqT7u7nO3TJmZgVycjczK5CTu5lZgbL1ufdlzpw5zJw5k+eeey53KINqxIgRjBo1imHDhuUOxcwK0ajkPnPmTFZbbTVGjx6NpNzhDIqIYPbs2cycOZMxY8bkDsfMCtFvt4ykMyQ9ImnGIp6XpO9KulvSrZK2XdpgnnvuOdZaa60hk9gBJLHWWmsNubsVMxtY3fS5/wTYczHP7wVsUn0dBvxgWQIaSom9ZSi+ZjMbWP0m94i4GvjHYi7ZHzgrkhuBNSWtW1eAZma25Oroc18feKDteGZ17qFl/cGjj71kWX/EAu772j5dXbfzzjtz/fXX1/rfNjMbTHUk9776FPrcdVvSYaSuGzbccMMa/tMDw4ndzLp2whpL+X1P1BtHhzrmuc8ENmg7HgU82NeFEXFaRIyPiPEjR/ZbGiGbVVddlYceeohdd92VrbfemnHjxnHNNdcAcM455/CqV72KcePGccwxxyzwPccddxxbbbUVO+20Ew8//HCu8M3MaknuFwHvqWbN7AQ8ERHL3CWT29lnn82b3vQmpk2bxi233MLWW2/Ngw8+yDHHHMOVV17JtGnTmDx5Mr/+9a8BeOaZZ9hpp5245ZZb2HXXXfnxj3+c+RWY2VDWzVTIc4AbgM0kzZR0iKQPSfpQdcmlwD3A3cCPgY8MWLSDaPvtt+fMM8/khBNOYPr06ay22mpMnjyZ3XffnZEjR7LCCitw0EEHcfXVVwMwfPhw9t13XwC222477rvvvozRm9lQ12+fe0Qc2M/zAXy0togaYtddd+Xqq6/mkksu4d3vfjdHH300q6+++iKvHzZs2Lwpjcsvvzxz584drFDNzBbi2jKLcP/997POOutw6KGHcsghh3DTTTex4447MmnSJB599FFeeOEFzjnnHHbbbbfcoZqZLaRR5Qc6dTt1sW6SmDhxIt/85jcZNmwYq666KmeddRbrrrsuX/3qV3nta19LRLD33nuz//77Z4nRzGxxlHpVBt/48eOjc7OOO+64g1e+8pVZ4mmZPXs22267Lfff31U9/No04bWb2VIY5KmQkqZGxPj+rnO3TJsHH3yQV7/61Rx11FG5QzEzWyaN7pYZbOuttx
5//vOfc4dhZrbM3HI3MyuQk7uZWYGc3M3MCuTkbmZWoGYPqC7tFKNF/rylm3r0+OOPc/bZZ/ORj6TKChMnTuSkk07i4osvrjM6M7PauOXehccff5xTTz21tp/n0gRmNtCc3Ptw8sknM27cOMaNG8e3v/1tjj32WP7yl7+w9dZbc/TRRwPw9NNPc8ABB7D55ptz0EEH0VoMNnXqVHbbbTe222473vSmN/HQQ6lA5u67785nP/tZdtttN77zne9ke21mNjQ0u1smg6lTp3LmmWfyhz/8gYhgxx135H/+53+YMWMG06ZNA1K3zM0338xtt93Geuutx4QJE7juuuvYcccd+fjHP86FF17IyJEj+cUvfsFxxx3HGWecAaQ7gEmTJuV8eWY2RDi5d7j22mt585vfzCqrrALAW97ylnkbdbTbYYcdGDVqFABbb7019913H2uuuSYzZsxgjz32AOCFF15g3XXnbyf7jne8YxBegZmZk/tCuq21s+KKK8573CrxGxFsscUW3HDDDX1+T+sDw8xsoLnPvcOuu+7Kr3/9a5599lmeeeYZLrjgAiZMmMBTTz3V7/duttlmzJo1a15ynzNnDrfddttAh2xmtpBmt9wHeAPZvmy77ba8733vY4cddgDggx/8INtttx0TJkxg3Lhx7LXXXuyzT9+liIcPH87555/P4YcfzhNPPMHcuXM58sgj2WKLLQbzJZiZueRvUwzl127W01zy18zMBouTu5lZgRqX3HN1E+U0FF+zmQ2sRiX3ESNGMHv27CGV7CKC2bNnM2LEiNyhmFlBGjVbZtSoUcycOZNZs2blDmVQjRgxYt6CKDOzOjQquQ8bNowxY8bkDsPMrOc1qlvGzMzq4eRuZlYgJ3czswI5uZuZFcjJ3cysQE7uZmYFcnI3MytQo+a5d2P0sZcs1ffd97W+y/SamZWoq5a7pD0l3SnpbknH9vH8hpKuknSzpFsl7V1/qGZm1q1+k7uk5YHvA3sBY4EDJY3tuOxzwHkRsQ3wTuDUugM1M7PuddNy3wG4OyLuiYjngXOB/TuuCWD16vEawIP1hWhmZkuqmz739YEH2o5nAjt2XHMCcLmkjwOrAG+oJTozM1sq3bTc1ce5zpq8BwI/iYhRwN7AzyQt9LMlHSZpiqQpQ63yo5nZYOomuc8ENmg7HsXC3S6HAOcBRMQNwAhg7c4fFBGnRcT4iBg/cuTIpYvYzMz61U1ynwxsImmMpOGkAdOLOq75K/B6AEmvJCV3N83NzDLpN7lHxFzgY8BlwB2kWTG3STpR0n7VZZ8CDpV0C3AO8L4YStspmZk1TFeLmCLiUuDSjnPHtz2+HZhQb2hmZra0XH7AzKxATu5mZgVycjczK5CTu5lZgZzczcwK5ORuZlYgJ3czswI5uZuZFcjJ3cysQE7uZmYFcnI3MyuQk7uZWYGc3M3MCuTkbmZWICd3M7MCObmbmRXIyd3MrEBO7mZmBXJyNzMrkJO7mVmBnNzNzArk5G5mViAndzOzAjm5m5kVyMndzKxATu5mZgVycjczK5CTu5lZgZzczcwK5ORuZlYgJ3czswI5uZuZFcjJ3cysQF0ld0l7SrpT0t2Sjl3ENW+XdLuk2ySdXW+YZma2JFbo7wJJywPfB/YAZgKTJV0UEbe3XbMJ8BlgQkQ8JmmdgQrYzMz6103LfQfg7oi4JyKeB84F9u+45lDg+xHxGEBEPFJvmGZmtiS6Se7rAw+0Hc+szrXbFNhU0nWSbpS0Z18/SNJhkqZImjJr1qyli9jMzPrVTXJXH+ei43gFYBNgd+BA4L8lrbnQN0WcFhHjI2L8yJEjlzRWMzPrUjfJfSawQdvxKODBPq65MCLmRMS9wJ2kZG9mZhl0k9wnA5tIGiNpOPBO4KKOa34NvBZA0tqkbpp76gzUzMy6129yj4i5wMeAy4A7gPMi4jZJJ0rar7rsMmC2pNuBq4CjI2L2QAVtZmaL1+9USICIuBS4tOPc8W2PA/hk9W
VmZpl5haqZWYGc3M3MCuTkbmZWICd3M7MCObmbmRXIyd3MrEBO7mZmBXJyNzMrkJO7mVmBnNzNzArk5G5mViAndzOzAjm5m5kVyMndzKxATu5mZgVycjczK5CTu5lZgZzczcwK5ORuZlYgJ3czswI5uZuZFcjJ3cysQE7uZmYFcnI3MyuQk7uZWYGc3M3MCuTkbmZWICd3M7MCObmbmRXIyd3MrEBO7mZmBXJyNzMrUFfJXdKeku6UdLekYxdz3QGSQtL4+kI0M7Ml1W9yl7Q88H1gL2AscKCksX1ctxpwOPCHuoM0M7Ml003LfQfg7oi4JyKeB84F9u/jui8C3wCeqzE+MzNbCt0k9/WBB9qOZ1bn5pG0DbBBRFxcY2xmZraUuknu6uNczHtSWg74FvCpfn+QdJikKZKmzJo1q/sozcxsiXST3GcCG7QdjwIebDteDRgHTJR0H7ATcFFfg6oRcVpEjI+I8SNHjlz6qM3MbLG6Se6TgU0kjZE0HHgncFHryYh4IiLWjojRETEauBHYLyKmDEjEZmbWr36Te0TMBT4GXAbcAZwXEbdJOlHSfgMdoJmZLbkVurkoIi4FLu04d/wirt192cMyM7Nl4RWqZmYFcnI3MyuQk7uZWYGc3M3MCuTkbmZWICd3M7MCObmbmRXIyd3MrEBO7mZmBXJyNzMrkJO7mVmBnNzNzArk5G5mViAndzOzAjm5m5kVyMndzKxATu5mZgVycjczK5CTu5lZgZzczcwK5ORuZlYgJ3czswI5uZuZFcjJ3cysQE7uZmYFcnI3MyuQk7uZWYGc3M3MCuTkbmZWICd3M7MCObmbmRXIyd3MrEBO7mZmBeoquUvaU9Kdku6WdGwfz39S0u2SbpV0haSN6g/VzMy61W9yl7Q88H1gL2AscKCksR2X3QyMj4gtgfOBb9QdqJmZda+blvsOwN0RcU9EPA+cC+zffkFEXBURz1aHNwKj6g3TzMyWRDfJfX3ggbbjmdW5RTkE+G1fT0g6TNIUSVNmzZrVfZRmZrZEuknu6uNc9HmhdDAwHvhmX89HxGkRMT4ixo8cObL7KM3MbIms0MU1M4EN2o5HAQ92XiTpDcBxwG4R8a96wjMzs6XRTct9MrCJpDGShgPvBC5qv0DSNsCPgP0i4pH6wzQzsyXRb3KPiLnAx4DLgDuA8yLiNkknStqvuuybwKrALyVNk3TRIn6cmZkNgm66ZYiIS4FLO84d3/b4DTXHZWZmy6Cr5G6DZ/SxlyzV9933tX1qjsTMepnLD5iZFcjJ3cysQE7uZmYFcnI3MyuQk7uZWYGc3M3MCuTkbmZWICd3M7MCObmbmRXIyd3MrEBO7mZmBXJyNzMrkJO7mVmBnNzNzArk5G5mViAndzOzAjm5m5kVyMndzKxATu5mZgVycjczK5CTu5lZgZzczcwK5ORuZlYgJ3czswI5uZuZFcjJ3cysQE7uZmYFcnI3MyuQk7uZWYGc3M3MCuTkbmZWoK6Su6Q9Jd0p6W5Jx/bx/IqSflE9/wdJo+sO1MzMutdvcpe0PPB9YC9gLHCgpLEdlx0CPBYRrwC+BXy97kDNzKx73bTcdwDujoh7IuJ54Fxg/45r9gd+Wj0+H3i9JNUXppmZLYkVurhmfeCBtuOZwI6LuiYi5kp6AlgLeLSOIK0co4+9ZKm+776v7VNzJPUr+bVZ7+kmuffVAo+luAZJhwGHVYdPS7qzi/9+LfR11qbgDxu/vt5V8mur+PX15QtL3bmxUTcXdZPcZwIbtB2PAh5cxDUzJa0ArAH8o/MHRcRpwGndBFY3SVMiYnyO//Zg8OvrXSW/NvDry6WbPvfJwCaSxkgaDrwTuKjjmouA91aPDwCujIiFWu5mZjY4+m25V33oHwMuA5YHzoiI2ySdCEyJiIuA04GfSbqb1GJ/50AGbWZmi9dNtwwRcSlwace549sePwe8rd7QapelO2gQ+fX1rpJfG/j1ZSH3npiZlcflB8zMCuTkbm
ZWICd3M7MCObn3KCUHSzq+Ot5Q0g6546qTpCu6OWfNJGklSZvljmMgSNpI0huqxytJWi13TJ2KTe6SXirpdEm/rY7HSjokd1w1OhV4NXBgdfwUqcBbz5M0QtJLgLUl/Zukl1Rfo4H18kZXj9Lfn5L+HZgG/K463lpS5/qYniTpUFINrR9Vp0YBv84XUd+KTe7AT0hz81vJ4M/Akdmiqd+OEfFR4DmAiHgMGJ43pNr8P2AqsHn1b+vrQgr5AKP89+cJpKKDjwNExDRgdMZ46vRRYALwJEBE3AWskzWiPpSc3NeOiPOAFyEtxgJeyBtSreZU5ZgDQNJIqtfa6yLiOxExBjgqIl4eEWOqr60i4nu546tJ6e/PuRHxRO4gBsi/qgq5AFQlVxo3p7yrRUw96hlJazE/+e0ElPRm+y5wAbCOpC+Tyj58Lm9I9YqIUyTtTGrxrdB2/qxsQdWn9PfnDEnvApaXtAlwOHB95pjqMknSZ4GVJO0BfAT4TeaYFlLsIiZJ2wKnAOOAGcBI4ICIuDVrYDWStDnwelJVzisi4o7MIdVK0s+AjUl9t61WbUTE4fmiqkfp709JKwPHAW8kvT8vA75YrWbvaZKWI21Q1P7a/rtp9bSKTe4w73ZpM9Iv4M6ImJM5pNpUA46dnirsNd4BjG3aH01dSn5/Wn7FdstIekvHqU2rTUSmR8QjOWKq2U2kMsuPkZLDmsBDkh4BDo2IqTmDq8kM4GXAQ7kDqVs1XrI387uc3iiJiDg5a2DLSNJvWEz/c0TsN4jh1ErSeRHxdknT6eM1RsSWGcJapGKTO+m26dXAVdXx7sCNpCR/YkT8LFdgNfkdcEFEXAYg6Y3AnsB5pGmSnbtl9aK1gdsl/RH4V+tkLyeINr8hzXSaTiED4ZWTcgcwgI6o/t03axRdKrZbpmpBfDAiHq6OXwr8APggcHVEjMsZ37Lqa4OA1jlJ0yJi61yx1UXSbn2dj4hJgx1L3STd2rSWXp0k7QtcGhElfXABIGmviPhtx7kPRcQPc8XUl5KnQo5uJfbKI8CmEfEPoIS+zX9IOqZaKbeRpE8Dj1W3+0X8QVVJ/D5gWPV4Mqk7qgS/re62SvVO4C5J35D0ytzB1Ow/Jb2udSDpGGD/jPH0qeRumWskXQz8sjp+K3C1pFWoFlb0uHcBnyetjBNwbXVueeDtGeOqTbUS8DDgJaRZM+sDPyTNEOp1NwIXVDMv5pB+hxERq+cNqx4RcbCk1UkrqM+UFMCZwDkR8VTe6JbZfsDFko4mdYVuXp1rlJK7ZURK6BOYn/x+VerMixJJmkZa5fiHiNimOjc9Il6VN7JlJ+ke4D9IA/zFviclrQ0cTFp9ewfwCuC7EXFK1sCWkaR1gN+TVk5/oIm/w2Jb7tX/7POrr+JUK1I/DWwBjGidj4jXLfKbes+/IuL59Dnd3JWAS+kuYEYTk0IdJO0HvJ90x/UzYIeIeKSa/34HaY5/T5H0FAu+/4YDLwcOkNS4u65ik3u14u8U4JWkX8LywDNN+wUsg58DvyCN3H+ItEH5rKwR1a8nVgIupYeAiVXhsPaZQD09FbLNAcC3IuLq9pMR8aykD2SKaZlEROMqPy5OyQOq3yP1990FrESaJdNzrYXFWCsiTgfmRMSkiPgAsFPuoGp2LOkDazqpmNillFNi4V7gClLDY7W2r1I81JnYJX0dICJ6vmyzpP0knVR9NXJqZMl97q1pgfOmnEm6PiJ2zh1bHSTdGBE7SbqMVGfmQeD8iNg4c2hmSLopIrbtOFfE9E9JXwO2J909Q2pETo2IY/NFtbBiu2WAZyUNB6ZJ+gbpNniVzDHV6UuS1gA+RbojWR34RN6Q6lW1iL4IbER6rxYzo6TUMRNJHyZ1n20sqb1OzmrAdXmiqt3ewNatOfySfgrcTLrTbIySk/u7Sd1OHyMlvQ1Is2d6XjWXfZOIuJhUSfC1mUMaKN8G3kKZM0pKHTM5G/gt8FUWTHZPVWtMSrEm0Ho9a+
QMZFGK7Japkt9PI+Lg3LEMFElXRUSpSR1IrxF4faGrHKdGxHYd3YaTIqLPVbm9RtLGwMyI+Jek3YEtgbMioufXmEg6EPgaqbSJgF2Bz0TEuVkD61Bkyz0iXpA0UtLw9qL6hble0vdIrb9nWicjopQVnJC6LS6VNInyZpS0Vkk/JGkf0pjJqIzx1O1XwHhJrwBOBy4iter3zhpVDSLiHEkTSf3uAo6JiL/njWphRSb3yn3AddW+je3Jr4TEANAaGD6x7VwAPd1n2+HLwNOkPulSthBsKX3M5MWImFtVZ/12tfHKzbmDqtH2pBY7pHIfjZuiW3Jyf7D6Wo6yppgBUHqXTOUlEVFk/ZVqvATKHTOZU3VfvAf49+rcsIzx1KaP2TKHS9o5Ij6TMayFFNnn3k7SaqQZFk/njqVOVavv88xvPUwCTixp38rqj+jKiLg8dyx1q2bLHMrCWwj25AKfTpLGkgaKb6i6McYA74iIr2UObZlVs4DaZ8ssD9zctGmexSZ3SeNIy55bOxY9CrwnIm7LF1V9JP2KtJnFT6tT7wa2iojOTUp6VrXcexVSf3tRxbUkXQ9cQ6pNMm9j7Ij4VbagrCtVct+9Nfun2hVtopP7IKn+eI6LiKuq492BrxS0iGmhmu2l1HEfCobi70rSCRFxQu44lpVny+S3SiuxA0TExKrcbyn+KWmXiLgWQNIE4J+ZY6qVpF37Ot+5rL1HXSxp74i4NHcgg6iErR97ZrZMyS33C0gbO7S20zsYGB8R/5EvqvpI2go4i/kLKB4D3hsRty76u3pLtZtWywhS+d+pvb6KE8rucoLU2IiI6/o714skvZk0FvREdbwmqZvm13kjW1DJyf3fgC8Au1SnrgZOKGERBYCkMRFxb7UhAhHxZOtc7tgGiqQNgG9ExIG5Y7HFW0RtmYXO9aJFdIne3NpzoClK7pZ5Q0Qc3n5C0tuYvzNTr/sVsG1EPNl27nxgu0zxDIaZQE/vfVs6Sa8mrcEYKemTbU+tTiq7XYK+quk2Lpc2LqAafYaFE3lf53qKpM1JxabWqBaItKxOWwGqEkg6hfmbIywHbA3cki+igVVIy3Y4sCopt7SvL3mSVOO9BFMknQx8n/T+/DgNHE8orltG0l6kJc5vJy3Nb1kdGBsRO2QJrCaS9idtz7YfaUl3y1PAuRFxfZbABoCk97YdzgXuK6HPdiiQtFFE3J87joFQTcz4T+AN1anLgS9HxDOL/q7BV2Jy34rUwjsROL7tqaeAqyLisSyB1UzSqyPihtxxDKTqj+i5iHihOl4eWDEins0b2bKrdiO6JiLuyh3LQJC0KXAUCy/S6vnB8F5RXHJvkTQsIuZIGkbqp/1bRDySO65lJelQ0oKJu5Q2Fz2dVMr4fuB9JRUOk3Qjaezk6ep4VeDyEtYqSDqRNNi/EemW/hpSsp+WNbCaSLoF+CELL9JqXPdFHSQdFhGn5Y6jXXF97pJ+CJwSEbdVS/RvIL25XiLpqIg4J2+Ey+wI4CfV4wOBrUib9G4DfAd4TZ6wBsSI9rIREfG00gbLPS8ijgeQtBKpDMHRpPr1pQw6zo2IH+QOYhApdwCdStxD9TVtJQbeD/w5Il5FmkXy6Xxh1WZuRLTKxe5LqpE9OyJ+T1k7TQE8I2neAKOk7ShkoZakz1WbY18OvILUhVFSyd/fSPqIpHUlvaT1lTuoOlR1cjo1rv5RcS13oL1++x5Us2Mi4u+pF6PnvShpXdKipdeTyuK2rJQnpAFzJPBLSQ9Wx+sC78gYT53eQhokvoRU9O3GiHgub0i1ag2GH912Lkh3mb3uV0DnrKbGTUMuMbk/Xu29+TdgAnAIgKQVKCP5HQ9MId2+X9S6S5G0G3BPzsDqFhGTq6mfm5Fue//UdtfS0yJi26pi6S6kRsiPJT0cEbv08609ISL6at32tF6bhlxicv9/wHeBlwFHttV8eD2pldTTIuJiSRsBq3XM/JlCOa1aIA2KAx9mflnjiZJ+VEKCr6
qWvgbYDRgPPEAaVC1CNTbySWDDiDhM0ibAZm117HvRZqSu0DWZX6Me0ky8Q7NEtBjFzpYZCiTtzMJTzc7KFlDNJP03aYOH9rLGL0TEB/NFVQ9Jl5BKYlwDTC7hA6udpF+QZsq8JyLGVQPHN5RQCbNXpiEXl9w7VjUupLMkQa+S9DNgY2Aa86eaRSmvD9J0uojYqr9zvUrScGDT6vDOkhK8pCkRMb695kopv7te2WilxG6ZKbkDGCTjSStuy/p0XtALkjaOiL8ASHo5bXOme1k1RnIWaa9fARtIem8h5YwBnq9a6wEgaWPaNjnvcReS7rh+T4Pfj8Ul94j4af9XFWEGaVzhodyBDKCjgask3UNKgBuRpreW4GTgjRFxJ8xb0XkODZtxsQw+D/yO9KH1c9Lkhvdljag+K0fEMbmD6E9x3TIt1a3TMcBY2kayS1n+LOkqUpmFP9LWIoqI/bIFNQAkrciCs2WKaP1JurVzW7a+zvUySWsBO5F+dzdGxKOZQ6qFpC8B1zd9o5XiWu5tfk4qHLYPaaPe9wKzskZUrxNyBzCQqsTwLmDz6tQdpBklRSR3UmXB05m/mcxBNLCy4DJanzRldwVgV0lExP9mjqkORwCflfQ8aV1NIzdaKbnlPjUitmtvDUmaFBG75Y6tLpJeStrqC+CPJdTOAZD0SuBK4DLgZtIfzzak+eCvi4g/ZQyvFtUdyUdJ89xFmjlzakF3JmcAWwK3AS9Wp6Npg44lKzm53xgRO0m6jDTv/UHg/IjYOHNotZD0duCbwERScngNcHREnJ8zrjpIOh84LyLO6zj/VuBdEfHWPJFZtyTdHhFjc8cxEKqCfQcBYyLii9UOYetGxB8zh7aAkpP7vqQR7Q2AU0iryL4QERct9ht7RFV1b49Wa70aY/h9IVPN7oyIzZb0uV4gaTqLn6pbRJ971eX0XxFxe+5Y6ibpB6S7kddFxCuVtvS8PCK27+dbB1Wxfe5tK+GeAF6bM5YBslxHN8xsyikEt7hNDxq1IcJS2Lf696PVv+197j1fp77NT4EbJP2dNE7S6pcu4cNrx6p8xM0AEfFYtWahUYpN7pJ+ChwR1YbY1afrfxXU5/e7qsupVcL4HUCjR++XwDod+2+2CBg52MHUqbU7kaQJETGh7aljJV1H2mSmBGeQVhRPZ36feynmVBvHtObwj6SBr7HY5A5s2UrsMO/TtVG7ky+LiDi66oOeQEp6p0XEBZnDqsuPWXD/zXb/PZiBDKBVJO0SEdfCvFISJZVs/mspXaB9+C5wAakR8mXS3rCfyxvSwkruc78F2L1VXKuqJT2pqu1uDVe1jA6PiG/ljmUgVLXpzwDWqE49DnyglJ20JJ1KKrD1GxZch1HCVMhWhcjXkxpWV0TEHZlDWkjJyf09wGdIdZYB3kbaxPZni/6u5pN0bUTsIukpFhyYa+Rc22Uh6aqIKHG8ZB5Jq5P+Dp/IHUudJJ3Zx+lipkJW3bwbsGBtmUZ9MBeb3AEkjQVaK1KvLHHkvmTVLe8apMVo8wZSm/ZHtDSqee6e+py4AAAUY0lEQVRvZeHiU6X0uRdL0hdJpRT+wvwGVjRt9XvJfe6QysWK9AsYljmWWlWFmGZGxL8k7U5aMHJW+zhDAVobYbcnvGD+B3Yvu5A0k2sq5ay6naeqlfMD4KVVyd8tgf0i4kuZQ6vD24GNI+L5fq/MqNiWu6QjSGU5f0VK8G8mDTqekjWwmkiaRqoMOZq0kvMi0mYIe+eMy7ojaUZEjMsdx0CRNIlU+O1HbSV/i3jNkn4FfLjpK8JLbrkfQpqP+gyApK8DN5AWNJXgxYiYK+nNwLcj4pTWvNtSVOUVvgKsFxF7Vd1sr46I0zOHVofrJb0qIqbnDmSArBwRf+zYt3hurmBq9lXgZkkzaHDRvpKTu1iw1vIL1blSzJF0IKkgWmvLr6K6noCfAGcCx1XHfyb1v5eQ3HcB3ifpXspb5APwaNV12JoLfgDllKf+KfB1Gj6Hv+
TkfibwB0mtud//QRlJoeX9pGqXX46IeyWNAf4nc0x1WzsizpP0GYDqTqWxmyMsob1yBzDAPgqcBmwu6W/AvcDBeUOqzaMR8d3cQfSn2D53AEnb0lZ1LyKK6rZoaU3Liohbc8dSJ0kTSTNK/q9a7r0T8PVeruwpafWIeLJad7GQiPjHYMc0kCStQiqV8VTuWOoi6WTS3dZFLNgt06hZXMUl90X90bSU8sdTJb79SHdf00i16idFRF/L9ntS9eF8CjCOtPPUSOCAXv4Qk3RxROxbdccEC3YVRkS8PFNotVhE2Yh5IuLkwYploFQb5XTyVMhBMJUF/2han16tKZE9/cfTZo2qBfhB4MyI+Lyknk16fYmIm6q9Rls7MfX8JtIR0Socdi2phvs1JdSnb7OoshHF6JWFdcW13IeKqnTsG0mDO8dFxOTStmmDeTVXRrPgQp+zsgVUE0mvI3UZvobU4LiZlOi/kzUw65ek4/s637QFaKWUiB2KTiTNb/9LldhfDtyVOaZaSfoZcBIpCW5ffY3PGlRNIuJK4MvAf5KKoW0PfDhrUDWQdF7b4693PHf54Ec0IJ5p+3qBNDg+OmdAfRlSLXdJN0XEtrnjsO5IugMYGwW+SSVdQaoCeQNpU5lrm74ophuSbm5btLTA31v7cyWpSklcFBFvyh1LuyHVci8psUvaVNIV1UIKJG0pqXFlR5fRDOBluYMYILeSNlceRyodMU7SSnlDqsXiPoiL+5CurEwDx/JKHFAFQNIHSH2YRXVVtPkx1fJugIi4VdLZQAm1O1rWBm6X9EcavBJwaUTEJwAkrUpas3Am6YNsxZxx1WDlat+E5YCVqseqvkr48OrcKnF50iyuRvW3Q8HJndQHdrCkjUgzaK4hJftpWaOqT8nLu1tOyB3AQJH0MdJg6nbA/aTa7tdkDaoeDwGt6Y5/b3vcOi7Bvm2P5wIPR0Tj/vaKTe4RcTxAdat7KKmV+23SJ20JSl7e3bIx5d59rURKfFObmBiWVq9ME1xG6wK3tRZmSVpV0hYR8YfMcS2g2AHVqv95ArAqaZrZtaREUUQCrGbHnEYqi/sY1fLuiLgvZ1x1knQiaaZMqXdfxZL0NuB3EfFU9be4LfDFElaJVwX6tm0N9EtaDpjStDG9kpP7TaRbpkuAScCNEfFc3qjqV+Ly7k5td19HAetHRCl3X8VqrbmQtAupiuJJwGcjYsfMoS0zSdMiYuuOc41bY1LsbJnqU/T1wB+BPYDpkq7NG1V9JB1RbdH2LPAtSTdJemPuuOok6XOSfgtcDryClNxH5Y3KutQq8LYP8IOIuBAYnjGeOt0j6XBJw6qvI4B7cgfVqdjkLmkcqQrde4F3ADOBK7MGVa8PRMSTpFWq65BmXHwtb0i1ewuwFvB74H9Jc4mL6FYbAv4m6UekXYsureaCl5JvPkTqDv1b9bUjcFjWiPpQcrfMJVS1O4DJvV6TpFPbbe93gIkRcUGJi0QkrUbqd9+FlCgejohd8kZl/ZG0MrAnMD0i7pK0LvCqiChllWrjFZvcASQNBzatDnu+6FS7anf59YExwFakWUATI2K7rIHVqLr7eg2wG6nswAOkAdU+a3tY80haBxjROo6Iv2YMpxaSRpGqlU4gzVa7FjgiImZmDaxDscm9qiZ4FnAfaQHFBsB7I+LqnHHVpRqh3xq4JyIel7QWabCxmMqQpd99lUzSfsB/AesBjwAbAn+KiC2yBlYDSf8HnA38rDp1MHBQROyRL6qFlZzcpwLviog7q+NNgXNKatkOBSXffZVM0i3A64DfR8Q2kl4LHBgRjeubXlKLmC2z0LncShng6MuwVmIHiIg/U94eowuopn8Wo7r7ugv4PnAq8GdJu+aNyro0JyJmA8tJWi4iriLdaZbgUUkHS1q++joYmJ07qE7FrlAFpkg6nfm3TgeRFsIUq2mLKGpwMvDGzrsv0pJ9a7bHq7o5VwM/l/QI5ZTH+ADwPeBbpD7366tzjVJyt8yKpE
165+2hCpwaEf9a7Df2iCFQGK3PhSFNXCxiC6sW1/2T1DtwELAG8POqNW+DoNjkXrqhsDRf0hmkllH73dcKEfH+fFFZNyR9Avhl02aQLAtJp7CYssURcfgghtOv4pJ7RznOhZTW6it5aX7pd18lk/R50rqEfwDnAudHxMN5o1o2kt5bPZwAjAV+UR2/jVQA7hNZAluEEpP7RtXDj1b/trf6nm3aPodLq/TCaFYGSVuSVoi/FZgZEW/IHNIyk3QVaSxoTnU8DLi8aRUxi5stExH3R8T9wISI+HRETK++jgUatQ3WMhqSS/MlnZA7Blsij5DquM8mlckowXrAam3Hq1bnGqW45N5mlaoiHQCSdibtWVmE0gujLUbRM55KIenDkiYCV5B21Dq0oC7RrwE3S/qJpJ8ANwFfyRvSwkqeCnkIcIakNarjx2ngdKWltail+VmDqpmkCRFxXcfpf2QJxpbURsCRJQ3wt0TEmVW10lb54mMjonG7TBXX596pKouriHgidyx1GgpL8yXd1Dl3v69z1kySlgdeSlsjsoTaMr2i2JZ7NdPiraS9VFdo7TVayoBqROzTtjR/M0nFLM2X9GpSSdWRkj7Z9tTqlLNNYtGqPWJPAB4GXqxOB1BK18wCmtjoKDa5AxcCT5D6aIubOtdXYTRJpRRGG04apFqBBQeungQOyBKRLakjgc2GyqKlpiV2KLhbRtKMiBiXO46BMhQKo0naqJr5ZD2mmi64R0mbf7f0yurwklvu10t6VURMzx3IAFmoMFo137YkK0o6japrrXUyIl6XLSLr1j3AxGpsaN6dc0ScnC+k2owGDq7W1DR2dXjJLffbSftu3kt6cwmIUqZjDYWl+VXZ2B+S/oBae3ISEZ4O2XDVCtWFRMQXBjuWgdL01eElJ/eN+jpfym3+UFiaL2lqSd1MVoZeWR1eXHKXtHpEPCnpJX09HxGeJ90jqtWojwAXsOCtvX+HDSdpJPBpYAsW3Gav57vUqn0T5gKXAJOAGyPiubxRLazE5H5xROwr6V5St4Xano6IeHmm0GoxlAqjVb/DTj3/OxwKJF1OKqx1FPAh4L3ArIg4JmtgNemFjduLG1CNiH2rh9dSLfKJiD9lDKlurdfXZ2G0wQ9n4ETEmNwx2FJbKyJOl3REREwCJkmalDuoOvTK6vDiWu4tkl5H+lR9DfByUt/YNRHxnayB1UTSdRExob9zvUzSysAngQ0j4jBJm5DmTl+cOTTrh6QbI2InSZcB3wUeJJX93ThzaMusV1aHF5vcYd7y5+2B15JuDf8ZEZvnjaoekqYBH4uIa6vjnUkDqqXsU4mkX5BmyrwnIsZVsxNuKOk1lkrSvqTktwFwCml18Rci4qKsgdWkFzZuLza5S7qCVAXyBtKb7NqIeCRvVPWRtB1wBmn7MqgKo0VEMZtkS5oSEeMl3RwR21TnbomIrXLHZkNXX6vDgcatDi+uz73NraSNlMeRyhA8LumGiPhn3rDqUc313qrUwmiV56vWegBI2pgCS0mURNI3gHsi4ocd5z8BvKyQAdWe2Li92JZ7S7UD+/tJo/Yvi4gVM4dUi87CaK3zpRRGA5C0B/A50pZml5PmFr8vIibmjMsWrVo8OC4iXuw4vxxwawklQXpl4/ZiW+5VVbrXkD5N7yd1YTRuRHsZFF0YDSAi/q+aU7wT6fb3iIh4NHNYtnjRmdirky+qVZq1902RdDoLzlRr3KrpYpM7sBLp9mlqicWLgFERsWfuIAbB+qQyvysAu0oiIv43c0y2aM9K2qSzqFY106mILlHgw6SpyIfTtjo8a0R9KL5bplRVQa1TCi6M1qqfsyVwG201wSOimB21SiNpL9LsmC8xvzU7HvgMaWemS3PFNtQ4ufeo0gujQXqNETE2dxy2ZKpFPkeTJjMAzABO6vWGSK+tDi+5W6Z0e+UOYBDcIGlsRNyeOxDrXkTMIJUbWICkkyLiqAwh1aWnVoe75d5jhlJhNEm7Ar8B/k6hdy
dDiaS/RsSGueNYVr2yOtwt995zNqkFMZU+CqORSi2U4gzg3cB05ve5W+8qZbbMKpJ26VgdvkrmmBbi5N5jhkBhtHZ/LWW5+lCxqDtKUmIvJbkfApwhaYHV4Rnj6ZO7ZXpU6YXRACSdCqxJ6pppr+fuqZANtYhS2/OUVOmz6avDndx7WMmF0QAkndnHaU+FtKx6ZXW4u2V6VB+F0bYvqTAaQEn7wQ4VkrZd3POFFLbridXhTu69q+jCaDCvINMPgJdWJX+3BPaLiC9lDs0WbQpp0dms6rhzwL/nt9mjR1aHu1umx5VaGA2g2rnnaOBHbSV/Z5RQfKpUVfXHt5IaHOcCF0TE03mjqlevrA53cu9RfRRGa82cuTJrYDWSNDkitu+o5z7Nm3U0n6QxwIHA/qT351ciYlreqOrRK6vD3S3Tu0ovjAbwaFXDvVXP/QDgobwhWTci4l5JF5Lep+8m7VpURHKnR1aHu+VujSXp5cBpwM7AY6SW0sERcV/OuGzRqt/ZO0kt9gdIXTMXR8RzWQOrQa+tDndyt8aTtAqwXEQ8lTsWWzxJL5IG+y8EnqSj0FZEnJwjrjpIujgi9l3EXP6IiEatDne3jDWOpE8u4jzQ2wliCPhC2+NVs0UxAHptdbiTuzXRarkDsKU2OyK+lzuIAXYmaXX4KVU3VCNXh7tbxsxqI+mmiFjsQqYS9MLq8OVyB2DWSdJ5bY+/3vHc5YMfkdl81erw64B3AHeSVoc3KrGDk7s10yZtj/foeG7kYAZiS2xLSU/28fWUpCdzB1eTW4HnSavDtwTGSVopb0gLc5+7NdHi+grdj9hs01sLzkoVEZ+ABVaHnwm8DGjU6nAnd2uilSVtQ7qzXKl63KoH3rgWkg0tfawOP4NUvK9RnNytiR4irb6FtMVe+9THvw9+OLYEfpk7gEHQE6vDPVvGzGoj6RQW03UWEYcPYjhDmgdUrbEkvU3SatXjz0n636qLxpprCqnO+VRgv7bHrS8bJG65W2NJujUitpS0C/BV4CTgsxGxY+bQrAvt1Txt8Lnlbk32QvXvPsAPIuJCYHjGeGzJuOWYkZO7NdnfJP0IeDtwabV3pd+zZl1wt4w1lqSVgT1Jc6fvkrQu8KqI8CrVhpL0FPNb7CsDz7aeIlVOXD1LYEOQk7s1nqR1gBGt44j4a8ZwzHqCb3GtsSTtJ+ku0iYdk6p/f5s3KrPe4ORuTfZFYCfgzxExBngDqWCTmfXDyd2abE5EzAaWk7RcRFwFeHNssy64/IA12eNVcaargZ9LegRo7HJvsybxgKo1VrV36j9Jd5gHAWsAP69a82a2GE7u1liSPgH8MiJm5o7FrNe4z92abHXgMknXSPqopJfmDsisV7jlbo0naUvSlmZvBWZGxBsyh2TWeG65Wy94hFTHfTawTuZYzHqCk7s1lqQPS5oIXAGsDRwaEVvmjcqsN3gqpDXZRsCRETEtdyBmvcZ97tZokpYHXkpbQ8S1Zcz655a7NVa1EfEJwMPAi9XpANw1Y9YPt9ytsSTdDezoRUtmS84DqtZkDwBP5A7CrBe5W8aa7B5goqRLgH+1TkbEyflCMusNTu7WZH+tvobjvVPNloj73M3MCuSWuzWWpJHAp4EtWHCbvddlC8qsR3hA1Zrs58CfgDHAF4D7gMk5AzLrFe6WscaSNDUitpN0a6vsgKRJEbFb7tjMms7dMtZkc6p/H5K0D/AgMCpjPGY9w8ndmuxLktYAPgWcQqrv/om8IZn1BnfLmJkVyAOq1jiSviHpQ32c/4Skr+eIyazXuOVujSPpdmBcRLzYcX454NaIGJcnMrPe4Za7NVF0Jvbq5IuAMsRj1nOc3K2JnpW0SefJ6tw/M8Rj1nM8W8aa6Hjgt5K+BEytzo0HPgMcmS0qsx7iPndrJEnjgKOBVv/6DOCkiJieLyqz3uHkbj1F0kkRcVTuOMyazsndeoqkv0bEhrnjMGs6D6har/FsGbMueEDVGkfSSxb1FE7uZl
1xcrcmmgoEfSfyOX2cM7MO7nM3MyuQW+7WOJK2XdzzEXHTYMVi1qvccrfGkfQicBswq3Wq7enwNntm/XPL3ZroU8BbSaUGzgUuiIin84Zk1lvccrfGkjQGOBDYH7gf+EpETMsblVlv8Dx3a6yIuBe4ELgc2AHYNG9EZr3DLXdrHEkvB95JarE/QOqauTginssamFkPcXK3xqkGVG8ltdqfJM15nyciTs4Rl1kv8YCqNdEX2h6vmi0Ksx7m5G5NNDsivpc7CLNe5gFVa6IP5A7ArNc5uZuZFcgDqtY4kuYCz/b1FGmF6uqDHJJZz3GfuzXR9IjYJncQZr3M3TJmZgVycrcm+mXuAMx6nfvcrXEknULHwqV2EXH4IIZj1pPc525NNKXt8ReAz+cKxKxXueVujSbpZg+umi0597lb07n1YbYUnNzNzArkbhlrHElPMb/FvjLzFzR5EZNZl5zczcwK5G4ZM7MCObmbmRXIyd3MrEBO7mZmBXJyNzMr0P8HbOML3SVICEgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotUsageComparation(df_a_json, df_a_other, 'symbol')" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Value CountsValue Counts
Json125
Other2452
\n", + "
" + ], + "text/plain": [ + " Value Counts Value Counts\n", + "Json 12 5\n", + "Other 245 2" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEOCAYAAACHE9xHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFCRJREFUeJzt3X+QnVWd5/H3F2hoHTBiCCyQSEcrKMSEJrQsuzhDhuASgw4/RpAUmtTENWMVqOwSMYlTJTqKaDEOy9ZKFVMqgcpMwmSkoBCZSCoUWOWvdGwhIaIpJ0CTGJqILQhkk/CdP/ppbJJO/7653affr6pb97nnnud5vp3u/vSTc889NzITSVK5Dqt3AZKk2jLoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYU7ot4FABx33HHZ1NRU7zIkaUxpbW19PjMn9ddvVAR9U1MTGzZsqHcZkjSmRMRTA+nn0I0kFa7foI+IKRGxPiK2RMTmiPhM1X5DRDwbEW3VbV6PfZZFxNaIeDIiLqzlFyBJ6ttAhm72Atdl5saIOAZojYgfVM/9Y2be3LNzRJwOXAlMB04CHoqIUzNz30gWLkkamH6DPjN3ADuq7RcjYgtwch+7XAysyszdwH9ExFbgbOBHgylsz549tLe38+qrrw5mN9VQY2MjkydPpqGhod6lSBqEQb0YGxFNwJnAT4BzgWsiYgGwga6r/hfo+iPw4x67tdPLH4aIWAwsBnj7299+wLna29s55phjaGpqIiIGU6ZqIDPZtWsX7e3tTJ06td7lSBqEAb8YGxFHA/8GXJuZfwBuA94JNNN1xf8P3V172f2ATzfJzNszsyUzWyZNOnB20KuvvsrEiRMN+VEiIpg4caL/w5LGoAEFfUQ00BXyKzPzuwCZuTMz92Xma8A/0TU8A11X8FN67D4Z2D6U4gz50cXvhzQ2DWTWTQDfArZk5jd6tJ/Yo9ulwKZq+z7gyog4KiKmAtOAn45cyZKkwRjIGP25wMeAxyOirWpbDsyPiGa6hmW2AX8LkJmbI+Ju4Am6ZuxcPRIzbpqWfm+4h3iDbTdd1Ofzs2fPZtmyZVx44Z9mh95yyy386le/4pvf/OZB9zv66KN56aWXRqTGO++8k69//etkJpnJokWLWLJkyYgcu9uNN97I8uXLR/SYOvRG+vejVvr7vVNt9HtFn5k/zMzIzJmZ2VzdHsjMj2XmjKr9r6rZOd37fCUz35mZ78rM79f2S6iN+fPns2rVqje0rVq1ivnz5x+S83//+9/nlltuYe3atWzevJmNGzcyYcKEET/PjTfeOOLHlDS6+M7Yg/jwhz/M/fffz+7duwHYtm0b27dv533vex8vvfQSc+bMYdasWcyYMYN77733gP0ffvhhPvjBD77++JprruGOO+4AoLW1lfPOO4+zzjqLCy+8kB07dhyw/1e/+lVuvvlmTjrpJKBrauMnPvEJANra2jjnnHOYOXMml156KS+88ALQ9b+Q7qUknn/+ebrXD7rjjju47LLLmDt3LtOmTeP6668HYOnSpbzyyis0Nzdz1VVX8cc//pGLLrqIM844g/e85z2sXr16BP4lJdWbQX8QEydO5Oyzz+bBBx8Euq7mP/KRjxARNDY2cs8997Bx40bWr1/PddddR+YBE4t6tWfPHj71qU+xZs0aWltbWbRoEZ///OcP6Ldp0ybOOuusXo+xYMECvva1r/HYY48xY8YMvvjFL/Z73ra2NlavXs3jjz/O6tWre
eaZZ7jpppt405veRFtbGytXruTBBx/kpJNO4he/+AWbNm1i7ty5A/qaJI1uBn0feg7f9By2yUyWL1/OzJkzueCCC3j22WfZuXPngI755JNPsmnTJt7//vfT3NzMl7/8Zdrb2wdcU2dnJ7///e8577zzAFi4cCGPPPJIv/vNmTOHCRMm0NjYyOmnn85TTx24FtKMGTN46KGH+NznPsejjz5ak6EiSYeeQd+HSy65hHXr1rFx40ZeeeUVZs2aBcDKlSvp6OigtbWVtrY2TjjhhAPmlx9xxBG89tprrz/ufj4zmT59Om1tbbS1tfH444+zdu3aA849ffp0WltbB1Vvz3PuX89RRx31+vbhhx/O3r17D9j/1FNPpbW1lRkzZrBs2TK+9KUvDer8kkYng74PRx99NLNnz2bRokVveBG2s7OT448/noaGBtavX9/r1fEpp5zCE088we7du+ns7GTdunUAvOtd76Kjo4Mf/ahrRYg9e/awefPmA/ZftmwZ119/Pb/97W8B2L17N7feeisTJkzg2GOP5dFHHwXgrrvuev3qvqmp6fU/DmvWrBnQ19jQ0MCePXsA2L59O29+85v56Ec/ypIlS9i4ceOAjiFpdBsV69EPRL2mZc2fP5/LLrvsDTNwrrrqKj70oQ/R0tJCc3Mz7373uw/Yb8qUKVxxxRXMnDmTadOmceaZZwJw5JFHsmbNGj796U/T2dnJ3r17ufbaa5k+ffob9p83bx47d+7kggsuIDOJCBYtWgTAihUr+OQnP8nLL7/MO97xDr7zne8AsGTJEq644gruuusuzj///AF9fYsXL2bmzJnMmjWLBQsW8NnPfpbDDjuMhoYGbrvttiH9m0kaXWKgLyLWUktLS+7/wSNbtmzhtNNOq1NFOhi/L6OT8+jHp4hozcyW/vo5dCNJhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKN2bm0XPDCL8d/4bOPp92mWJJpfCK/iBcplhSKQz6g3CZYpcplkph0B+EyxS7TLFUCoO+Dy5T7DLFUgkM+j64TLHLFEslMOj74DLFLlMslWAMTa/sezpkrbhMsaSxzmWKNSh+X0Ynlyken1ymWJIEGPSSVLxRHfSjYVhJf+L3QxqbRm3QNzY2smvXLsNllMhMdu3aRWNjY71LkTRIo3bWzeTJk2lvb6ejo6PepajS2NjI5MmT612GpEEatUHf0NDA1KlT612GJI15o3boRpI0Mgx6SSqcQS9JhTPoJalwBr0kFa7foI+IKRGxPiK2RMTmiPhM1f62iPhBRPy6uj+2ao+IuDUitkbEYxExq9ZfhCTp4AZyRb8XuC4zTwPOAa6OiNOBpcC6zJwGrKseA3wAmFbdFgMugShJddRv0GfmjszcWG2/CGwBTgYuBlZU3VYAl1TbFwN3ZpcfA2+NiBNHvHJJ0oAMaow+IpqAM4GfACdk5g7o+mMAHF91Oxl4psdu7VWbJKkOBhz0EXE08G/AtZn5h7669tJ2wII1EbE4IjZExAaXOZCk2hlQ0EdEA10hvzIzv1s17+wekqnun6va24EpPXafDGzf/5iZeXtmtmRmy6RJk4ZavySpHwOZdRPAt4AtmfmNHk/dByysthcC9/ZoX1DNvjkH6Owe4pEkHXoDWdTsXOBjwOMR0Va1LQduAu6OiI8DTwOXV889AMwDtgIvA38zohVLkgal36DPzB/S+7g7wJxe+idw9TDrkiSNEN8ZK0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9Jhes36CPi2xHxXERs6tF2Q0Q8GxFt1W1ej+eWRcTWiHgyIi6sVeGSpIEZyBX9HcDcXtr/MTObq
9sDABFxOnAlML3a55sRcfhIFStJGrx+gz4zHwF+N8DjXQysyszdmfkfwFbg7GHUJ0kapuGM0V8TEY9VQzvHVm0nA8/06NNetUmS6mSoQX8b8E6gGdgB/EPVHr30zd4OEBGLI2JDRGzo6OgYYhmSpP4MKegzc2dm7svM14B/4k/DM+3AlB5dJwPbD3KM2zOzJTNbJk2aNJQyJEkDMKSgj4gTezy8FOiekXMfcGVEHBURU4FpwE+HV6IkaTiO6K9DRPwLMBs4LiLagS8AsyOima5hmW3A3wJk5uaIuBt4AtgLXJ2Z+2pTuiRpIPoN+syc30vzt/ro/xXgK8MpSpI0cnxnrCQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TC9Rv0EfHtiHguIjb1aHtbRPwgIn5d3R9btUdE3BoRWyPisYiYVcviJUn9G8gV/R3A3P3algLrMnMasK56DPABYFp1WwzcNjJlSpKGqt+gz8xHgN/t13wxsKLaXgFc0qP9zuzyY+CtEXHiSBUrSRq8oY7Rn5CZOwCq++Or9pOBZ3r0a6/aDhARiyNiQ0Rs6OjoGGIZkqT+jPSLsdFLW/bWMTNvz8yWzGyZNGnSCJchSeo21KDf2T0kU90/V7W3A1N69JsMbB96eZKk4Rpq0N8HLKy2FwL39mhfUM2+OQfo7B7ikSTVxxH9dYiIfwFmA8dFRDvwBeAm4O6I+DjwNHB51f0BYB6wFXgZ+Jsa1CxJGoR+gz4z5x/kqTm99E3g6uEWJUkaOb4zVpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCnfEcHaOiG3Ai8A+YG9mtkTE24DVQBOwDbgiM18YXpmSpKEaiSv6v8zM5sxsqR4vBdZl5jRgXfVYklQntRi6uRhYUW2vAC6pwTkkSQM03KBPYG1EtEbE4qrthMzcAVDdH9/bjhGxOCI2RMSGjo6OYZYhSTqYYY3RA+dm5vaIOB74QUT8cqA7ZubtwO0ALS0tOcw6JEkHMawr+szcXt0/B9wDnA3sjIgTAar754ZbpCRp6IYc9BHxZxFxTPc28D+ATcB9wMKq20Lg3uEWKUkauuEM3ZwA3BMR3cf558x8MCJ+BtwdER8HngYuH36ZkqShGnLQZ+ZvgDN6ad8FzBlOUZKkkeM7YyWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXuiHoXMJY0Lf1evUsYkG03XVTvEiSNIl7RS1LhDHpJKpxBL0mFM+glqXAGvSQVzlk3JbphQr0rGJgbOutdgTQu1OyKPiLmRsSTEbE1IpbW6jySpL7VJOgj4nDg/wEfAE4H5kfE6bU4lySpb7Uaujkb2JqZvwGIiFXAxcATNTqfpLHAYcW6qNXQzcnAMz0et1dtkqRDrFZX9NFLW76hQ8RiYHH18KWIeLJGtYw7AccBz9e7jn59sbcfE5XMn80Rd8pAOtUq6NuBKT0eTwa29+yQmbcDt9fo/ONaRGzIzJZ61yHtz5/N+qjV0M3PgGkRMTUijgSuBO6r0bkkSX2oyRV9Zu6NiGuAf
wcOB76dmZtrcS5JUt9q9oapzHwAeKBWx1efHBLTaOXPZh1EZvbfS5I0ZrnWjSQVzqCXpMIZ9JJqJiIOi4j/Xu86xjvH6Me4iJgEfAJooseL65m5qF41ST1FxI8y87/Vu47xzGWKx757gUeBh4B9da5F6s3aiPhr4LvplWVdeEU/xkVEW2Y217sO6WAi4kXgz+i6EHmFriVSMjPfUtfCxhHH6Me++yNiXr2LkA4mM4/JzMMysyEz31I9NuQPIa/ox7geV0v/H9hTNXu1pFEjIgK4CpiamX8fEVOAEzPzp3Uubdww6CXVVETcBrwGnJ+Zp0XEscDazHxvnUsbN3wxtgAR8VfAX1QPH87M++tZj7Sf/5qZsyLi5wCZ+UK12KEOEcfox7iIuAn4DF2f3vUE8JmqTRot9lQfL5rw+pTg1+pb0vji0M0YFxGPAc2Z+Vr1+HDg55k5s76VSV0i4irgI8AsYAXwYeDvMvNf61rYOOLQTRneCvyu2h4jH8qp8SIzV0ZEKzCHrqmVl2TmljqXNa4Y9GPfV4GfR8R6un6J/gJYVt+SpAP8GvgDVeZExNsz8+n6ljR+OHRTgIg4EXgvXUH/k8z8bZ1Lkl4XEZ8CvgDspOtNU91vmHJ48RAx6Me4iDgXaMvMP0bER+kaB/0/mflUnUuTAIiIrXTNvNlV71rGK2fdjH23AS9HxBnAZ4GngDvrW5L0Bs8AnfUuYjxzjH7s25uZGREXA7dm5rciYmG9i5Ii4n9Xm78BHo6I7wG7u5/PzG/UpbBxyKAf+16MiGXAx4A/r6ZXNtS5JgngmOr+6ep2ZHWDak69Dg3H6Me4iPgvwHzgZ5n5w2odkdmZeVedS5MAiIjL958z31ubasegH6Oqxcy6v3lR3We1vRvYCnw+M9fVoTzpdRGxMTNn9dem2nHoZozKzGMO9lw1fPMeYGV1Lx1yEfEBYB5wckTc2uOptwB761PV+GTQFygz9wG/iIj/W+9aNK5tBzYAlwO/out/nPvomk//v+pY17jj0I2kmoiIBuArwP8EttE1rDgF+A6wPDP3HHxvjSTn0Uuqla8DxwKnZOaszDwTeAdd6zHdXNfKxhmv6CXVRET8Gjh1/w8Er15D+mVmTqtPZeOPV/SSaiX3D/mqcR/Ooz+kDHpJtfJERCzYv7Fak+mXdahn3HLoRlJNRMTJwHeBV4BWuq7i3wu8Cbg0M5+tY3njikEvqaYi4nxgOl2zbjb7Jr5Dz6CXpMI5Ri9JhTPoJalwBr0kFc6gl6TCGfSSVLj/BI9aQOe2deiMAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "p3 = pd.concat([p1, p2], axis=1, sort=False).drop_duplicates()\n", + "p3.plot(kind='bar')\n", + "p3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# DOMAINS\n", + "\n", + "Again, the difference in the number of unique values between the whole sample and the filtered one is really big. Only 7.2%[1] of the values remain in the filtered sample for the non-JSON values and 30% for the JSON values. \n", + "\n", + "---\n", + " For further investigation: \n", + "1. Only a few domains produce bigger values. Why? Do they have something in common? Does that mean that some domains only produce bigger values? \n", + " \n", + "2. What are the top domains commonly used for?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Full Sample" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 11166 unique script_domain present on the non-json dataset and 3507 on the JSONs\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYcAAAEOCAYAAABiodtuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFdNJREFUeJzt3X+MV/Wd7/HnWxhFqyJF9CpwHczSVhFEnFp321uNWEFtF+uqldiFFLekN9a2e2ut2E1MrVptzNb15mpi1h9oyILLttFY649STN3EWhmcKkgtxKqMKI6CbP1FQd/3jzm4I59BcL4jZ+D7fCST7znv8zln3sOPec35nHO+E5mJJEk97VF3A5KkgcdwkCQVDAdJUsFwkCQVDAdJUsFwkCQVDAdJUsFwkCQVDAdJUmFw3Q301YEHHpitra11tyFJu4z29vZXMnPEjozdZcOhtbWVJUuW1N2GJO0yIuK5HR3rtJIkqWA4SJIKhoMkqbDLXnPozaZNm+js7OTtt9+uuxVVhgwZwqhRo2hpaam7FUkfwm4VDp2dney33360trYSEXW30/Qyk1dffZXOzk7GjBlTdzuSPoTdalrp7bffZvjw4QbDABERDB8+3DM5aRe0W4UDYDAMMP59SLum3S4cJEmN262uOWyt9ZJf9Ovxnr369A/cfuKJJzJnzhymTJnyXu26667jj3/8IzfccMM299t33315/fXX+6XH22+/nZ/85CdkJpnJrFmzuOiii/rl2FtcddVVXHrppf16TO18/f3/o9lt7/vDrsYzh340ffp05s+f/77a/PnzmT59+k75/L/85S+57rrreOCBB1i+fDlLly5l6NCh/f55rrrqqn4/pqSBxXDoR2eddRb33HMPGzduBODZZ59lzZo1fO5zn+P1119n8uTJTJo0ifHjx3PXXXcV+z/00EN88YtffG/9m9/8JrfddhsA7e3tnHDCCRx77LFMmTKFF198sdj/xz/+Mddeey2HHnoo0H0b6de//nUAOjo6OP7445kwYQJf/vKXWb9+PdB9trPlbUheeeUVtrxf1W233caZZ57J1KlTGTt2LBdffDEAl1xyCW+99RYTJ07kvPPO44033uD000/n6KOP5qijjmLBggX98CcpqW6GQz8aPnw4xx13HPfddx/Qfdbwla98hYhgyJAh/PznP2fp0qUsXryY7373u2TmDh1306ZNXHjhhSxcuJD29nZmzZrFD37wg2LcsmXLOPbYY3s9xowZM7jmmmt44oknGD9+PD/84Q+3+3k7OjpYsGABTz75JAsWLGD16tVcffXV7L333nR0dDBv3jzuu+8+Dj30UH7/+9+zbNkypk6dukNfk6SBzXDoZz2nlnpOKWUml156KRMmTODkk0/mhRdeYO3atTt0zKeffpply5bxhS98gYkTJ3LFFVfQ2dm5wz1t2LCB1157jRNOOAGAmTNn8pvf/Ga7+02ePJmhQ4cyZMgQjjzySJ57rnzPrvHjx/OrX/2K73//+zz88MMfyTSWpJ3PcOhnZ5xxBosWLWLp0qW89dZbTJo0CYB58+bR1dVFe3s7HR0dHHzwwcX9/4MHD+bdd999b33L9sxk3LhxdHR00NHRwZNPPskDDzxQfO5x48bR3t7+ofrt+Tm37mevvfZ6b3nQoEFs3ry52P8Tn/gE7e3tjB8/njlz5nD55Zd/qM8vaWAyHPrZvvvuy4knnsisWbPedyF6w4YNHHTQQbS0tLB48eJefwo/7LDDeOqpp9i4cSMbNmxg0aJFAHzyk5+kq6uLRx55BOieZlq+fHmx/5w5c7j44ot56aWXANi4cSPXX389Q4cOZdiwYTz88MMA3HHHHe+dRbS2tr4XKAsXLtyhr7GlpYVNmzYBsGbNGvbZZx+++tWvctFFF7F06dIdOoakgW23vpW1rlvLpk+fzplnnvm+O5fOO+88vvSlL9HW1sbEiRP51Kc+Vew3evRozjnnHCZ
MmMDYsWM55phjANhzzz1ZuHAh3/rWt9iwYQObN2/mO9/5DuPGjXvf/qeddhpr167l5JNPJjOJCGbNmgXA3Llz+cY3vsGbb77J4Ycfzq233grARRddxDnnnMMdd9zBSSedtENf3+zZs5kwYQKTJk1ixowZfO9732OPPfagpaWFG2+8sU9/ZpIGltjRi6IDTVtbW279y35WrFjBEUccUVNH2hb/XgYmn3PoX7vCcw4R0Z6ZbTsy1mklSVLBcJAkFXa7cNhVp8l2V/59SLum3SochgwZwquvvuo3pAFiy+9zGDJkSN2tSPqQdqu7lUaNGkVnZyddXV11t6LKlt8EJ2nXsluFQ0tLi79xTJL6wW41rSRJ6h+GgySpYDhIkgqGgySpsN1wiIhbIuLliFjWo/bxiHgwIlZWr8OqekTE9RGxKiKeiIhJPfaZWY1fGREze9SPjYgnq32uD38jvSTVbkfOHG4Dtv4NLpcAizJzLLCoWgc4FRhbfcwGboTuMAEuAz4DHAdctiVQqjGze+znb4uRpJptNxwy8zfAuq3K04C51fJc4Iwe9duz22+BAyLiEGAK8GBmrsvM9cCDwNRq2/6Z+Uh2P7l2e49jSZJq0tdrDgdn5osA1etBVX0ksLrHuM6q9kH1zl7qvYqI2RGxJCKW+KCbJH10+vuCdG/XC7IP9V5l5k2Z2ZaZbSNGjOhji5Kk7elrOKytpoSoXl+u6p3A6B7jRgFrtlMf1UtdklSjvobD3cCWO45mAnf1qM+o7lo6HthQTTvdD5wSEcOqC9GnAPdX2/4cEcdXdynN6HEsSVJNtvveShHxb8CJwIER0Un3XUdXA3dGxPnA88DZ1fB7gdOAVcCbwNcAMnNdRPwIeKwad3lmbrnI/b/pviNqb+CX1YckqUbbDYfMnL6NTZN7GZvABds4zi3ALb3UlwBHba8PSdLO4xPSkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKjQUDhHxjxGxPCKWRcS/RcSQiBgTEY9GxMqIWBARe1Zj96rWV1XbW3scZ05VfzoipjT2JUmSGtXncIiIkcC3gLbMPAoYBJwLXAP8NDPHAuuB86tdzgfWZ+ZfAT+txhERR1b7jQOmAjdExKC+9iVJalyj00qDgb0jYjCwD/AicBKwsNo+FzijWp5WrVNtnxwRUdXnZ+bGzPwTsAo4rsG+JEkN6HM4ZOYLwLXA83SHwgagHXgtMzdXwzqBkdXySGB1te/mavzwnvVe9nmfiJgdEUsiYklXV1dfW5ckbUcj00rD6P6pfwxwKPAx4NRehuaWXbaxbVv1sph5U2a2ZWbbiBEjPnzTkqQd0si00snAnzKzKzM3AT8D/gY4oJpmAhgFrKmWO4HRANX2ocC6nvVe9pEk1aCRcHgeOD4i9qmuHUwGngIWA2dVY2YCd1XLd1frVNt/nZlZ1c+t7mYaA4wFftdAX5KkBg3e/pDeZeajEbEQWApsBh4HbgJ+AcyPiCuq2s3VLjcDd0TEKrrPGM6tjrM8Iu6kO1g2Axdk5jt97UuS1Lg+hwNAZl4GXLZV+Rl6udsoM98Gzt7Gca4ErmykF0lS//EJaUlSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUMB0lSwXCQJBUaCoeIOCAiFkbEHyJiRUT8dUR8PCIejIiV1euwamxExPURsSoinoiIST2OM7MavzIiZjb6RUmSGtPomcO/APdl5qeAo4EVwCXAoswcCyyq1gFOBcZWH7O
BGwEi4uPAZcBngOOAy7YEiiSpHn0Oh4jYH/g8cDNAZv4lM18DpgFzq2FzgTOq5WnA7dntt8ABEXEIMAV4MDPXZeZ64EFgal/7kiQ1rpEzh8OBLuDWiHg8Iv41Ij4GHJyZLwJUrwdV40cCq3vs31nVtlUvRMTsiFgSEUu6uroaaF2S9EEaCYfBwCTgxsw8BniD/55C6k30UssPqJfFzJsysy0z20aMGPFh+5Uk7aBGwqET6MzMR6v1hXSHxdpquojq9eUe40f32H8UsOYD6pKkmvQ5HDLzJWB1RHyyKk0GngLuBrbccTQTuKtavhuYUd21dDywoZp2uh84JSKGVReiT6lqkqSaDG5w/wuBeRGxJ/AM8DW6A+fOiDgfeB44uxp7L3AasAp4sxpLZq6LiB8Bj1XjLs/MdQ32JUlqQEPhkJkdQFsvmyb3MjaBC7ZxnFuAWxrpRZLUf3xCWpJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSQXDQZJUMBwkSYWGwyEiBkXE4xFxT7U+JiIejYiVEbEgIvas6ntV66uq7a09jjGnqj8dEVMa7UmS1Jj+OHP4NrCix/o1wE8zcyywHji/qp8PrM/MvwJ+Wo0jIo4EzgXGAVOBGyJiUD/0JUnqo4bCISJGAacD/1qtB3ASsLAaMhc4o1qeVq1TbZ9cjZ8GzM/MjZn5J2AVcFwjfUmSGtPomcN1wMXAu9X6cOC1zNxcrXcCI6vlkcBqgGr7hmr8e/Ve9nmfiJgdEUsiYklXV1eDrUuStqXP4RARXwRezsz2nuVehuZ2tn3QPu8vZt6UmW2Z2TZixIgP1a8kaccNbmDfzwJ/GxGnAUOA/ek+kzggIgZXZwejgDXV+E5gNNAZEYOBocC6HvUteu4jSapBn88cMnNOZo7KzFa6Lyj/OjPPAxYDZ1XDZgJ3Vct3V+tU23+dmVnVz63uZhoDjAV+19e+JEmNa+TMYVu+D8yPiCuAx4Gbq/rNwB0RsYruM4ZzATJzeUTcCTwFbAYuyMx3PoK+JEk7qF/CITMfAh6qlp+hl7uNMvNt4Oxt7H8lcGV/9CJJapxPSEuSCoaDJKlgOEiSCoaDJKnwUdytpErrJb+ou4XdyrNXn153C1LT8MxBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklQwHCRJBcNBklToczhExOiIWBwRKyJieUR8u6p/PCIejIiV1euwqh4RcX1ErIqIJyJiUo9jzazGr4yImY1/WZKkRjRy5rAZ+G5mHgEcD1wQEUcClwCLMnMssKhaBzgVGFt9zAZuhO4wAS4DPgMcB1y2JVAkSfXoczhk5ouZubRa/jOwAhgJTAPmVsPmAmdUy9OA27Pbb4EDIuIQYArwYGauy8z1wIPA1L72JUlqXL9cc4iIVuAY4FHg4Mx8EboDBDioGjYSWN1jt86qtq16b59ndkQsiYglXV1d/dG6JKkXDYdDROwL/Afwncz8rw8a2kstP6BeFjNvysy2zGwbMWLEh29WkrRDGgqHiGihOxjmZebPqvLaarqI6vXlqt4JjO6x+yhgzQfUJUk1aeRupQBuBlZk5j/32HQ3sOWOo5nAXT3qM6q7lo4HNlTTTvcDp0TEsOpC9ClVTZJUk8EN7PtZ4O+BJyOio6pdClwN3BkR5wPPA2dX2+4FTgNWAW8CXwPIzHUR8SPgsWrc5Zm5roG+JEkN6nM4ZOZ/0vv1AoDJvYxP4IJtHOsW4Ja+9iJJ6l8+IS1JKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SB
JKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqSC4SBJKhgOkqTCgAmHiJgaEU9HxKqIuKTufiSpmQ2IcIiIQcD/A04FjgSmR8SR9XYlSc1rQIQDcBywKjOfycy/APOBaTX3JElNa6CEw0hgdY/1zqomSarB4LobqEQvtSwGRcwGZlerr0fE0x9pV83jQOCVupvYnrim7g5UE/999p/DdnTgQAmHTmB0j/VRwJqtB2XmTcBNO6upZhERSzKzre4+pN7477MeA2Va6TFgbESMiYg9gXOBu2vuSZKa1oA4c8jMzRHxTeB+YBBwS2Yur7ktSWpaAyIcADLzXuDeuvtoUk7VaSDz32cNIrO47itJanID5ZqDJGkAMRwkSQXDQdKAERF7RMTf1N2HvObQlCJiBPB1oJUeNyVk5qy6epK2iIhHMvOv6+6j2Q2Yu5W0U90FPAz8Cnin5l6krT0QEX8H/Cz96bU2njk0oYjoyMyJdfch9SYi/gx8jO4fXN6i++11MjP3r7WxJuM1h+Z0T0ScVncTUm8yc7/M3CMzWzJz/2rdYNjJPHNoQj1+MvsLsKkq+5OZBoSICOA8YExm/igiRgOHZObvam6tqRgOkgaUiLgReBc4KTOPiIhhwAOZ+emaW2sqXpBuUhHxt8Dnq9WHMvOeOvuRevhMZk6KiMcBMnN99Yac2om85tCEIuJq4NvAU9XHt6uaNBBsqn51cMJ7t16/W29LzcdppSYUEU8AEzPz3Wp9EPB4Zk6otzMJIuI84CvAJGAucBbwT5n577U21mScVmpeBwDrquWhdTYi9ZSZ8yKiHZhM922sZ2TmiprbajqGQ3P6MfB4RCym+z/f54E59bYkvc9K4L+ovkdFxP/MzOfrbam5OK3UpCLiEODTdIfDo5n5Us0tSQBExIXAZcBauh+E2/IQnNOeO5Hh0IQi4rNAR2a+ERFfpXtu918y87maW5OIiFV037H0at29NDPvVmpONwJvRsTRwPeA54Db621Jes9qYEPdTTQ7rzk0p82ZmRExDbg+M2+OiJl1N6XmFhH/p1p8BngoIn4BbNyyPTP/uZbGmpTh0Jz+HBFzgL8H/ld1K2tLzT1J+1Wvz1cfe1YfUD3zoJ3Haw5NKCL+BzAdeCwz/7N675oTM/OOmluTiIizt36mobeaPlqGQxOp3nBvy194VK9ZLW8EVgE/yMxFNbQnARARSzNz0vZq+mg5rdREMnO/bW2rppaOAuZVr9JOFRGnAqcBIyPi+h6b9gc219NV8zIcBEBmvgP8PiL+b929qGmtAZYAZwN/pPus9h26n3f4xxr7akpOK0kaECKiBbgS+AfgWbqnO0cDtwKXZuambe+t/uZzDpIGip8Aw4DDMnNSZh4DHE73e39dW2tnTcgzB0kDQkSsBD6RW31Tqq6H/SEzx9bTWXPyzEHSQJFbB0NVfAefc9jpDAdJA8VTETFj62L1/l9/qKGfpua0kqQBISJGAj8D3gLa6T5b+DSwN/DlzHyhxvaajuEgaUCJiJOAcXTfrbTchzLrYThIkgpec5AkFQwHSVLBcJAkFQwHSVLBcJAkFf4/Ts5bWohEuncAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotUniqueValuesComparation(df_json, df_other, 'script_domain')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonother
baidu.com0.1561620.029125
cloudfront.net0.0823070.031655
rambler.ru0.0482660.010561
google-analytics.com0.0000050.121869
yandex.ru0.0219040.030423
\n", + "
" + ], + "text/plain": [ + " json other\n", + "baidu.com 0.156162 0.029125\n", + "cloudfront.net 0.082307 0.031655\n", + "rambler.ru 0.048266 0.010561\n", + "google-analytics.com 0.000005 0.121869\n", + "yandex.ru 0.021904 0.030423" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAFcCAYAAAAkiW7CAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3X24XeOd//H3Rx7ECNFGdEhoMqXVCFJ5ojS0RlElZYJgprTadKajakxVWq2qzlworer8mKl6qFBPNaUpGRkPg3qoJiFEpCoywZloRUhINEh8f3+sdXL22dnJWfucnbPOzv15XVcue6291j7fc5zz2fe+173uWxGBmZmlYbOyCzAzs+7j0DczS4hD38wsIQ59M7OEOPTNzBLi0DczS4hD38wsIQ59M7OEOPTNzBLSu+wCqm277bYxdOjQssswM2sqs2fPfiUiBnV0XI8L/aFDhzJr1qyyyzAzayqSni9yXKHuHUmHSHpG0gJJU2o8P17SY5JWS5pY9dxOkv5b0nxJT0saWuRrmplZ43UY+pJ6AZcChwLDgeMkDa867AXgJOD6Gi8xFbgwIj4MjAVe7krBZmbWeUW6d8YCCyJiIYCkG4EJwNOtB0TEovy5dytPzN8cekfEXflxKxpTtpmZdUaR0B8MvFix3QKMK/j6HwSWSfolMAy4G5gSEWsqD5I0GZgMsNNOOxV8aTNL3TvvvENLSwurVq0qu5Ru069fP4YMGUKfPn06dX6R0FeNfUUn4e8NfAz4CFkX0E1k3UBXtnuxiMuBywFGjx7tCf7NrJCWlha22morhg4dilQrqjYtEcHSpUtpaWlh2LBhnXqNIhdyW4AdK7aHAIsLvn4L8HhELIyI1cBtwF71lWhmVtuqVasYOHBgEoEPIImBAwd26ZNNkdCfCewiaZikvsAkYFrB158JvEdS69jRT1BxLcDMrKtSCfxWXf1+Owz9vIV+CjADmA/cHBHzJJ0r6Yi8iDGSWoCjgZ9Impefuwb4GnCPpLlkXUU/7VLFZmbWaYVuzoqI6cD0qn1nVzyeSdbtU+vcu4A9ulBj3YZOuaPLr7Ho/MMaUImZdadG/O1XKpoDH/3oR3n44Ycb+rU3Fs+9Y2bWRc0S+ODQNzPrsv79+/PSSy8xfvx4Ro4cyYgRI/jNb34DwA033MDuu+/OiBEjOPPMM9udc9ZZZ7Hnnnuy995786c//albanXom5k1wPXXX8/BBx/MnDlzeOKJJxg5ciSLFy/mzDPP5N5772XOnDnMnDmT2267DYCVK1ey995788QTTzB+/Hh++tPuudzp0Dcza4AxY8Zw9dVXc8455zB37ly22morZs6cyQEHHMCgQYPo3bs3J5xwAg888AAAffv25dOf/jQAo0aNYtGiRd1Sp0PfzKwBxo8fzwMPPMDgwYP5u7/7O6ZOnUrE+u817dOnz9rhl7169WL16tXdUqdD38ysAZ5//nm22247vvjFL3LyySfz2GOPMW7cOO6//35eeeUV1qxZww033MD+++9fap09bj59M7POKmuotSTuu+8+LrzwQvr06UP//v2ZOnUq22+/Peeddx4f//jHiQg+9alPMWHChFJqXFvrhj5+lGH06NHR1UVUPE7fLA3z
58/nwx/+cKk1LF26lL322ovnny+0hklD1Pq+Jc2OiNEdnevuHTOzTlq8eDH77LMPX/va18oupTB375iZddIOO+zAH/7wh7LLqItb+mZmCXHom5klxKFvZpYQh76ZWUJ8IdfMNh3nDGjw6y2v+5Rly5Zx/fXX8+UvfxmA++67j4suuojbb7+9sbV1klv6ZmYNtGzZMi677LKGvV6jp2dw6JuZdcEPf/hDRowYwYgRI/jRj37ElClTeO655xg5ciRnnHEGACtWrGDixInsuuuunHDCCWvn5Jk9ezb7778/o0aN4uCDD+all14C4IADDuCb3/wm+++/P5dccklD6y3UvSPpEOASoBdwRUScX/X8eOBHZCtkTYqIW6qe35psqcVbI+KURhRuZla22bNnc/XVV/Poo48SEYwbN47rrruOp556ijlz5gBZ987jjz/OvHnz2GGHHdh333156KGHGDduHF/5ylf41a9+xaBBg7jppps466yzuOqqq4DsE8P999/f8Jo7DH1JvYBLgYOAFmCmpGkRUbnA+QvASWTr4dbyPaDx1ZuZlejBBx/kyCOPZMsttwTgqKOOWrt4SqWxY8cyZEi2ouzIkSNZtGgR22yzDU899RQHHXQQAGvWrGH77bdfe86xxx67UWou0tIfCyyIiIUAkm4EJgBrQz8iFuXPvVt9sqRRwPuAO4EO54UwM2sWRecu23zzzdc+bp1GOSLYbbfdeOSRR2qe0/pG0mhF+vQHAy9WbLfk+zokaTPgB8AZ9ZdmZtazjR8/nttuu40333yTlStXcuutt7LvvvvyxhtvdHjuhz70IZYsWbI29N955x3mzZu3sUsu1NJXjX1Fp+b8MjA9Il5sXSyg5heQJgOTAXbaaaeCL21mVqUTQyy7Yq+99uKkk05i7NixAHzhC19g1KhR7LvvvowYMYJDDz2Uww6rPWNv3759ueWWWzj11FNZvnw5q1ev5rTTTmO33XbbqDV3OLWypH2AcyLi4Hz7GwARcV6NY38G3N56IVfSz4GPAe8C/YG+wGURMWV9X89TK5tZUT1hauUydGVq5SIt/ZnALpKGAf8HTAKOL1JYRJxQUdBJwOgNBb6ZmW1cHfbpR8Rq4BRgBtmwy5sjYp6kcyUdASBpjKQW4GjgJ5I2fseUmZnVrdA4/YiYDkyv2nd2xeOZwJAOXuNnwM/qrtDMbAMigg1dM9zUdHW1Q9+Ra2ZNq1+/fixdurTLQdgsIoKlS5fSr1+/Tr+GJ1wzs6Y1ZMgQWlpaWLJkSdmldJt+/fqtvdGrMxz6Zta0+vTpw7Bhw8ouo6m4e8fMLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwSUij0JR0i6RlJCySts7C5pPGSHpO0WtLEiv0jJT0iaZ6kJyUd28jizcysPh0uoiKpF3ApcBDQAsyUNC0inq447AXgJOBrVae/CXw2Ip6VtAMwW9KMiFjWkOrNrDznDGjAayzv+mtYXYqsnDUWWBARCwEk3QhMANaGfkQsyp97t/LEiPhDxePFkl4GBgEOfTOzEhTp3hkMvFix3ZLvq4uksUBf4Lkaz02WNEvSrJTWujQz625FQl819tW19Lyk7YFrgc9FxLvVz0fE5RExOiJGDxo0qJ6XNjOzOhQJ/RZgx4rtIcDiol9A0tbAHcC3IuK39ZVnZmaNVCT0ZwK7SBomqS8wCZhW5MXz428FpkbELzpfppmZNUKHoR8Rq4FTgBnAfODmiJgn6VxJRwBIGiOpBTga+ImkefnpxwDjgZMkzcn/jdwo34mZmXWoyOgdImI6ML1q39kVj2eSdftUn3cdcF0XazQzswbxHblmZglx6JuZJcShb2aWEIe+mVlCHPpmZglx6JuZJcShb2aWEIe+mVlCHPpmZglx6JuZJcShb2aWEIe+mVlCHPpmZglx6JuZJcShb2aW
EIe+mVlCHPpmZglx6JuZJaRQ6Es6RNIzkhZImlLj+fGSHpO0WtLEqudOlPRs/u/ERhVuZmb16zD0JfUCLgUOBYYDx0kaXnXYC8BJwPVV574X+A4wDhgLfEfSe7petpmZdUaRlv5YYEFELIyIt4EbgQmVB0TEooh4Eni36tyDgbsi4tWIeA24CzikAXWbmVknFAn9wcCLFdst+b4iCp0rabKkWZJmLVmypOBLm5lZvYqEvmrsi4KvX+jciLg8IkZHxOhBgwYVfGkzM6tXkdBvAXas2B4CLC74+l0518zMGqxI6M8EdpE0TFJfYBIwreDrzwA+Kek9+QXcT+b7zMysBB2GfkSsBk4hC+v5wM0RMU/SuZKOAJA0RlILcDTwE0nz8nNfBb5H9sYxEzg332dmZiXoXeSgiJgOTK/ad3bF45lkXTe1zr0KuKoLNZqZWYP4jlwzs4Q49M3MEuLQNzNLiEPfzCwhDn0zs4Q49M3MEuLQNzNLiEPfzCwhDn0zs4Q49M3MEuLQNzNLiEPfzCwhDn0zs4Q49M3MEuLQNzNLiEPfzCwhDn0zs4QUWjlL0iHAJUAv4IqIOL/q+c2BqcAoYClwbEQsktQHuALYK/9aUyPivAbWbx0YOuWOLr/GovMPa0AlZtYTdNjSl9QLuBQ4FBgOHCdpeNVhJwOvRcTOwMXABfn+o4HNI2J3sjeEL0ka2pjSzcysXkW6d8YCCyJiYUS8DdwITKg6ZgJwTf74FuBASQIC2FJSb2AL4G3g9YZUbmZmdSsS+oOBFyu2W/J9NY+JiNXAcmAg2RvASuAl4AXgooh4tfoLSJosaZakWUuWLKn7mzAzs2KKhL5q7IuCx4wF1gA7AMOAf5b0V+scGHF5RIyOiNGDBg0qUJKZmXVGkdBvAXas2B4CLF7fMXlXzgDgVeB44M6IeCciXgYeAkZ3tWgzM+ucIqE/E9hF0jBJfYFJwLSqY6YBJ+aPJwL3RkSQdel8Qpktgb2B3zemdDMzq1eHoZ/30Z8CzADmAzdHxDxJ50o6Ij/sSmCgpAXA6cCUfP+lQH/gKbI3j6sj4skGfw9mZlZQoXH6ETEdmF617+yKx6vIhmdWn7ei1n4zMyuH78g1M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCGFQl/SIZKekbRA0pQaz28u6ab8+UclDa14bg9Jj0iaJ2mupH6NK9/MzOrRYehL6kW2wPmhwHDgOEnDqw47GXgtInYGLgYuyM/tDVwH/H1E7AYcALzTsOrNzKwuRVr6Y4EFEbEwIt4GbgQmVB0zAbgmf3wLcKAkAZ8EnoyIJwAiYmlErGlM6WZmVq8ioT8YeLFiuyXfV/OYiFgNLAcGAh8EQtIMSY9J+nqtLyBpsqRZkmYtWbKk3u/BzMwKKhL6qrEvCh7TG9gPOCH/75GSDlznwIjLI2J0RIweNGhQgZLMzKwzioR+C7BjxfYQYPH6jsn78QcAr+b774+IVyLiTWA6sFdXizYzs84pEvozgV0kDZPUF5gETKs6ZhpwYv54InBvRAQwA9hD0l/kbwb7A083pnQzM6tX744OiIjVkk4hC/BewFURMU/SucCsiJgGXAlcK2kBWQt/Un7ua5J+SPbGEcD0iLhjI30vZmbWgQ5DHyAippN1zVTuO7vi8Srg6PWcex3ZsE0zMyuZ78g1M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0uIQ9/MLCEOfTOzhDj0zcwS4tA3M0tIoTtyzTYFQ6d0fQaQRecf1oBKzMrjlr6ZWUIc+mZmCXHom5klxKFvZpYQh76ZWUIc+mZmCXHom5klpFDoSzpE0jOSFkiaUuP5zSXdlD//qKShVc/vJGmFpK81pmwzM+uMDkNfUi/gUuBQYDhwnKThVYedDLwWETsDFwMXVD1/MfBf
XS/XzMy6okhLfyywICIWRsTbwI3AhKpjJgDX5I9vAQ6UJABJnwEWAvMaU7KZmXVWkdAfDLxYsd2S76t5TESsBpYDAyVtCZwJfHdDX0DSZEmzJM1asmRJ0drNzKxORUJfNfZFwWO+C1wcESs29AUi4vKIGB0RowcNGlSgJDMz64wiE661ADtWbA8BFq/nmBZJvYEBwKvAOGCipO8D2wDvSloVEf+vy5WbmVndioT+TGAXScOA/wMmAcdXHTMNOBF4BJgI3BsRAXys9QBJ5wArHPhmZuXpMPQjYrWkU4AZQC/gqoiYJ+lcYFZETAOuBK6VtICshT9pYxZtZmadU2g+/YiYDkyv2nd2xeNVwNEdvMY5najPzMwayHfkmpklxKFvZpYQh76ZWUIc+mZmCXHom5klpNDoHUvcOQMa8BrLu/4aZtZlbumbmSXELX0zs65qok/DbumbmSXELf31aaJ3bjOzotzSNzNLiEPfzCwhDn0zs4Q49M3MEuLQNzNLiEPfzCwhDn0zs4Q49M3MElLo5ixJhwCXkK2Re0VEnF/1/ObAVGAUsBQ4NiIWSToIOB/oC7wNnBER9zawfjOzLhk65Y4uv8aifg0opJt02NKX1Au4FDgUGA4cJ2l41WEnA69FxM7AxcAF+f5XgMMjYnfgRODaRhVuZmb1K9K9MxZYEBELI+Jt4EZgQtUxE4Br8se3AAdKUkQ8HhGL8/3zgH75pwIzMytBkdAfDLxYsd2S76t5TESsBpYDA6uO+Rvg8Yh4q/oLSJosaZakWUuWLClau5mZ1alI6KvGvqjnGEm7kXX5fKnWF4iIyyNidESMHjRoUIGSzMysM4qEfguwY8X2EGDx+o6R1BsYALyabw8BbgU+GxHPdbVgMzPrvCKhPxPYRdIwSX2BScC0qmOmkV2oBZgI3BsRIWkb4A7gGxHxUKOKNjOzzukw9PM++lOAGcB84OaImCfpXElH5IddCQyUtAA4HZiS7z8F2Bn4tqQ5+b/tGv5dmJlZIYXG6UfEdGB61b6zKx6vAo6ucd6/AP/SxRrNzKxBfEeumVlCHPpmZglx6JuZJcShb2aWEIe+mVlCHPpmZgkpNGTTzHLnDGjAayzv+muYdZJb+mZmCXHom5klxKFvZpYQh76ZWUIc+mZmCXHom5klxKFvZpYQh76ZWUIc+mZmCXHom5klpFDoSzpE0jOSFkiaUuP5zSXdlD//qKShFc99I9//jKSDG1e6mZnVq8PQl9QLuBQ4FBgOHCdpeNVhJwOvRcTOwMXABfm5w8kWUt8NOAS4LH89MzMrQZGW/lhgQUQsjIi3gRuBCVXHTACuyR/fAhwoSfn+GyPirYj4X2BB/npmZlaCIrNsDgZerNhuAcat75iIWC1pOTAw3//bqnMHV38BSZOByfnmCknPFKp+IxJsC7zSpRf5rhpTTMn8s2jjn0U7/lnkesjvxfuLHFQk9GtVEgWPKXIuEXE5cHmBWrqNpFkRMbrsOnoC/yza+GfRxj+LNs30syjSvdMC7FixPQRYvL5jJPUGBgCvFjzXzMy6SZHQnwnsImmYpL5kF2anVR0zDTgxfzwRuDciIt8/KR/dMwzYBfhdY0o3M7N6ddi9k/fRnwLMAHoBV0XEPEnnArMiYhpwJXCtpAVkLfxJ+bnzJN0MPA2sBv4xItZspO+l0XpUd1PJ/LNo459FG/8s2jTNz0JZg9zMzFLgO3LNzBLi0DczS4hD38wsIQ59M7OEOPRtHZK+WmRfKiTtVOtf2XWZdYZH71SQtA3wWWAoFcNZI+LUsmoqg6THImKvqn2PR8RHyqqpTJLm0naHeT9gGPBMROxWamHdTNJo4Cyy2/17k/08IiL2KLWwEki6mtqzC3y+hHLqUmQahpRMJ5sraC7wbsm1dDtJxwHHA8MkVd6AtxWwtJyqyhcRu1duS9oL+FJJ5ZTp58AZJPr3UeX2isf9gCNpktkG3NKvUKuFmxJJ7ydrxZ4HVK6b8AbwZESsLqWw
HijF3xVJD0bEfmXX0RNJ2gy4OyI+UXYtHXHoV5D0T8AKsnfxt1r3R8SrpRVVkvwNYJeIuFvSFkDviHij7LrKIOn0is3NgL2AgRGR1KJAkg4EjgPuof3fxy9LK6qHkPQh4I58TZEezd077b0NXEjWb9n6bhjAX5VWUQkkfZFsquv3Ah8gmyjvP4ADy6yrRFtVPF4N3AH8Z0m1lOlzwK5AH9q6dwJILvQlvUH7Pv0/AmeWVE5dHPrtnQ7sHBFdmxe7+f0j2WI3jwJExLOStiu3pHLkK731j4gzyq6lB9iz+vpGivIFonaLiBfKrqUzPGSzvXnAm2UX0QO8la+SBqydLjvJfsB8gsCk+u434Lc1lkpNTj6D8K1l19FZbum3twaYI+l/aN9nmdSQTeB+Sd8EtpB0EPBl4Ncl11SmOflopl8AK1t3JtiXvR9woqT/Jfv7SHbIJtkb4JiImFl2IfXyhdwKkk6stT8irqm1f1OVj0Q4Gfgk2R/2DOCKSPSXJR+TXS2aYUx2I+UX99cREc93dy1lk/Q08EHgebKGQNO8ATr0q+QLxXww33wmIt4psx6znkTSnsDH8s3fRMQTZdZTlmZ+A3ToV5B0AHANsIjsnXtH4MSIeKDEsrqdpH2Bc1j3zsukRjFtiKRPR8TtHR+56cin4vgibaN1jgQuj4h/K68qq5dDv4Kk2cDxEfFMvv1B4IaIGFVuZd1L0u+BfwJmk13nACAikr0rt5qk70bEd8quoztJehLYJyJW5ttbAo80Q5dGd5B0e0R8uuw6OuILue31aQ18gIj4g6Q+ZRZUkuUR8V9lF9ET5Nc39o6Ihyv3pxb4OVHRCMgfq6RaeqIvll1AEQ799mZJuhK4Nt8+gay1m5r/kXQh2cf4ylFMj5VXUjki4l1JPwD2KbuWHuBq4FFJrcMVP0O2PnZyJG0XES9X7d4aeKmMeurh7p0KkjYnuzFpP7IWzAPAZRHx1gZP3MTkQ1arRTPMK7IxSPou8CTwy1RHMLXKJ5tb+/cREY+XXFIpJD0DfDsibs63/xk4OSJ6/H0MDv0KeR/lqvyGnNa7MTePCN+wlbD8lvstyboz/kzbhe2tSy2sm0naG5jXOgeTpK2A4RHxaLmVdT9J2wOXA6uA9wHzgX+OiBWlFlaA78ht7x5gi4rtLYC7S6qlR8lbeEmKiK0iYrOI6BMRW+fbSQV+7t/JJiRstTLfl5yIeAm4k6zbbygwtRkCHxz61fpV/o/LH/9FifX0JP9QdgFlUeZvJX07395R0tiy6yqBKru3IuJdEr0uKOkuYBwwAvgUcLGki8qtqhiHfnsrK1u0kkaRfZxPXkQ0xciEjeQyshbd8fn2CuDS8sopzUJJp0rqk//7KrCw7KJKcmlEfDYilkXEU8BHgeVlF1WE+/QrSBoD3EjbCjjbA8dGRFIjeCTdExEHdrQvFa0LplQuGSnpiYjYs+zaulM+0+qPgU+QTcB3D3BajVEsSWjWNSeS/Gi2PhExU9KuwIfILtb9PqVpGCT1I+vO2lbSe2gbg701sENphZXvnfyifgBIGkSCywXm4T6p7Dp6gmZec8KhXyUP+adatyX9ZUT8scSSutOXgNPIAn42baH/Oml2Z7T6MdlUuttJ+ldgIvCtckvqGVKcjiLXtGtOOPQ7diVwWNlFdIeIuAS4RNJXPJ9Km4j4eT5Fx4Fkb4SfiYj5JZfVU4yh/SLhqXgrIt7O1lNprjUn3KdvNUn6KNlQtLUNg4iYWlpBJZD03g09n+LayZaR9H1gGfBZ4Ctka048HRFnlVpYAQ79CpJ2qrW/WZdF6yxJ15L1U86hba6VSG0xmXyxkKD2/DLJzToq6Wjgzoh4Q9K3yFYU+16Kd+U285oTDv0KkubS9kfeDxhGNqf+bqUW1s0kzSe709K/HLaWpCcjYg9J+wHnARcB34yIcSWXZnVwn36F6kWf8zH7XyqpnDI9BfwlTTB5VHeRdBTZnDNBtnjIbSWXVIbWT32HAf8e
Eb+SdE6J9XS7ioZhTc0wzbRDfwMi4rF87H5qtgWelvQ72s+yeUR5JZVH0mXAzsAN+a6/l3RQRPxjiWWV4f8k/QT4a+CCfILC1G7wbJ0vv/X/feWMvE0xR5e7dypIOr1iczOyPsuBEXFwSSWVQtL+tfZHxP3dXUtPIGkeMKK1uyvvz52bYLffXwCHkH3vz+aTju0eEf9dcmndTtJDEbFvR/t6otTepTuyVcW/zYE7gAmlVlSCPNx/T9vPYn6qgZ97Bqi8yL8j2VTLqdkDuCsins23V9AkUw9sBFvm1zaAtaPdtiyxnsLc0rd1SDoGuBC4j+yi9seAMyLiljLr6m6Sfk3WfzuAbDz67/LtccDDEfHXJZbX7SQ9DuxV9YlnVkQkNwNrPi/XVWS/G5AN3/x8Myw05D592v1x15RgX/ZZwJjWOVXyaQfuBpIKfbLRKdZmnVk285uSkpPPx7WnpK3Jfi5N84knyf9hNbT+cR9FNmrlunz7OGBRGQWVbLOqSbSWkmBXYHWXVv4HnvLfzEJJp9I2h/6XSXSWzfwi9t+Q38DYemduRJxbYlmFuHungqQHImJ8R/s2dfn6uHvQNlrlWODJiDizvKrKI2ky8D2yabbfpW3lrNRuzvIsmzlJd5Jdz5hNxWLxEfGD0ooqyKFfIb8p6bCIWJhvDwOmR8SHy62s+1WMS29dC/XWDk7ZZEl6FtgnIl4puxbrGSQ9FREjyq6jM1L+qFrLPwH3SWr9yDqUxG7OyqcQnpFfpPxl2fX0EM/RJGOwNwZJX4+I70v6N2pc+0pteo7cw5J2j4i5ZRdSL4d+hYi4U9IuwK75rt9HxFsbOmdTExFrJL0paUAzXZzayL5B9kf+KO1vVksl7FpnFJ1VahU9y37ASfn8TG/R1uXnO3KbgaRPRMS9eZdGpQ9IIiJSa/GuAubm64CubN2ZUMhV+wlwLzCXNBdP+XX+8M2I+EXlc/kkbCk6tOwCOsuhn9mf7I/68BrPBel1c9yR/7PM6og4vePDNnnfAH5RYN8mLyKeh7UXt/uVXE5dfCHX1mpdB1fSBamO1KklXy3reeDXtO/eSWI+fUmHAp8CjgFuqnhqa7LZWMeWUliJJB0B/IBslbmXgfeT3bne46fmcEu/iqTDgN2oePduhrG3DbJ9Pu/OEZJupGoe+Wa423AjOT7/7zcq9gWQypDNxWT9+UeQDVFs9QbZ4IcUfQ/YG7g7Ij4i6eNk9/X0eA79CpL+g2xh8I8DV5Cthfq7UovqXmcDU8gWef5h1XNBNj47ORExrOwayhQRTwBPSHoVuCMikruuUcM7EbFU0maSNouI/5F0QdlFFeHunQoVi0S0/rc/8MuI+GTZtXUnSd+OiO+VXUdPImkEMJz2nwBTWz7yOmAf4D+Bq1NeJ1jS3cBngPOBgWRdPGMi4qOlFlaAW/rt/Tn/75uSdiCbfiCZVl6+aAzAHRWP10q1e0fSd4ADyEIACfEaAAAG0ElEQVR/OtnIjQeBpEI/Iv42n4riOOBqSQFcDdwQEW+UW123+xWwBXAa2Vz6A4Cm6AZ26Ld3u6RtgO/T1nd5RYn1dLfWW8j7AaOBJ8j69fcAHiUbm5yiicCewOMR8TlJ7yOt34u1IuJ1Sf9JW+AdCZwh6ccR8W/lVtet3gc8AjxGNtvm1GZZXtTdOxUkbQH8A9lUwgH8hmxZuFWlFtbN8ou4/9p6t2HetfG1iDip1MJKImlmRIyRNJvses8bwFPNMFKjkSQdDnwe+ADZilHXRMTL+eIq8yPi/aUW2M2UzbL2SeBzZI2km4ErI+K5UgvrgFv67V1D9gf943z7OLKP8MeUVlE5dq28vTwinpI0ssyCypL/YT+ZfwL8KdknwBWkdYG/1dHAxRHxQOXOiHhT0udLqqk0ERGS/gj8EVgNvAe4RdJdEfH1cqtbP7f0K0h6IiL27Gjfpk7SDWR34l5H9onnb4H+EdEUQ9IaTdLsiBiVPx4KbB0RKa6cZbl8iukTgVfI
uvpui4h38oVlno2ID5Ra4Aa4pd/e45L2jojfAkgaBzxUck1l+BxZN9dX8+0HaJtDPUW/lTQmImZGxKKyi+lukt6g/URryrdb55vZupTCyrUtcFTrnbmt8oVlPr2ec3oEt/QBSXPJfon7AB8CXsi33w883axTqFpjSHoa+CDZXbkraaLJtcyqOfQBSRu8AFX9br6py2cOrDWFbip3oLazvt+P1H4vWlXPNxMRL5RYjtXJ3Tuk+8e7AaMrHvcju4D33pJqKZ1/PzLrm2+GbNoSaxJu6Vshkh6MiFTH6RvZoAayqTjazTcTEZNLLs3q4Ja+raPqbtzNyFr+W5VUjvUcTTvfjLVx6FstlYs7rwYWkd69CrauZfl8VA8AP5f0MtnvhzURd++YWSGStiRbVU20zTfz84hYWmphVheHvq1D0gDgO8D4fNf9wLleM9es+W1WdgHWI11FNh3FMfm/18lmU7SESTpK0rOSlkt6XdIbkl4vuy6rj1v6tg5JcyJiZEf7LC2SFgCHpzyP/qbALX2r5c+S1g7PlLQvbWsNWLr+5MBvfm7p2zryGTWvIbtQJ+BV4KR82TxLlKRLgL8EbqP9AvG/LK0oq5tD39YrXyWJiHC/rSGp1nWdiIjkplVuZg59W0vS6Rt6PiKqF0s3sybjm7OsUutdt63T5lZy6yBxkvoBJ5PNtVM54Zpb+k3EoW9rRcR3ASRdA3w1Ipbl2++h/V26lqZrgd8DB5MtAn4C2YRr1kQ8esdq2aM18AEi4jXgIyXWYz3DzhHxbWBlRFwDHAbsXnJNVieHvtWyWd66B0DSe/GnQoN38v8ukzSCbHTX0PLKsc7wH7LV8gPgYUm3kPXlHwP8a7klWQ9wed4Y+BYwDegPfLvckqxeHr1jNUkaTjZ3uoB7IuLpkksyswZw6JtZ3STdHhE9egFwq819+mbWGYPLLsA6x6FvZp3xeNkFWOe4e8fMCpO0BbBTRDxTdi3WOW7pm1khkg4H5gB35tsjJU0rtyqrl0PfzIo6BxgLLAOIiDl4nH7TceibWVGrvWRm8/PNWWZW1FOSjgd6SdoFOBV4uOSarE5u6ZtZUV8hm2HzLeAGsrWTTyu1IqubR++YmSXE3TtmtkGSfs0G1lOIiCO6sRzrIoe+mXXkorILsMZx946ZWULc0jezQiTNZd1unuXALOBfImJp91dl9XLom1lR/wWsAa7PtyeRTb29HPgZcHg5ZVk93L1jZoVIeigi9q21T9LciPDSiU3A4/TNrKj+ksa1bkgaS7Z6FsDqckqyerl7x8yK+gJwlaT+ZN06rwMnS9oSOK/Uyqwwd++YWV0kDSDLjmVl12L1c/eOmRUiaYCkHwL3AHdL+kH+BmBNxKFvZkVdBbwBHJP/ex24utSKrG7u3jGzQiTNiYiRHe2zns0tfTMr6s+S9mvdkLQv8OcS67FOcEvfzAqRNBK4BhhANnrnVeDEiHiy1MKsLg59M6uLpK0BIuL1smux+rl7x8wKqRi9cy9wr0fvNCeHvpkV5dE7mwB375hZIR69s2lwS9/MivLonU2AW/pmVoikPYGpZKN3AF7Do3eajkPfzAqRdHr+sHVmzRVkc+nPjog55VRl9XL3jpkVNRr4e2Brstb+ZOAA4KeSvl5iXVYHt/TNrBBJM4C/iYgV+XZ/4BbgSLLW/vAy67Ni3NI3s6J2At6u2H4HeH9E/Bl4q5ySrF5eRMXMiroe+K2kX+XbhwM35IuoPF1eWVYPd++YWWGSRgH7kc2982BEzCq5JKuTQ9/MLCHu0zczS4hD38wsIQ59M7OEOPTNzBLy/wGi3eFs8DD3DQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotTopUsageComparation(df_json, df_other, 'script_domain', 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Above the mean Sample" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 811 unique script_domain present on the non-json dataset and 1051 on the JSONs\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEOCAYAAACAfcAXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFHtJREFUeJzt3X+QVeWd5/H3V2lFo0GC6CqwttaQRBFE7BhnkomWmFXRDMT1RygysMENlapokjFEwUxVKpmsQcuaGKcmbFljFC1qIMsmhWWMY0Kw4lQlbmjsKEiMlKvSotgq9kRFAvE7f9yDaaGh277tvdDP+1XVdc95znPO+V66uZ97nnPOvZGZSJLKc1CzC5AkNYcBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSrUsGYXsC9HH310tra2NrsMSTqgtLe3v5SZo/vqt18HQGtrK2vWrGl2GZJ0QImIZ/rTzyEgSSqUASBJhTIAJKlQ+/U5AEn7jx07dtDZ2cmbb77Z7FJUGT58OGPHjqWlpWVA6xsAkvqls7OTI488ktbWViKi2eUULzN5+eWX6ezs5MQTTxzQNhwCktQvb775JqNGjfLFfz8REYwaNaquIzIDQFK/+eK/f6n392EASFKhPAcwCFoX/KTZJQwpTy+6qNklqB8G++++r9/7Oeecw8KFCzn//PPfbrvlllv4/e9/z/e///29rnfEEUfw2muvDUqNd911FzfddBOZSWYyd+5c5s+fPyjb3uWGG27g+uuvH9Rt7o1HAJIOCDNnzmTZsmXvaFu2bBkzZ85syP5/+tOfcsstt/DAAw+wfv161q5dy4gRIwZ9PzfccMOgb3NvDABJB4RLL72Ue++9l+3btwPw9NNPs3nzZj7+8Y/z2muvMXXqVKZMmcLEiRNZuXLlHus/+OCDXHzxxW/PX3XVVdx5550AtLe3c/bZZ3PGGWdw/vnn8/zzz++x/ne+8x1uvvlmjj/+eKB2CebnP/95ADo6OjjrrLOYNGkSn/70p9m6dStQO2rZ9XE2L730Ers+2+zOO+/kkksu4YILLmD8+PFce+21ACxYsIBt27YxefJkZs2axeuvv85FF13Eaaedxqmnnsry5csH4V/yzwwASQeEUaNGceaZZ3L//fcDtXf/V1xxBRHB8OHD+fGPf8zatWtZvXo1X/3qV8nMfm13x44dXH311axYsYL29nbmzp3L17/+9T36rVu3jjPOOKPXbcyePZsbb7yRRx99lIkTJ/LNb36zz/12dHSwfPlyHnvsMZYvX86mTZtYtGgRhx12GB0dHSxdupT777+f448/nt/+9resW7eOCy64oF/Pqb8MAEkHjJ7DQD2HfzKT66+/nkmTJnH
eeefx3HPPsWXLln5t84knnmDdunV88pOfZPLkyXz729+ms7Oz3zV1d3fz6quvcvbZZwMwZ84cfvnLX/a53tSpUxkxYgTDhw/nlFNO4Zln9vz8tokTJ/Lzn/+c6667joceemjQh5wMAEkHjBkzZrBq1SrWrl3Ltm3bmDJlCgBLly6lq6uL9vZ2Ojo6OPbYY/e4Pn7YsGG89dZbb8/vWp6ZTJgwgY6ODjo6Onjsscd44IEH9tj3hAkTaG9vf1f19tzn7vUceuihb08ffPDB7Ny5c4/1P/jBD9Le3s7EiRNZuHAh3/rWt97V/vtiAEg6YBxxxBGcc845zJ079x0nf7u7uznmmGNoaWlh9erVvb6bPuGEE3j88cfZvn073d3drFq1CoAPfehDdHV18atf/QqoDQmtX79+j/UXLlzItddeywsvvADA9u3bufXWWxkxYgQjR47koYceAuDuu+9++2igtbX17dBYsWJFv55jS0sLO3bsAGDz5s0cfvjhfPazn2X+/PmsXbu2X9vorz4vA42IHwAXAy9m5qlV2weA5UAr8DRweWZujdpdCd8DpgFvAP8jM9dW68wB/r7a7Lczc8mgPhNJDdWsy3VnzpzJJZdc8o4rgmbNmsWnPvUp2tramDx5Mh/+8If3WG/cuHFcfvnlTJo0ifHjx3P66acDcMghh7BixQq+9KUv0d3dzc6dO/nKV77ChAkT3rH+tGnT2LJlC+eddx6ZSUQwd+5cAJYsWcIXvvAF3njjDU466STuuOMOAObPn8/ll1/O3Xffzbnnntuv5zdv3jwmTZrElClTmD17Nl/72tc46KCDaGlpYfHixQP6N9ub6OtESUR8AngNuKtHANwEvJKZiyJiATAyM6+LiGnA1dQC4KPA9zLzo1VgrAHagATagTMyc+u+9t3W1pYHwhfCeB/A4PI+gP3Thg0bOPnkk5tdhnbT2+8lItozs62vdfscAsrMXwKv7NY8Hdj1Dn4JMKNH+11Z82vgqIg4Djgf+FlmvlK96P8MGNzT2ZKkd2Wg5wCOzcznAarHY6r2McCmHv06q7a9tUuSmmSwTwL39slEuY/2PTcQMS8i1kTEmq6urkEtTlJ9+nttvRqj3t/HQANgSzW0Q/X4YtXeCYzr0W8ssHkf7XvIzNsysy0z20aP7vNL7SU1yPDhw3n55ZcNgf3Eru8DGD58+IC3MdAPg7sHmAMsqh5X9mi/KiKWUTsJ3J2Zz0fEvwE3RMTIqt9/AxYOuGpJDTd27Fg6OzvxyHz/sesbwQaqP5eB/itwDnB0RHQC36D2wv/DiLgSeBa4rOp+H7UrgDZSuwz0cwCZ+UpE/APwm6rftzJz9xPLkvZjLS0tA/7mKe2f+gyAzNzbR+1N7aVvAl/cy3Z+APzgXVUnSXrPeCewJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFqisAIuLvImJ9RKyLiH+NiOERcWJEPBwRT0bE8og4pOp7aDW/sVreOhhPQJI0MMMGumJEjAG+BJySmdsi4ofAZ4BpwHczc1lE/G/gSmBx9bg1M/8iIj4D3AhcUfczkLRPrQt+0uwShoynF13U7BIGVb1DQMOAwyJiGHA48DxwLrCiWr4EmFFNT6/mqZZPjYioc/+SpAEacABk5nPAzcCz1F74u4F24NXM3Fl16wTGVNNjgE3Vujur/qN2325EzIuINRGxpqura6DlSZL6MOAAiIiR1N7VnwgcD7wPuLCXrrlrlX0s+3ND5m2Z2ZaZbaNHjx5oeZKkPtQzBHQe8P8zsyszdwA/Av4KOKoaEgIYC2yupjuBcQDV8hHAK3XsX5JUh3oC4FngrIg4vBrLnwo8Dqw
GLq36zAFWVtP3VPNUy3+RmXscAUiSGqOecwAPUzuZuxZ4rNrWbcB1wDURsZHaGP/t1Sq3A6Oq9muABXXULUmq04AvAwXIzG8A39it+SngzF76vglcVs/+JEmDxzuBJalQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqHqCoCIOCoiVkTE7yJiQ0T8ZUR8ICJ+FhFPVo8jq74REbdGxMaIeDQipgzOU5AkDUS9RwDfA+7PzA8DpwEbgAXAqswcD6yq5gEuBMZXP/OAxXXuW5JUhwEHQES8H/gEcDtAZv4xM18FpgNLqm5LgBnV9HTgrqz5NXBURBw34MolSXWp5wjgJKALuCMiHomIf4mI9wHHZubzANXjMVX/McCmHut3Vm3vEBHzImJNRKzp6uqqozxJ0r7UEwDDgCnA4sw8HXidPw/39CZ6acs9GjJvy8y2zGwbPXp0HeVJkvalngDoBDoz8+FqfgW1QNiya2inenyxR/9xPdYfC2yuY/+SpDoMOAAy8wVgU0R8qGqaCjwO3APMqdrmACur6XuA2dXVQGcB3buGiiRJjTeszvWvBpZGxCHAU8DnqIXKDyPiSuBZ4LKq733ANGAj8EbVV5LUJHUFQGZ2AG29LJraS98EvljP/iRJg8c7gSWpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKh6g6AiDg4Ih6JiHur+RMj4uGIeDIilkfEIVX7odX8xmp5a737liQN3GAcAXwZ2NBj/kbgu5k5HtgKXFm1Xwlszcy/AL5b9ZMkNUldARARY4GLgH+p5gM4F1hRdVkCzKimp1fzVMunVv0lSU1Q7xHALcC1wFvV/Cjg1czcWc13AmOq6THAJoBqeXfVX5LUBAMOgIi4GHgxM9t7NvfSNfuxrOd250XEmohY09XVNdDyJEl9qOcI4GPA30TE08AyakM/twBHRcSwqs9YYHM13QmMA6iWjwBe2X2jmXlbZrZlZtvo0aPrKE+StC8DDoDMXJiZYzOzFfgM8IvMnAWsBi6tus0BVlbT91TzVMt/kZl7HAFIkhrjvbgP4DrgmojYSG2M//aq/XZgVNV+DbDgPdi3JKmfhvXdpW+Z+SDwYDX9FHBmL33eBC4bjP1JkurnncCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBVqwAEQEeMiYnVEbIiI9RHx5ar9AxHxs4h4snocWbVHRNwaERsj4tGImDJYT0KS9O7VcwSwE/hqZp4MnAV8MSJOARYAqzJzPLCqmge4EBhf/cwDFtexb0lSnQYcAJn5fGaurab/AGwAxgDTgSVVtyXAjGp6OnBX1vwaOCoijhtw5ZKkugzKOYCIaAVOBx4Gjs3M56EWEsAxVbcxwKYeq3VWbbtva15ErImINV1dXYNRniSpF3UHQEQcAfxf4CuZ+R/76tpLW+7RkHlbZrZlZtvo0aPrLU+StBd1BUBEtFB78V+amT+qmrfsGtqpHl+s2juBcT1WHwtsrmf/kqSBq+cqoABuBzZk5j/2WHQPMKeangOs7NE+u7oa6Cyge9dQkSSp8YbVse7
HgL8FHouIjqrtemAR8MOIuBJ4FrisWnYfMA3YCLwBfK6OfUuS6jTgAMjMf6f3cX2Aqb30T+CLA92fJGlweSewJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKpQBIEmFMgAkqVAGgCQVygCQpEIZAJJUKANAkgplAEhSoQwASSqUASBJhTIAJKlQBoAkFcoAkKRCGQCSVCgDQJIKZQBIUqEMAEkqlAEgSYUyACSpUAaAJBXKAJCkQhkAklQoA0CSCmUASFKhDABJKlTDAyAiLoiIJyJiY0QsaPT+JUk1DQ2AiDgY+GfgQuAUYGZEnNLIGiRJNY0+AjgT2JiZT2XmH4FlwPQG1yBJovEBMAbY1GO+s2qTJDXYsAbvL3ppy3d0iJgHzKtmX4uIJ97zqspxNPBSs4voS9zY7ArUBP5tDq4T+tOp0QHQCYzrMT8W2NyzQ2beBtzWyKJKERFrMrOt2XVIu/NvszkaPQT0G2B8RJwYEYcAnwHuaXANkiQafASQmTsj4irg34CDgR9k5vpG1iBJqmn0EBCZeR9wX6P3K8ChNe2//NtsgsjMvntJkoYcPwpCkgplAEhSoQwASQ0XEQdFxF81u47SeQ5gCIuI0cDngVZ6nPDPzLnNqknaJSJ+lZl/2ew6Stbwq4DUUCuBh4CfA39qci3S7h6IiP8O/Ch9J9oUHgEMYRHRkZmTm12H1JuI+APwPmpvTrZR+6iYzMz3N7WwgngOYGi7NyKmNbsIqTeZeWRmHpSZLZn5/mreF/8G8ghgCOvxDuuPwI6q2XdY2i9ERACzgBMz8x8iYhxwXGb+vyaXVgwDQFJTRMRi4C3g3Mw8OSJGAg9k5keaXFoxPAk8xEXE3wCfqGYfzMx7m1mP1MNHM3NKRDwCkJlbqw+JVIN4DmAIi4hFwJeBx6ufL1dt0v5gR/U1sQlvX7b8VnNLKotDQENYRDwKTM7Mt6r5g4FHMnNScyuTICJmAVcAU4AlwKXA32fm/2lqYQVxCGjoOwp4pZoe0cxCpJ4yc2lEtANTqV0COiMzNzS5rKIYAEPbd4BHImI1tf9gnwAWNrck6R2eBP6D6rUoIv5rZj7b3JLK4RDQEBcRxwEfoRYAD2fmC00uSQIgIq4GvgFsoXYz2K4bwRyibBADYAiLiI8BHZn5ekR8ltpY6/cy85kmlyYRERupXQn0crNrKZVXAQ1ti4E3IuI04GvAM8BdzS1JetsmoLvZRZTMcwBD287MzIiYDtyambdHxJxmF6WyRcQ11eRTwIMR8RNg+67lmfmPTSmsQAbA0PaHiFgI/C3w19VloC1Nrkk6snp8tvo5pPqB6p4ANYbnAIawiPgvwEzgN5n579VnrZyTmXc3uTSJiLhs92v+e2vTe8cAGIKqD4Hb9YuN6jGr6e3ARuDrmbmqCeVJAETE2syc0leb3jsOAQ1BmXnk3pZVw0CnAkurR6mhIuJCYBowJiJu7bHo/cDO5lRVJgOgMJn5J+C3EfFPza5FxdoMrAEuA35P7ej0T9TuB/i7JtZVHIeAJDVURLQA/wv4n8DT1IYmxwF3ANdn5o69r63B5H0AkhrtJmAkcEJmTsnM04GTqH1W1c1NrawwHgFIaqiIeBL44O5fBF+dn/pdZo5vTmXl8QhAUqPl7i/+VeOf8D6AhjIAJDXa4xExe/fG6vOqfteEeorlEJCkhoqIMcCPgG1AO7V3/R8BDgM+nZnPNbG8ohgAkpoiIs4FJlC7Cmi9NyY2ngEgSYXyHIAkFcoAkKRCGQCSVCgDQJIKZQBIUqH+E63PxydaKaS5AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotUniqueValuesComparation(df_a_json, df_a_other, 'script_domain')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonother
sociaplus.com0.0899090.006558
tiqcdn.com0.0818660.037636
twimg.com0.079437NaN
google-analytics.com0.0000050.100417
adobedtm.com0.0084220.050673
yoox.biz0.0014980.041437
\n", + "
" + ], + "text/plain": [ + " json other\n", + "sociaplus.com 0.089909 0.006558\n", + "tiqcdn.com 0.081866 0.037636\n", + "twimg.com 0.079437 NaN\n", + "google-analytics.com 0.000005 0.100417\n", + "adobedtm.com 0.008422 0.050673\n", + "yoox.biz 0.001498 0.041437" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAFcCAYAAAAkiW7CAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu8VXWd//HXO0Ao7yHNKKRQOhmikiBqGGqOt7wwNpqalZVlM+btV97Kn2ba/MoyzcrGzCTN8TbOqKikWeb9EqAg4mVERvOEFaKgoijo5/fHdx3YHA+ezTn7nLX3+r6fjwcPzlp7rXM+m31477W/63tRRGBmZnl4V9kFmJlZ33Hom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGelfdgEdbbDBBjF8+PCyyzAzaynTp09/PiKGdHVc04X+8OHDmTZtWtllmJm1FEnP1HOcm3fMzDLi0Dczy4hD38wsI03Xpm9mVq+lS5fS1tbGkiVLyi6lzwwaNIhhw4YxYMCAbp3v0DezltXW1sbaa6/N8OHDkVR2Ob0uIliwYAFtbW2MGDGiW9/DzTtm1rKWLFnC4MGDswh8AEkMHjy4R59s6gp9SXtKekLSHEknd/L4BEkPSlom6YAOjx0m6cniz2HdrtTMrBO5BH67nj7fLkNfUj/gfGAvYCRwiKSRHQ77E/B54PIO574X+BawHTAO+Jak9XtUsZmZdVs9bfrjgDkRMRdA0pXARODR9gMi4unisbc6nLsHcGtEvFA8fiuwJ3BFjys3y8Hp63bzvEWNraNFDD/5poZ+v6e/t3ddx330ox/l3nvvbejP7i31NO8MBZ6t2W4r9tWjrnMlHSFpmqRp8+fPr/Nbm5k1h1YJfKgv9DtrQIo6v39d50bEhRExNiLGDhnS5dQRZmZNZa211uK5555jwoQJjB49mlGjRnHXXXcBcMUVV7DlllsyatQoTjrppJXOOeWUU9h6663Zfvvt+etf/9ontdYT+m3A+2u2hwHz6vz+PTnXzKxlXH755eyxxx7MmDGDmTNnMnr0aObNm8dJJ53EbbfdxowZM5g6dSrXXXcdAIsXL2b77bdn5syZTJgwgV/84hd9Umc9oT8V2EzSCElrAAcDk+v8/rcAu0tav7iBu3uxz8ysUrbddlsmTZrE6aefzqxZs1h77bWZOnUqO++8M0OGDKF///4ceuih3HnnnQCsscYa7LPPPgCMGTOGp59+uk/q7DL0I2IZcBQprB8Dro6I2ZLOkLQfgKRtJbUBBwI/lzS7OPcF4EzSG8dU4Iz2m7pmZlUyYcIE7rzzToYOHcpnP/tZLr30UiJW3RI+YMCA5d0v+/Xrx7Jly/qkzrpG5EbEFGBKh32n1Xw9ldR009m5FwMX96BGM7Om98wzzzB06FC+/OUvs3jxYh588EFOOukkjj32WJ5//nnWX399rrjiCo4++uhS6/Q0DGZWGfV2sWw0Sdx+++384Ac/YMCAAay11lpceumlbLjhhnz3u99ll112ISL4xCc+wcSJE0upcXmt7/Txowxjx44NL6JiVnA//Xf02GOP8eEPf7jUGhYsW
MA222zDM8/UtYZJQ3T2vCVNj4ixXZ3ruXfMzLpp3rx57LDDDhx//PFll1I3N++YmXXTRhttxP/8z/+UXcZq8ZW+mVlGHPpmZhlx6JuZZcShb2aWEd/INbPq6G4X11V+v9Xv+rpw4UIuv/xyjjzySABuv/12zj77bG688cbG1tZNvtI3M2ughQsX8rOf/axh36/R0zM49M3MeuCcc85h1KhRjBo1ih/96EecfPLJPPXUU4wePZoTTjgBgFdeeYUDDjiAzTffnEMPPXT5nDzTp09np512YsyYMeyxxx4899xzAOy8885885vfZKedduK8885raL1u3jEz66bp06czadIkHnjgASKC7bbbjssuu4xHHnmEGTNmAKl556GHHmL27NlstNFGjB8/nnvuuYftttuOo48+muuvv54hQ4Zw1VVXccopp3DxxWmqsoULF3LHHXc0vGaHvplZN919993sv//+rLnmmgB88pOfXL54Sq1x48YxbFiak3L06NE8/fTTrLfeejzyyCPstttuALz55ptsuOGGy8856KCDeqVmh76ZWTfVO3fZwIEDl3/dPo1yRLDFFltw3333dXpO+xtJo7lN38ysmyZMmMB1113Hq6++yuLFi7n22msZP348L7/8cpfnfuhDH2L+/PnLQ3/p0qXMnj27t0uuzpX+8JNv6tZ5ZU3Fama9oI9nF91mm234/Oc/z7hx4wD40pe+xJgxYxg/fjyjRo1ir732Yu+9O8+YNdZYg2uuuYZjjjmGRYsWsWzZMo477ji22GKLXq25MlMrO/Stkjy18jtqhqmVy+Cplc3MrC4OfTOzjDj0zaylNVsTdW/r6fN16JtZyxo0aBALFizIJvgjggULFjBo0KBuf4/K9N4xs/wMGzaMtrY25s+fX3YpfWbQoEHLB3p1h0PfzFrWgAEDGDFiRNlltBQ375iZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhnx4KwW4amjzawR6rrSl7SnpCckzZF0ciePD5R0VfH4A5KGF/sHSLpE0ixJj0n6RmPLNzOz1dFl6EvqB5wP7AWMBA6RNLLDYYcDL0bEpsC5wFnF/gOBgRGxJTAG+Er7G4KZmfW9eq70xwFzImJuRLwBXAlM7HDMROCS4utrgF0lCQhgTUn9gXcDbwAvNaRyMzNbbfWE/lDg2ZrttmJfp8dExDJgETCY9AawGHgO+BNwdkS80PEHSDpC0jRJ03KaLc/MrK/VcyNXnezrOHn1qo4ZB7wJbASsD9wl6XcRMXelAyMuBC6EtEZuHTVZxfhGtVnfqOdKvw14f832MGDeqo4pmnLWBV4APg3cHBFLI+JvwD1Alwv3mplZ76gn9KcCm0kaIWkN4GBgcodjJgOHFV8fANwWaSmbPwEfV7ImsD3weGNKNzOz1dVl6Bdt9EcBtwCPAVdHxGxJZ0jarzjsl8BgSXOArwHt3TrPB9YCHiG9eUyKiIcb/BzMzKxOdQ3OiogpwJQO+06r+XoJqXtmx/Ne6Wy/mZmVw9MwmJllxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGXHom5llxKFvZpYRh76ZWUYc+mZmGakr9CXtKekJSXMkndzJ4wMlXVU8/oCk4TWPbSXpPkmzJc2SNKhx5ZuZ2eroMvQl9QPOB/YCRgKHSBrZ4bDDgRcjYlPgXOCs4tz+wGXAv0TEFsDOwNKGVW9mZqulniv9ccCciJgbEW8AVwITOxwzEbik+PoaYFdJAnYHHo6ImQARsSAi3mxM6WZmtrrqCf2hwLM1223Fvk6PiYhlwCJgMPAPQEi6RdKDkk7s7AdIOkLSNEnT5
s+fv7rPwczM6lRP6KuTfVHnMf2BHYFDi7/3l7Tr2w6MuDAixkbE2CFDhtRRkpmZdUc9od8GvL9mexgwb1XHFO346wIvFPvviIjnI+JVYAqwTU+LNjOz7qkn9KcCm0kaIWkN4GBgcodjJgOHFV8fANwWEQHcAmwl6T3Fm8FOwKONKd3MzFZX/64OiIhlko4iBXg/4OKImC3pDGBaREwGfgn8WtIc0hX+wcW5L0o6h/TGEcCUiLipl56LmZl1ocvQB4iIKaSmmdp9p9V8vQQ4cBXnXkbqtmlmZiXziFwzs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMuLQNzPLiEPfzCwjDn0zs4w49M3MMlLX1MpmZg13+rrdPG9RY+vIjK/0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8uIQ9/MLCMOfTOzjDj0zcwy4tA3M8tIXcslStoTOA/oB1wUEd/r8PhA4FJgDLAAOCginq55fGPgUeD0iDi7MaWbmTWxJl0OsssrfUn9gPOBvYCRwCGSRnY47HDgxYjYFDgXOKvD4+cCv+l5uWZm1hP1NO+MA+ZExNyIeAO4EpjY4ZiJwCXF19cAu0oSgKR/AuYCsxtTspmZdVc9oT8UeLZmu63Y1+kxEbEMWAQMlrQmcBLw7Xf6AZKOkDRN0rT58+fXW7uZma2mekJfneyLOo/5NnBuRLzyTj8gIi6MiLERMXbIkCF1lGRmZt1Rz43cNuD9NdvDgHmrOKZNUn9gXeAFYDvgAEnfB9YD3pK0JCJ+2uPKzcxstdUT+lOBzSSNAP4MHAx8usMxk4HDgPuAA4DbIiKAj7UfIOl04BUHvplZeboM/YhYJuko4BZSl82LI2K2pDOAaRExGfgl8GtJc0hX+Af3ZtFmZtY9dfXTj4gpwJQO+06r+XoJcGAX3+P0btRnPdWkfYXNrBwekWtmlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWXEoW9mlhGHvplZRhz6ZmYZceibmWWkrtCXtKekJyTNkXRyJ48PlHRV8fgDkoYX+3eTNF3SrOLvjze2fDMzWx1dhr6kfsD5wF7ASOAQSSM7HHY48GJEbAqcC5xV7H8e2DcitgQOA37dqMLNzGz11XOlPw6YExFzI+IN4EpgYodjJgKXFF9fA+wqSRHxUETMK/bPBgZJGtiIws3MbPXVE/pDgWdrttuKfZ0eExHLgEXA4A7H/DPwUES83vEHSDpC0jRJ0+bPn19v7WZmtprqCX11si9W5xhJW5CafL7S2Q+IiAsjYmxEjB0yZEgdJZmZWXfUE/ptwPtrtocB81Z1jKT+wLrAC8X2MOBa4HMR8VRPCzYzs+6rJ/SnAptJGiFpDeBgYHKHYyaTbtQCHADcFhEhaT3gJuAbEXFPo4o2M7Pu6TL0izb6o4BbgMeAqyNitqQzJO1XHPZLYLCkOcDXgPZunUcBmwKnSppR/Hlfw5+FmZnVpX89B0XEFGBKh32n1Xy9BDiwk/O+A3ynhzWamVmDeESumVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhmpa7lEM7NVGX7yTd067+lBDS7E6uIrfTOzjDj0zcwy4tA3M8uI2/RPX7eb5y1qbB1mZn3AV/pmZhlx6JuZZcShb2aWEYe+mVlGHPpmZhlx6JuZZ
cShb2aWEYe+mVlGHPpmZhmpa0SupD2B84B+wEUR8b0Ojw8ELgXGAAuAgyLi6eKxbwCHA28Cx0TELQ2r3sysl1VtFtEur/Ql9QPOB/YCRgKHSBrZ4bDDgRcjYlPgXOCs4tyRwMHAFsCewM+K72dmZiWop3lnHDAnIuZGxBvAlcDEDsdMBC4pvr4G2FWSiv1XRsTrEfG/wJzi+5mZWQnqad4ZCjxbs90GbLeqYyJimaRFwOBi//0dzh3a8QdIOgI4oth8RdITdVXfAIINgOdX+8Rvq/HF9ILKP7+zuvn8WkdlX7/K/272/fPbpJ6D6gn9ziqIOo+p51wi4kLgwjpqaThJ0yJibBk/uy/4+bW2Kj+/Kj83aN7nV0/zThvw/prtYcC8VR0jqT+wLvBCneeamVkfqSf0pwKbSRohaQ3SjdnJHY6ZDBxWfH0AcFtERLH/YEkDJY0ANgP+2JjSzcxsdXXZvFO00R8F3ELqsnlxRMyWdAYwLSImA78Efi1pDukK/+Di3NmSrgYeBZYBX42IN3vpuXRXKc1KfcjPr7VV+flV+blBkz4/pQtyMzPLgUfkmpllxKFvZpYRh76ZWUYc+mZmGalrwrWqkrQONf8GEfFCieVYHYq5m/YGhrPya3dOWTXZ6pG0FW9//f67tIIaSNJTwA8i4oKafTdGxD4llrWSLENf0leAM4DXWDFCOIAPlFZUg0jaBziTNCS7P2lUdETEOqUW1jg3AEuAWcBbJdfScJLGAqfw9tdvq1ILaxBJFwNbAbNZ8foFUInQB5YCu0jaDvhKMV/Z26aeKVOWoQ8cD2wREVWcs+VHwCeBWVHN/rjDqhKAq/AfwAlU9E0N2D4iOs7SWyWvRsRBkk4E7pL0KTqZeqZMuYb+U8CrZRfRS54FHqlo4AP8RtLuEfHbsgvpJfOLAY9VdZ+kkRHxaNmF9BIBRMT3JU0nDWp9b7klrSzLwVmSPgJMAh4AXm/fHxHHlFZUg0jaltS8cwcrP7dKtHlL2h+4jNQJYSkVa76StCtwCPB7Vn79KtH8IWkCqYnuL6TnV7Xmq30j4oaa7U2AwyLijBLLWkmuV/o/B26jmh+h/w14BRgErFFyLb3hh8AOVLf56gvA5sAAqtnmfTHwWSr2f0/S5hHxOPBnSdt0ePjGMmpalVxDf1lEfK3sInrJeyNi97KL6EVPUu3mq60jYsuyi+hFf6po89XXSGuC/LCTxwL4eN+Ws2q5hv4fioVbbmDlj9BV6LL5u4q3eT8H3C7pN1Sw+Qq4v+Jt3o9Lupy3/99r6U8yEXFE8fcuZdfSlVzb9P+3k90REVXosvkysCbwBqnNG6rV5v2tzvZHxLf7upbeIOkx4IPA/1LNNu9JneyOiPhinxfTCyQNAo4EdiRd4d8FXBARS0otrEaWoW+tT9LapLB4pexaGqm48fc2EfFMX9diq6+YSv5lUmcDSDfl14+IA8uramVZhr6kAcC/AhOKXbcDP4+Ipas8qYVI2o+a5xYRTXUjqSckjQJ+zYpucM8Dn4uI2eVV1ViStgY+VmzeFREzy6ynkSQNA34CjCddCd8NHBsRbaUW1iCSZkbE1l3tK1Ouc+/8OzAG+FnxZ0yxr+VJ+h5wLGnhmkeBY4t9VXEh8LWI2CQiNgG+Dvyi5JoaRtKxpAFa7yv+XCbp6HKraqhJpBX1NiKNVL2h2FcVD0navn2jGJl7T4n1vE2uV/pN/27cXZIeBkZHxFvFdj/goQq1CVf2tYPlr98OEbG42F4TuK9Cr9+MiBjd1b5WI2kW6ZPLAOBDwJ+K7U2ARyNiVInlrSTX3jtvSvpgRDwFIOkDQLMt49gT65GWrYS0SH2VzJV0KqmJB+AzpJueVSFW/l18s9hXFc9L+gxwRbF9CLCgxHoapWkmVOtKrqF/Aqnb5lzSf6hNSINiquC7pI+YfyA9twnAN8otqaG+CHybFYOV7qQ6rx0UI8UlXVts/xNpDeqq+CLwU+Bc0pXwvcW+ltZKN9qzb
N4BkDSQ9DFMwOMR8XoXp7QMSRsC25Ke2wMR8ZeSS7LVUIzo3JH0+t0ZEQ+VXJL1QLNNrZzljVxJXwXeHREPFz0j3iPpyLLraoRibppXI2JyRFwPLJH0T2XX1SiSbpW0Xs32+pJuKbOmRipuAj4ZET+OiPOAOcXNwEqQdEknr9/FZdbUB75cdgG1sgx94MsRsbB9IyJepMlemB74VkQsat8onmenA5pa1AadvHbvK7GeRvt30txJ7RZTkZ5lha06ef0+UmI9DSWps9/FphoYmWvov0vS8ptjRQ+XqkxO1tlrWqV7N29J2rh9oxjMVKU2StXOK1T0wqrS6/cuSeu3b0h6L9V6fu1z6AMg6evAte9wfJ+r0j/26rgFuFrSBaTA+Bfg5nJLaphpks4Bzic9t6OB6eWW1FCnAHdLuqPYnkCa6Koq5ko6hhVX90cCc0usp9F+CNwr6RrS7+enSDPDVsXOwIWSDgT+DngMGFdqRR1keSNX0rtIQfGPpJtlvwUuioiW77ZZ9Os+lfTcID23f2vv910FkjYAtie9dvdVaQW0onngx6RZGYM0r/5xEfG3UgtrIEkjSc9PwO+rNrlccc/wG6Spow+JCA/OMjOrIkm3kmaCPQYYRlo/4M6IOL7Uwmrk2qb/NpJOL7uG3lJMI11Zkh4su4beVCx2X1mSKjM3FHB+RHwuIhZGxCPAR4FFXZ3Ulxz6K1Sp3bujKo3ofJuI6LhSUdVsW3YBvawqPeeIiOsk/Z2kfYo36/dGxJll11XLzTtmZg1S9Nz5AWnmXpFmSz0hIq4ps65aWYa+pO8D3wFeI/Xa2Zp0s+yydzyxBRQjjf8ZGE5N76xmWpi5J4pFYjr+0i4CpgFfj4iW7ulS9Pq4OSJelvR/gW2AM6syKre4+j2TNPVJf6q3sP1MYLf2G++ShgC/a6YJAXNt3tk9Il4iTZLUBvwDaT6eKrgemAgsIw3saf9TFeeQXquhpBtlx5OmVr6SdNOs1Z1aBP6OwB7AJcAFJdfUSD8CDgMGR8Q6EbF2VQK/8K4OPa0W0GQ5m2s//QHF358AroiIF2rGarW6YRGxZ9lF9KI9I6J2WoILJd0fEWdI+mZpVTVOe7fhvYF/j4jrK9bJ4FmqvbD9zcW0IO2ziB4ETCmxnrfJNfRvkPQ4qXnnyOIjWNOsYdlD90raMiJmlV1IL3mraDdtbyM9oOaxKgTJnyX9nDTO4qyiua6prhR76ERgSjG4rnIL20fECZI+yYoJ8y6MiKYakZtlmz6kiZ6AlyLiTUnvAdapwmyUkh4FNqW6C2t/ADgP2IEU8vcD/wf4MzAmIu4usbweK34X9wRmRcSTxYypW0bEb0surSEk/ZY0t9As0uAloDoL2wNI+jvSKNwA/thsA+uyDH1Jn+tsf0Rc2te1NJoX1m5txSybsyPi5WJ7bWBkRDxQbmWNIWlaRIwtu47e4t47TUrST2o2BwG7Ag9GxAGrOKXpFRNXrVJEvPBOj7cKSSNI8wkNZ+XeSfuVVVMjSXoI2Ka9zbuYMmRaVcYiFOs131aVTy4dtULvnSzb9CNipYWmJa3LiuX3WtV00sdJARsDLxZfr0dar3NEeaU11HWklaRuoKZ5oELeNsumpCr9P/0qcKKk14GlVKzLJu690zJeBTYru4ieiIgRAMXMoZMjYkqxvRcrJl+rgiUR8eOyi+hFlZ5lMyLWLruGXvabZu+9k2vzzg2s6OnxLmAkcHVEnFxeVY0haXpEjOmwrzLtqJI+TXqD/i0r9/6oxPw7VZ9lU9LvI2LXrva1quIN+1lSW377cpdN1Xsn1yv9s2u+XgY8ExFtZRXTYM8XIzkvI4XGZ0gfMatiS+CzpFBsb96JYrvlFeF+cNl1NJqkQcB7gA2KnnPtA2PWATYqrbDGex9phs0HSYMFm24pzyyv9KusuKH7LdLiIgHcCZxRoRu5j5OW3Huj7FoaSdKJEfH9opPB2/5TRsQxJZTVMJKOBY4jBfy8modeAn4RE
T8tpbBeUKzKtzvwBWAscDXwy4h4qtTCClld6a9i3hao0M2kItyPLbuOXjSTdHO6Es0dNR4r/p5WahW9pFjk/TxJR0fET7o8oYVFREj6C/AXUkvC+sA1km6NiBPLrc5X+pVTLOJwYPvi08VH6SsjYo9yK2sMSbcDWwFTWblNvypdNg+MiP/sal+rKUaprlJE/Hdf1dKbijb9w4DngYuA6yJiadH19smI+GCpBZLZlX4tSduQhkoHcHdVZjEENmgPfICIeLG4OVgV3yq7gF72DaBjwHe2r9XsW/z9PtLCIrcV27uQBjJVIvSBDYBPdhwMWXS9bYrFcLIMfUmnAQey4hftV5L+MyK+U2JZjfKWpI0j4k+wfIRuZT7ORcQdXR/VeoqutZ8Ahkqq7ZK6DqmJoKVFxBdg+SpZIyPiuWJ7Q+D8MmtrpIg47R0ee2xVj/WlLEMfOAT4SEQsgeWjBB8kzbHf6k4B7i4mtIJ0Q7fll0uUdHdE7NjJfZmq3I+ZR2rP34+VV3F7mTS3UFUMbw/8wl9JU5tbH8k19J8mTb/QPrPmQKAp7qz3VETcXDRdbU8KxP8TEc+XXFaPRcSOxd+VHNwTETOBmZJeAG6KiCqONga4vWbwUpC6p/6h3JLykuWNXEnXkdYdvZX0i7cbcDdFj5BW7h5XBP4qtfogJklnkLqh3hcRVVocBgBJl5FmEP0vYFKzNAk0kqT9SZ9AoQkHL1VdrqF/2Ds9HhGX9FUtjSbpftISew+TrvS3BP5ImuckIqKlBzFJ+iLpBvwOpKaPu0jBcX2phTWQpHVITZBfIF2UTCIt9vNyqYU1SHGfabOI+F0xlXS/qjy3VpBl6FeZpCuBf2tfREXSKOD4iPh8qYU1mKS/Bz5FWi5x/ao1+0jagDSa+jhSH/5NgR+3eh93SV8m3WN6b0R8UNJmwAVVmYahFTTV7G99RdJmkq6R9Kikue1/yq6rQTavXTUrIh4BRpdYT0NJukjSvaQJyfqTVs5av9yqGkfSvpKuJXVpHACMi4i9gK1Jb3Ct7qvAeNJIXCLiSVI3Tusjud7InUTq730uqZ/wF1gxF0ire0zSRaw8906V2oUHA/2AhcALwPMR0fJdGmscCJwbEXfW7oyIV4umrVb3ekS80b4mdTFttJsb+lCWzTvtM1FKmhURWxb77oqIj5VdW08VE1v9KzU3ykgLbFdlDWAAJH0Y2IPUnbFfRAwruSSrg6Tvk96wP0daDOdI4NGIOKXUwjKSa+jfQ5r69BrSx+g/A9+LiA+VWph1qRjV+DHSm9r6wH3AXRFxcamF9dCqxh+0/12BcQjA8pXADidNSCbSLJQXRY5BVJJcQ39bUpPHesCZwLrA9yPi/lIL6wFJV0fEpyTNovNZGquyMHr7dLV3RcS8Yt9ZEXFSuZVZvSStAWxO+j19omozpja7LEO/iiRtGBHPSboaOKH2IdIb2qdKKq2hJD3Ycb1YSQ9X5U2tXTFf0qD27fZpNVqdpL2BC0iDIUV3F3DaAAAGbUlEQVRaxvMrEfGbUgvLSFY3ciX9KCKO67By1nKtPFNjzdD2TTtO9iRp8xJKaihJ/0pq//2ApIdrHlobuKecqhpP0n7AD0nzzv8N2IT0qXSLMutqoB8Cu0TEHABJHwRuAhz6fSSr0GfF4udnv+NRLSiDULycFAzfBWqXtXy5KgvEFM4kTaHxu4j4iKRdSAO1quJv7YFfmEv11kZoalk270haE3itfX4TSf2AgRHxarmVdZ+kdUk3NqseipXWvp6xpJmkSQHfkvTHiBhXdm09UTOf/m6kTy9Xkz5tH0hq1/96WbXlJrcr/Xa/B/4ReKXYfjdpoe2PllZRD0XEImAR1boqzNFCSWuRutr+h6S/UYGplVkxnz6kmTV3Kr6eT4UG17WCXK/0Z0TE6K72mfW14lPoEtJNzkNJPcv+IyKqtLi9lSjLaRiAxbWzUUoaA7xWYj1mAETE4oh4MyKWRcQlEfHjKgW+pGGSrpX0N
0l/lfRfkjywrg/leqW/LXAlaeEKgA2BgyJi+qrPMut9Rdv3WaT5aET1BmfdSrop396p4jPAoRGxW3lV5SXL0AeQNAD4EOk/1eMRsbTkksyQNAfYt4rz6IObVptBljdyi8CvnZ/mdkk/d/BbE/hrVQO/8Lykz5BWzoLU8aAyzVetIMsr/WIWygFA+2IpnwXejIgvlVeVGUg6D/h74Drg9fb9EfHfpRXVQJI2Bn5KWgQngHuBYzsOKLTek2voz4yIrbvaZ9bXJE3qZHdERBWmVbYmkGXzDvCmpA9GxFMAkj4AvFlyTWZExBfKrqE3SPoJ7zBvfiuvS91qcg3944E/1KyWNZy0kIpZqYr1EA4nzbVTO+Faq1/pTyv+Hg+MBK4qtg8E3GuuD+Ua+oOBUaSwn0gaibuozILMCr8GHictEHMGaYBWy9/YjYhLACR9njTh2tJi+wLSaHjrI7kOzjo1Il4C1iHNBXIBac1Vs7JtGhGnAouLoNwb2LLkmhppI9IkgO3WKvZZH8k19Nvb7/cGLoiI64E1SqzHrF17t+GFkkaRpmEYXl45Dfc94CFJv5L0K+BB4P+VW1Jecu29cyNpicR/BNqnYPije+9Y2SR9Cfgv0tX9r0hXwqdGxM/LrKuRJG1E6ib9GPAeYF7HheCt9+Qa+u8B9gRmRcSTkjYEtowIty2a9aLiTe1YYBgwg7R2wH0R8fFSC8tIlqFv1gok3RgR+5RdRyMVazhvC9wfEaOLVd2+HREHlVxaNnJt0zdrBUPLLqAXLImIJQCSBkbE46Q5sKyP5Npl06wVPFR2Ab2gTdJ6pGkmbpX0Iitmu7U+4OYdsyYj6d3AxhHxRNm19CZJO5F6J90cEW+UXU8u3Lxj1kQk7Uu6wXlzsT1a0uRyq+odEXFHREx24Pcth75ZczkdGAcsBIiIGVSrn76VzKFv1lyWFYvcm/UK38g1ay6PSPo00E/SZsAxpDnnzRrCV/pmzeVo0gybr5NWl3oJOK7UiqxS3HvHzCwjbt4xawKSbuCdFxnZrw/LsQpz6Js1h7PLLsDy4OYdM7OM+ErfrIkUE5J1vBJbRFpu8DsRsaDvq7IqceibNZffkBb5ubzYPhgQKfh/BexbTllWFW7eMWsiku6JiPGd7ZM0KyKqtHSilcD99M2ay1qStmvfkDSOtHoWwLJySrIqcfOOWXP5EnCxpLVIzTovAYdLWhP4bqmVWSW4ecesCUlal/T/c2HZtVi1uHnHrIlIWlfSOcDvgd9J+mHxBmDWEA59s+ZyMfAy8Kniz0vApFIrskpx845ZE5E0IyJGd7XPrLt8pW/WXF6TtGP7hqTxwGsl1mMV4yt9syYiaTRwCWntWAEvAIdFxMOlFmaV4dA3a0KS1gGIiJfKrsWqxc07Zk2kpvfObcBt7r1jjebQN2su7r1jvcrNO2ZNxL13rLf5St+subj3jvUqX+mbNRFJWwOXknrvALyIe+9YAzn0zZqIpK8VX7bPrPkKaS796RExo5yqrErcvGPWXMYC/wKsQ7raPwLYGfiFpBNLrMsqwlf6Zk1E0i3AP0fEK8X2WsA1wP6kq/2RZdZnrc9X+mbNZWPgjZrtpcAmEfEa8Ho5JVmVeBEVs+ZyOXC/pOuL7X2BK4pFVB4tryyrCjfvmDUZSWOAHUlz79wdEdNKLskqxKFvZpYRt+mbmWXEoW9mlhGHvplZRhz6ZmYZ+f/0/wq8Ey4tqgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotTopUsageComparation(df_a_json, df_a_other, 'script_domain', 3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TLD\n", + "All top 3 TLDs are the same for both valid JSON and non-JSON, and they remain for the filtered data. But there are some TLDs that only appear in the whole sample, producing only smaller values[1].\n", + "\n", + "---\n", + " For further investigation: \n", + "1. Why are there TLDs that only produce smaller values? What are they? Are there also ones that only produce bigger values? " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 248 unique script_tld present on the non-json dataset and 141 on the JSONs\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEOCAYAAACHE9xHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAE8JJREFUeJzt3X+QnVWd5/H3F9IQHTDGEBhIIo1WUIgJTWhZdnWHDMElBh1+rCApNKmJS9YqUJklYhK3ytFVRIpxGLbWVGVKJVDZSdiMFBQiE0lBgVWOmo5tSIhoygmkSQwNYgSBTBK++0c/HZuk079vbvfp96vq1n2ec895nm+nO59++txz743MRJJUrmPqXYAkqbYMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhxtS7AICTTjopGxsb612GJI0oLS0tL2TmxN76DYugb2xsZMOGDfUuQ5JGlIh4pi/9nLqRpMIZ9JJUOINekgrX6xx9REwB7gb+HHgDWJGZ/xARfwtcB7RXXZdl5kPVmKXAp4ADwGcz81/6W9i+fftoa2vj9ddf7+9Q1cjYsWOZPHkyDQ0N9S5FUj/05cnY/cBNmbkxIk4EWiLih9Vjf5+Zt3ftHBFnA9cA04DTgEci4szMPNCfwtra2jjxxBNpbGwkIvozVDWQmbz44ou0tbVxxhln1LscSf3Q69RNZu7KzI3V9svAVmBSD0MuA1Zn5t7M/DdgG3B+fwt7/fXXmTBhgiE/TEQEEyZM8C8saQTq1xx9RDQC5wI/qZpuiIhNEfGdiBhftU0CdnQZ1kbPvxh6Ot9AhqlG/H5II1Ofgz4iTgD+GbgxM/8ALAfeDTQBu4C/6+zazfDDPq8wIhZFxIaI2NDe3t7NEEnSUOjTC6YiooGOkF+Vmd8DyMzdXR7/R+DBarcNmNJl+GRg56HHzMwVwAqA5ubmXj+4tnHJ9/tSap9tv/XSHh+fNWsWS5cu5ZJLLjnYdscdd
/CrX/2Kb33rW0ccd8IJJ/DKK68MSY133303t912G5lJZrJw4UIWL148JMfudMstt7Bs2bIhPaaOvqH+/zHa9ZYPI02vV/TR8ff6t4GtmfnNLu2ndul2BbC52n4AuCYijo+IM4CpwE+HruSjY968eaxevfpNbatXr2bevHlH5fw/+MEPuOOOO1i3bh1btmxh48aNjBs3bsjPc8sttwz5MSUNL32ZuvkA8EngoohorW5zgdsi4smI2AT8JfA3AJm5BbgXeAp4GLi+vytuhoOPfexjPPjgg+zduxeA7du3s3PnTj74wQ/yyiuvMHv2bGbOnMn06dO5//77Dxv/2GOP8ZGPfOTg/g033MBdd90FQEtLCxdeeCHnnXcel1xyCbt27Tps/Ne//nVuv/12TjvtNKBjaeN1110HQGtrKxdccAEzZszgiiuu4KWXXgI6/grpfCuJF154gc73D7rrrru48sormTNnDlOnTuXmm28GYMmSJbz22ms0NTVx7bXX8sc//pFLL72Uc845h/e9732sWbNmCP4lJdVbX1bd/CgzIzNnZGZTdXsoMz+ZmdOr9r/KzF1dxnwtM9+dme/JzB/U9kuojQkTJnD++efz8MMPAx1X8x//+MeJCMaOHct9993Hxo0befTRR7npppvI7HX2Ceh4fcBnPvMZ1q5dS0tLCwsXLuSLX/ziYf02b97Meeed1+0x5s+fzze+8Q02bdrE9OnT+fKXv9zreVtbW1mzZg1PPvkka9asYceOHdx666285S1vobW1lVWrVvHwww9z2mmn8Ytf/ILNmzczZ86cPn1NkoY3Xxnbg67TN12nbTKTZcuWMWPGDC6++GKee+45du/e3dOhDnr66afZvHkzH/rQh2hqauKrX/0qbW1tfa5pz549/P73v+fCCy8EYMGCBTz++OO9jps9ezbjxo1j7NixnH322TzzzOHvhTR9+nQeeeQRvvCFL/DEE0/UZKpI0tFn0Pfg8ssvZ/369WzcuJHXXnuNmTNnArBq1Sra29tpaWmhtbWVU0455bD15WPGjOGNN944uN/5eGYybdo0WltbaW1t5cknn2TdunWHnXvatGm0tLT0q96u5zy0nuOPP/7g9rHHHsv+/fsPG3/mmWfS0tLC9OnTWbp0KV/5ylf6dX5Jw5NB34MTTjiBWbNmsXDhwjc9Cbtnzx5OPvlkGhoaePTRR7u9Oj799NN56qmn2Lt3L3v27GH9+vUAvOc976G9vZ0f//jHQMdUzpYtWw4bv3TpUm6++WZ++9vfArB3717uvPNOxo0bx/jx43niiScAuOeeew5e3Tc2Nh785bB27do+fY0NDQ3s27cPgJ07d/LWt76VT3ziEyxevJiNGzf26RiShrdh8X70fVGv5U7z5s3jyiuvfNMKnGuvvZaPfvSjNDc309TUxHvf+97Dxk2ZMoWrr76aGTNmMHXqVM4991wAjjvuONauXctnP/tZ9uzZw/79+7nxxhuZNm3am8bPnTuX3bt3c/HFF5OZRAQLFy4EYOXKlXz605/m1Vdf5V3vehff/e53AVi8eDFXX30199xzDxdddFGfvr5FixYxY8YMZs6cyfz58/n85z/PMcccQ0NDA8uXLx/Qv5mk4SX6+iRiLTU3N+ehHzyydetWzjrrrDpVpCPx+zI8uY5+aI2UdfQR0ZKZzb31c+pGkgpn0EtS4YZ10A+HaSX9id8PaWQatkE/duxYXnzxRcNlmOh8P/qxY8fWuxRJ/TRsV91MnjyZtrY2fGfL4aPzE6YkjSzDNugbGhr8JCNJGgLDdupGkjQ0DHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwvUa9BExJSIejYitEbElIj5Xtb8jIn4YEb+u7sdX7RERd0bEtojYFBEza/1FSJKOrC9X9PuBm
zLzLOAC4PqIOBtYAqzPzKnA+mof4MPA1Oq2CFg+5FVLkvqs16DPzF2ZubHafhnYCkwCLgNWVt1WApdX25cBd2eHfwXeHhGnDnnlkqQ+6dccfUQ0AucCPwFOycxd0PHLADi56jYJ2NFlWFvVJkmqgz4HfUScAPwzcGNm/qGnrt20ZTfHWxQRGyJiQ3t7e1/LkCT1U5+CPiIa6Aj5VZn5vap5d+eUTHX/fNXeBkzpMnwysPPQY2bmisxszszmiRMnDrR+SVIv+rLqJoBvA1sz85tdHnoAWFBtLwDu79I+v1p9cwGwp3OKR5J09I3pQ58PAJ8EnoyI1qptGXArcG9EfAp4FriqeuwhYC6wDXgV+OshrViS1C+9Bn1m/oju590BZnfTP4HrB1mXJGmI+MpYSSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqXK9BHxHfiYjnI2Jzl7a/jYjnIqK1us3t8tjSiNgWEU9HxCW1KlyS1Dd9uaK/C5jTTfvfZ2ZTdXsIICLOBq4BplVjvhURxw5VsZKk/us16DPzceB3fTzeZcDqzNybmf8GbAPOH0R9kqRBGswc/Q0Rsama2hlftU0CdnTp01a1HSYiFkXEhojY0N7ePogyJEk9GWjQLwfeDTQBu4C/q9qjm77Z3QEyc0VmNmdm88SJEwdYhiSpNwMK+szcnZkHMvMN4B/50/RMGzClS9fJwM7BlShJGowBBX1EnNpl9wqgc0XOA8A1EXF8RJwBTAV+OrgSJUmDMaa3DhHxT8As4KSIaAO+BMyKiCY6pmW2A/8dIDO3RMS9wFPAfuD6zDxQm9IlSX3Ra9Bn5rxumr/dQ/+vAV8bTFGSpKHjK2MlqXAGvSQVrtepG/1J45Lv17uEomy/9dJ6lyCNCl7RS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpML1GvQR8Z2IeD4iNndpe0dE/DAifl3dj6/aIyLujIhtEbEpImbWsnhJUu/6ckV/FzDnkLYlwPrMnAqsr/YBPgxMrW6LgOVDU6YkaaB6DfrMfBz43SHNlwErq+2VwOVd2u/ODv8KvD0iTh2qYiVJ/TfQOfpTMnMXQHV/ctU+CdjRpV9b1XaYiFgUERsiYkN7e/sAy5Ak9Waon4yNbtqyu46ZuSIzmzOzeeLEiUNchiSp00CDfnfnlEx1/3zV3gZM6dJvMrBz4OVJkgZroEH/ALCg2l4A3N+lfX61+uYCYE/nFI8kqT7G9NYhIv4JmAWcFBFtwJeAW4F7I+JTwLPAVVX3h4C5wDbgVeCva1CzJKkfeg36zJx3hIdmd9M3gesHW5Qkaej4ylhJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4cYMZnBEbAdeBg4A+zOzOSLeAawBGoHtwNWZ+dLgypQkDdRQXNH/ZWY2ZWZztb8EWJ+ZU4H11b4kqU5qMXVzGbCy2l4JXF6Dc0iS+miwQZ/AuohoiYhFVdspmbkLoLo/eZDnkCQNwqDm6IEPZObOiDgZ+GFE/LKvA6tfDIsA3
vnOdw6yDEnSkQzqij4zd1b3zwP3AecDuyPiVIDq/vkjjF2Rmc2Z2Txx4sTBlCFJ6sGAgz4i/iwiTuzcBv4LsBl4AFhQdVsA3D/YIiVJAzeYqZtTgPsiovM4/zczH46InwH3RsSngGeBqwZfpiRpoAYc9Jn5G+CcbtpfBGYPpihJ0tDxlbGSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klQ4g16SCmfQS1LhDHpJKpxBL0mFM+glqXAGvSQVzqCXpMIZ9JJUOINekgpn0EtS4Qx6SSqcQS9JhTPoJalwBr0kFc6gl6TCGfSSVDiDXpIKZ9BLUuEMekkqnEEvSYUz6CWpcAa9JBXOoJekwhn0klS4mgV9RMyJiKcjYltELKnVeSRJPatJ0EfEscD/AT4MnA3Mi4iza3EuSVLPanVFfz6wLTN/k5n/DqwGLqvRuSRJPahV0E8CdnTZb6vaJElH2ZgaHTe6acs3dYhYBCyqdl+JiKdrVMtodBLwQr2L6E18o94VqA782Rxap/elU62Cvg2Y0mV/MrCza4fMXAGsqNH5R7WI2JCZzfWuQzqUP5v1Uaupm58BUyPijIg4DrgGeKBG55Ik9aAmV/SZuT8ibgD+BTgW+E5mbqnFuSRJPavV1A2Z+RDwUK2Orx45Jabhyp/NOojM7L2XJGnE8i0QJKlwBr0kFc6gl1QzEXFMRPynetcx2jlHP8JFxETgOqCRLk+uZ+bCetUkdRURP87M/1jvOkazmq260VFzP/AE8AhwoM61SN1ZFxH/FfheemVZF17Rj3AR0ZqZTfWuQzqSiHgZ+DM6LkReo+MtUjIz31bXwkYR5+hHvgcjYm69i5COJDNPzMxjMrMhM99W7RvyR5FX9CNcl6ulfwf2Vc1eLWnYiIgArgXOyMz/FRFTgFMz86d1Lm3UMOgl1VRELAfeAC7KzLMiYjywLjPfX+fSRg2fjC1ARPwV8BfV7mOZ+WA965EO8R8yc2ZE/BwgM1+q3uxQR4lz9CNcRNwKfA54qrp9rmqThot91ceLJhxcEvxGfUsaXZy6GeEiYhPQlJlvVPvHAj/PzBn1rUzqEBHXAh8HZgIrgY8B/zMz/19dCxtFnLopw9uB31Xb4+pZiHSozFwVES3AbDqWVl6emVvrXNaoYtCPfF8Hfh4Rj9Lxn+gvgKX1LUk6zK+BP1BlTkS8MzOfrW9Jo4dTNwWIiFOB99MR9D/JzN/WuSTpoIj4DPAlYDcdL5rqfMGU04tHiUE/wkXEB4DWzPxjRHyCjnnQf8jMZ+pcmgRARGyjY+XNi/WuZbRy1c3Itxx4NSLOAT4PPAPcXd+SpDfZAeypdxGjmXP0I9/+zMyIuAy4MzO/HREL6l2UFBH/o9r8DfBYRHwf2Nv5eGZ+sy6FjUIG/cj3ckQsBT4J/OdqeWVDnWuSAE6s7p+tbsdVN6jW1OvocI5+hIuIPwfmAT/LzB9V7yMyKzPvqXNpEgARcdWha+a7a1PtGPQjVPVmZp3fvKjus9reC2wDvpiZ6+tQnnRQRGzMzJm9tal2nLoZoTLzxCM9Vk3fvA9YVd1LR11EfBiYC0yKiDu7PPQ2YH99qhqdDPoCZeYB4BcR8b/rXYtGtZ3ABuAq4Fd0/MV5gI719H9Tx7pGHaduJNVERDQAXwP+G7CdjmnFKcB3gWWZue/IozWUXEcvqVZuA8YDp2fmzMw8F3gXHe/HdHtdKxtlvKKXVBMR8WvgzEM/ELx6DumXmTm1PpWNPl7RS6qVPDTkq8YDuI7+qDLoJdXKUxEx/9DG6j2ZflmHekYtp24k1URETAK+B7wGtNBxFf9+4C3AFZn5XB3LG1UMekk1FREXAdPoWHWzxRfxHX0GvSQVzjl6SSqcQS9JhTPoJalwBr0kFc6gl6TC/X/qB5qnoz9HUQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotUniqueValuesComparation(df_json, df_other, 'script_tld')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonother
com0.6464850.650045
net0.1436210.170509
ru0.0823250.053212
fr0.0230780.006061
cn0.0060600.014413
\n", + "
" + ], + "text/plain": [ + " json other\n", + "com 0.646485 0.650045\n", + "net 0.143621 0.170509\n", + "ru 0.082325 0.053212\n", + "fr 0.023078 0.006061\n", + "cn 0.006060 0.014413" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEHCAYAAABV4gY/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFCRJREFUeJzt3X+QXWWd5/H3l/wgrkSYgcaFNNgZJ4IhUpE0CZgyiZYMRByyOswAC9Qwi6RmnAiWK0XULYpld3T8xWjtRNc4E0a0QnCZESJmh9qRBeSHTHcwDCRMNGIYeuLshMgPgULozHf/uJ1wbZr06fTtPt3Pfb+qqLrnuU+f/vSF+nDuc885NzITSVJZDqk7gCSp9Sx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoGm1vWLjzrqqOzq6qrr10vSpLR58+YnM7NjuHm1lXtXVxe9vb11/XpJmpQi4vEq81yWkaQCWe6SVCDLXZIKVNuauyRV8fLLL9PX18eLL75Yd5RxNWPGDDo7O5k2bdpB/bzlLmlC6+vrY+bMmXR1dRERdccZF5nJnj176OvrY/bs2Qe1D5dlJE1oL774IkceeWTbFDtARHDkkUeO6t2K5S5pwmunYt9ntH+z5S5JBXLNXdKk0rX6uy3d384/PbvSvHe84x3cd999Lf3dY8lyv+bwUf78M63JIWlCm0zFDpO83Fvxf/CdM1oQRFLxDjvsMH784x9z3nnn8eyzz9Lf389XvvIV3vnOd3LjjTfyqU99iszk7LPP5jOf+cz+n7niiiu47bbbeN3rXsett97KG9/4xnHJ65q7JFW0fv16zjzzTLZs2cJDDz3E/Pnz2bVrF1dddRV33HEHW7Zsoaenh1tuuQWA559/ntNOO42HHnqIJUuW8LWvfW3cslruklTRqaeeyvXXX88111zDww8/zMyZM+np6WHZsmV0dHQwdepULrzwQu6++24Apk+fzvve9z4AFixYwM6dO8ctq+UuSRUtWbKEu+++m1mzZnHxxRdzww03kJmvOX/atGn7T2mcMmUK/f394xXVcpekqh5//HGOPvpoLrvsMi699FIefPBBFi1axF133cWTTz7J3r17ufHGG1m6dGndUSf3B6qS2k/VUxdbLSK48847+dznPse0adM47LDDuOGGGzjmmGP49Kc/zbve9S4yk/e+972sWLGiloy/kvdAbynGUnd3d472yzpac7bMfxzdDjwVUhpTjz76KG9961trzbBnzx5OOeUUHn+80vdktMxQf3tEbM7M7uF+1mUZSTqAXbt2cfrpp/Oxj32s7igj4rKMJB3Asccey49+9KO6Y4yYR+6SVCDLXZIKVKncI+KsiNgeETsiYvVrzPm9iNgWEVsjYn1rY0qSRmLYNfeImAKsAc4A+oCeiNiYmdua5swBPg4szsynIuLosQosSRpelQ9UFwI7MvMxgIjYAKwAtjXNuQxYk5lPAWTmv7Y6qCQBo7+T66v2d3CnMz/99NOsX7+eD33oQwDceeedfP7zn+e2225rZbqDVmVZZhbwRNN238BYs7cAb4mIeyPiBxFxVqsCStJE9PTTT/PlL3+5Zftr9a0JqpT7UN/1NPjKp6nAHGAZcAHwFxFxxKt2FLEyInojonf37t0jzSpJtbnuuuuYN28e8+
bN44tf/CKrV6/mJz/5CfPnz+fKK68E4LnnnuPcc8/lxBNP5MILL9x/35nNmzezdOlSFixYwJlnnsnPfvYzAJYtW8YnPvEJli5dype+9KWW5q2yLNMHHNe03QnsGmLODzLzZeCnEbGdRtn3NE/KzLXAWmhcoXqwoSVpPG3evJnrr7+eBx54gMxk0aJFfPOb3+SRRx5hy5YtQGNZ5oc//CFbt27l2GOPZfHixdx7770sWrSID3/4w9x66610dHRw00038clPfpJ169YBjXcAd911V8szVyn3HmBORMwG/hk4Hxh8zf4tNI7Y/yoijqKxTPNYK4NKUl3uuece3v/+9/P6178egA984AN8//vff9W8hQsX0tnZCcD8+fPZuXMnRxxxBI888ghnnHEGAHv37uWYY47Z/zPnnXfemGQettwzsz8iVgG3A1OAdZm5NSKuBXozc+PAc78VEduAvcCVmblnTBJL0jireg+uQw89dP/jfbf4zUxOOukk7r///iF/Zt//MFqt0nnumbkpM9+SmW/OzD8ZGLt6oNjJho9m5tzMfFtmbhiTtJJUgyVLlnDLLbfwwgsv8Pzzz/Ptb3+bxYsX84tf/GLYnz3hhBPYvXv3/nJ/+eWX2bp161hH9t4ykiaZGu7Eesopp3DJJZewcOFCAD74wQ+yYMECFi9ezLx581i+fDlnnz30rYinT5/OzTffzOWXX84zzzxDf38/H/nIRzjppJPGNLO3/PWWv9KENhFu+VsXb/krSfoVlrskFchylzTh1bV8XKfR/s2Wu6QJbcaMGezZs6etCj4z2bNnDzNmzDjofXi2jKQJrbOzk76+PtrtliUzZszYf0HUwbDcJU1o06ZNY/bs2XXHmHRclpGkAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlco9Is6KiO0RsSMiVg/x/CURsTsitgz888HWR5UkVTXsl3VExBRgDXAG0Af0RMTGzNw2aOpNmblqDDJKkkaoypH7QmBHZj6WmS8BG4AVYxtLkjQaVcp9FvBE03bfwNhgvxMR/xARN0fEcUPtKCJWRkRvRPS22/chStJ4qlLuMcTY4K8h/w7QlZknA38HfH2oHWXm2szszszujo6OkSWVJFVWpdz7gOYj8U5gV/OEzNyTmb8c2PwasKA18SRJB6NKufcAcyJidkRMB84HNjZPiIhjmjbPAR5tXURJ0kgNe7ZMZvZHxCrgdmAKsC4zt0bEtUBvZm4ELo+Ic4B+4OfAJWOYWZI0jGHLHSAzNwGbBo1d3fT448DHWxtNknSwvEJVkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJVKveIOCsitkfEjohYfYB550ZERkR36yJKkkZq2HKPiCnAGmA5MBe4ICLmDjFvJnA58ECrQ0qSRqbKkftCYEdmPpaZLwEbgBVDzPtvwGeBF1uYT5J0EKqU+yzgiabtvoGx/SLi7cBxmXnbgXYUESsjojcienfv3j3isJKkaqqUewwxlvufjDgE+DPgPw+3o8xcm5ndmdnd0dFRPaUkaUSqlHsfcFzTdiewq2l7JjAPuDMidgKnARv9UFWS6lOl3HuAORExOyKmA+cDG/c9mZnPZOZRmdmVmV3AD4BzMrN3TBJLkoY1bLlnZj+wCrgdeBT4VmZujYhrI+KcsQ4oSRq5qVUmZeYmYNOgsatfY+6y0ceSJI2GV6hKUoEsd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVK
BK5R4RZ0XE9ojYERGrh3j+DyPi4YjYEhH3RMTc1keVJFU1bLlHxBRgDbAcmAtcMER5r8/Mt2XmfOCzwHUtTypJqqzKkftCYEdmPpaZLwEbgBXNEzLz2abN1wPZuoiSpJGaWmHOLOCJpu0+YNHgSRHxx8BHgenAu4faUUSsBFYCHH/88SPNKkmqqMqRewwx9qoj88xck5lvBq4C/stQO8rMtZnZnZndHR0dI0sqSaqsSrn3Acc1bXcCuw4wfwPwH0YTSpI0OlXKvQeYExGzI2I6cD6wsXlCRMxp2jwb+HHrIkqSRmrYNffM7I+IVcDtwBRgXWZujYhrgd7M3Aisioj3AC8DTwG/P5ahJUkHVuUDVTJzE7Bp0NjVTY+vaHEuSdIoeIWqJBXIcpekAlnuklQgy12SCmS5S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCDLXZIKZLlLUoEsd0kqUKVyj4izImJ7ROyIiNVDPP/RiNgWEf8QEd+LiDe1Pqokqaphyz0ipgBrgOXAXOCCiJg7aNoPge7MPBm4Gfhsq4NKkqqrcuS+ENiRmY9l5kvABmBF84TM/L+Z+cLA5g+AztbGlCSNRJVynwU80bTdNzD2Wi4F/vdoQkmSRmdqhTkxxFgOOTHiIqAbWPoaz68EVgIcf/zxFSNKkkaqypF7H3Bc03YnsGvwpIh4D/BJ4JzM/OVQO8rMtZnZnZndHR0dB5NXklRBlXLvAeZExOyImA6cD2xsnhARbwe+SqPY/7X1MSVJIzFsuWdmP7AKuB14FPhWZm6NiGsj4pyBaZ8DDgP+V0RsiYiNr7E7SdI4qLLmTmZuAjYNGru66fF7WpxLkjQKXqEqSQWy3CWpQJa7JBXIcpekAlX6QFVt4prDR/nzz7Qmh6RR88hdkgpkuUtSgSx3SSqQ5S5JBbLcJalAlrskFchyl6QCWe6SVCAvYipE1+rvjnofO2e0IIikCcEjd0kqkOUuSQWy3CWpQJa7JBXIcpekAlnuklQgy12SCmS5S1KBLHdJKlClco+IsyJie0TsiIjVQzy/JCIejIj+iDi39TElSSMxbLlHxBRgDbAcmAtcEBFzB037J+ASYH2rA0qSRq7KvWUWAjsy8zGAiNgArAC27ZuQmTsHnvu3McgoSRqhKssys4Anmrb7BsYkSRNUlXKPIcbyYH5ZRKyMiN6I6N29e/fB7EKSVEGVcu8Djmva7gR2Hcwvy8y1mdmdmd0dHR0HswtJUgVVyr0HmBMRsyNiOnA+sHFsY0mSRmPYcs/MfmAVcDvwKPCtzNwaEddGxDkAEXFqRPQBvwt8NSK2jmVoSdKBVfompszcBGwaNHZ10+MeGss1kqQJwK/ZU3FG+5WDO//07BYlkerj7QckqUCWuyQVyHKXpAJZ7pJUIMtdkgpkuUtSgSx3SSqQ5S5JBbLcJalAXqEqDXbN4S3YxzOj34c0Ch65S1KBLHdJKpDlLkkFstwlqUCWuyQVyHKXpAJZ7pJUIMtdkgrkRUxSwfzKwfblkbskFchyl6QCuSwjSVVMsnsOVTpyj4izImJ7ROyIiNVDPH9oRNw08PwDEdHV6qCSpOqGLfeImAKsAZYDc4ELImLuoGmXAk9l5m8CfwZ8ptVBJUnVVVmWWQjsyMzHACJiA7AC2NY0ZwVwzcDjm4E/j4jIzGxhVknjbZItRRzIqM8cmtGiIOOkyrLMLOCJpu2+gbEh52RmP/AMcGQrAkqSRq7KkXsMMTb4iLzKHCJiJbByYPO5iNhe4fePqYCjgCcPegf/dag/fXLytWgY9esAvhbNfC1e0ZrX4k1VJlUp9z7guKbtTmDXa8zpi4ipwOHAzwfvKDPXAmurBBsvEdGbmd1155gIfC0afB1e4Wvxisn2WlRZlukB5kTE7IiYDpwPbBw0ZyPw+wOPzwXucL1dkuoz7JF7ZvZHxCrgdm
AKsC4zt0bEtUBvZm4E/hL4RkTsoHHEfv5YhpYkHVili5gycxOwadDY1U2PXwR+t7XRxs2EWiaqma9Fg6/DK3wtXjGpXotw9USSyuO9ZSSpQJa7JBXIcpekArXlXSEj4mSgi6a/PzP/prZANYmI2Zn50+HG1D4G7iX19cy8qO4sGp22K/eIWAecDGwF/m1gOIG2K3fgr4FTBo3dDCyoIUutIuJ6hriqOjP/Uw1xapOZeyOiIyKmZ+ZLdeepW0R8gMaNEI+mcSV+AJmZb6g1WAVtV+7AaZk5+K6WbSUiTgROAg4f+I93nzcAk+z2SC1zW9PjGcD7efWV2O1iJ3BvRGwEnt83mJnX1ZaoPp8FfjszH607yEi1Y7nfHxFzM3Pb8FOLdQLwPuAI4Lebxn8BXFZLoppl5l83b0fEjcDf1RSnFhHxjcy8GDiPxq27DwFm1puqdv9vMhY7tOF57hGxBPgO8C/AL3nlbdbJtQarQUScnpn3151jIoqIE4DvDnxHQVuIiG00vrfhO8Cywc9n5qvuF1W6iPgS8O+BW2j0BTA5PqNrxyP3dcDFwMO8subervZExPeAN2bmvIEPms/JzP9ed7DxFBEB7AWeaxr+F+CqehLV5n8CfwvMBnqbxoPG5xG/UUeomr0BeAH4raaxSfEZXTseud+Rme+uO8dEEBF3AVcCX83Mtw+MPZKZ8+pNNv4i4sHMHPzhcluKiK9k5h/VnWMiiIivA1dk5tMD278GfGEyfNDejkfu/xgR62m89ZxUb7PGwL/LzL9vHLju119XmJrdFxGnZmZP3UHqZrH/ipP3FTtAZj4VEW+vM1BV7Vjur6NR6pPubdYYeDIi3szAKYARcS7ws3oj1ebdwB9FxE4aZ4i07Wcx+hWHRMSvZeZTABHx60yS3pwUIVspM/+g7gwTyB/TuNPdiRHxz8BPgQvrjVSb5XUH0IT0BRrv6m6mcRD0e8Cf1BupmnZcc+8E/gewmMa/rHtorKn11RqsBhFxKI0vV+kCfh14lsbR6rV15pImkoiYS+OdXQDfmyynUbdjuf8fYD3wjYGhi4ALM/OM+lLVIyL+FngaeJDG2SIAZOYXagslqSXasdy3ZOb84cbaQbueGSO1g3a8K+STEXFRREwZ+OciYE/doWpyX0S8re4QklqvHY/cjwf+HDidxpr7fcDlmflPtQarwcAVib9J44PUtr5aVypNO5b714GPDDq16fOT4aKEVouINw01npmPj3cWSa3VdqdC0rgo4al9G5n588lyUUKrWeJSudpxzf2QgUuIgcl1UYIkVdWOpTZpL0qQpKrabs0dJu9FCZJUVVuWuySVrh3X3CWpeJa7JBXIcpekAlnuklQgy12SCvT/AbRxrevuS2knAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotTopUsageComparation(df_json, df_other, 'script_tld', 4)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are 52 unique script_tld present on the non-json dataset and 89 on the JSONs\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEOCAYAAACZ2uz0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEc1JREFUeJzt3XuMlfWZwPHvIwwdrYoU0VVxHUzwhiDi1LWXrUZstGorul5KsJLFlTRZb62ogJs07Xa9xbTWZmti6ioastJlbWxs61JZTG1i3DLjKCD1EtfLiNLRVVoVKeizf8yBcBmcw9xe5jffT0Jmzjvve84DM3x5+c17zkRmIkka/PaoegBJUt8w6JJUCIMuSYUw6JJUCIMuSYUw6JJUCIMuSYUw6JJUCIMuSYUYPpAPtv/++2dTU9NAPqQkDXotLS1vZeaY7vYb0KA3NTWxfPnygXxISRr0IuKVevZzyUWSCmHQJakQBl2SCjGga+iSdg8bN26kvb2dDz/8sOpRtJXGxkbGjh1LQ0NDj4436NIQ1N7ezj777ENTUxMRUfU4AjKTt99+m/b2dsaNG9ej+3DJRRqCPvzwQ0aPHm3MdyMRwejRo3v1vyaDLg1Rxnz309vPiUGXpEK4ht6Fprm/rHqEYrx881lVj6A69PXXfHef91NOOYV58+Zx+umnb9l2++238/zzz/OTn/xkp8ftvffevPfee30y43333cett95KZpKZzJo1izlz5vTJfW924403Mn/+/D69z0/iGbqkATd9+nQeeOCBbbY98MADTJ8+fUAe/9e//jW33347S5YsYdWqVbS2tjJy5Mg+f5wbb7yxz+/zkxh0SQPu/PPP5+GHH2bDhg0AvPzyy6xZs4YvfvGLvPfee0ydOpUpU6YwceJEHnrooR2Of+yxxzj77LO33L788su59957AWhpaeHkk0/mhBNO4PTTT+eNN97Y4fibbrqJ2267jYMPPhjovFzwsssuA6CtrY2TTjqJSZMmce655/LOO+8Anf+r2PzSJW+99RabX5fq3nvv5bzzzuOMM85g/PjxXHfddQDMnTuX9evXM3nyZGbMmMH777/PWWedxXHHHcexxx7LokWL+uBPclsGXdKAGz16NCeeeCKPPPII0Hl2ftFFFxERNDY28vOf/5zW1laWLVvGNddcQ2bWdb8bN27kiiuuYPHixbS0tDBr1ixuuOGGHfZbuXIlJ5xwQpf3cckll3DLLbfwzDPPMHHiRL773e92+7htbW0sWrSIFStWsGjRIl577TVuvvlm9txzT9ra2li4cCGPPPIIBx98ME8//TQrV67kjDPOqOv3tCsMuqRKbL3ssvVyS2Yyf/58Jk2axGmnncbrr7/O2rVr67rP5557jpUrV/LlL3+ZyZMn8/3vf5/29va6Z1q3bh3vvvsuJ598MgAzZ87kt7/9bbfHTZ06lZEjR9LY2MgxxxzDK6/s+FpaEydO5NFHH+X666/n8ccf75
clHoMuqRLTpk1j6dKltLa2sn79eqZMmQLAwoUL6ejooKWlhba2Ng488MAdrs0ePnw4H3/88Zbbmz+emUyYMIG2tjba2tpYsWIFS5Ys2eGxJ0yYQEtLyy7Nu/Vjbj/Ppz71qS3vDxs2jE2bNu1w/BFHHEFLSwsTJ05k3rx5fO9739ulx6+HQZdUib333ptTTjmFWbNmbfPN0HXr1nHAAQfQ0NDAsmXLujzbPeyww3j22WfZsGED69atY+nSpQAceeSRdHR08MQTTwCdSzCrVq3a4fh58+Zx3XXX8eabbwKwYcMG7rjjDkaOHMmoUaN4/PHHAbj//vu3nK03NTVt+Udg8eLFdf0eGxoa2LhxIwBr1qxhr7324uKLL2bOnDm0trbWdR+7wssWJVV2een06dM577zztrniZcaMGXz1q1+lubmZyZMnc9RRR+1w3KGHHsqFF17IpEmTGD9+PMcffzwAI0aMYPHixVx55ZWsW7eOTZs2cfXVVzNhwoRtjj/zzDNZu3Ytp512GplJRDBr1iwAFixYwDe/+U0++OADDj/8cO655x4A5syZw4UXXsj999/PqaeeWtfvb/bs2UyaNIkpU6ZwySWXcO2117LHHnvQ0NDAnXfe2aM/s08S9X6zoS80NzfnYPgBF16H3ne8Dn33tHr1ao4++uiqx1AXuvrcRERLZjZ3d6xLLpJUCIMuSYUw6NIQNZDLrapPbz8nBl0aghobG3n77beN+m5k8+uhNzY29vg+vMpFGoLGjh1Le3s7HR0dVY+irWz+iUU9ZdClIaihoaHHPxVHuy+XXCSpEAZdkgph0CWpEHUFPSK+FRGrImJlRPx7RDRGxLiIeDIiXoiIRRExor+HlSTtXLdBj4hDgCuB5sw8FhgGfB24BfhhZo4H3gEu7c9BJUmfrN4ll+HAnhExHNgLeAM4Fdj8kmMLgGl9P54kqV7dBj0zXwduA16lM+TrgBbg3czc/KK/7cAhXR0fEbMjYnlELPeaV0nqP/UsuYwCzgHGAQcDnwa+0sWuXT7lLDPvyszmzGweM2ZMb2aVJH2CepZcTgP+NzM7MnMj8CDweWC/2hIMwFhgTT/NKEmqQz1BfxU4KSL2iogApgLPAsuA82v7zAR2/NHckqQBU88a+pN0fvOzFVhRO+Yu4Hrg2xHxIjAauLsf55QkdaOu13LJzO8A39lu80vAiX0+kSSpR3ymqCQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiEMuiQVwqBLUiHqCnpE7BcRiyPiDxGxOiI+FxGfiYjfRMQLtbej+ntYSdLO1XuG/iPgkcw8CjgOWA3MBZZm5nhgae22JKki3QY9IvYFvgTcDZCZf8nMd4FzgAW13RYA0/prSElS9+o5Qz8c6ADuiYinIuKnEfFp4MDMfAOg9vaArg6OiNkRsTwilnd0dPTZ4JKkbdUT9OHAFODOzDweeJ9dWF7JzLsyszkzm8eMGdPDMSVJ3akn6O1Ae2Y+Wbu9mM7Ar42IgwBqb//YPyNKkurRbdAz803gtYg4srZpKvAs8AtgZm3bTOChfplQklSX4XXudwWwMCJGAC8Bf0/nPwY/i4hLgVeBC/pnRElSPeoKema2Ac1dfGhq344jSeopnykqSYUw6JJUCIMuSYUw6JJUCIMuSYUw6JJUCIMuSYUw6JJUCIMuSYUw6JJUCIMuSYUw6JJUCIMuSYWo9+VzJe0Gmub+suoRivLyzWdVPUKf8gxdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEA
Zdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEAZdkgph0CWpEAZdkgpRd9AjYlhEPBURD9duj4uIJyPihYhYFBEj+m9MSVJ3duUM/Spg9Va3bwF+mJnjgXeAS/tyMEnSrqkr6BExFjgL+GntdgCnAotruywApvXHgJKk+tR7hn47cB3wce32aODdzNxUu90OHNLVgRExOyKWR8Tyjo6OXg0rSdq5boMeEWcDf8zMlq03d7FrdnV8Zt6Vmc2Z2TxmzJgejilJ6s7wOvb5AvC1iDgTaAT2pfOMfb+IGF47Sx8LrOm/MSVJ3en2DD0z52Xm2MxsAr4O/HdmzgCWAefXdpsJPNRvU0qSutWb69CvB74dES/SuaZ+d9+MJEnqiXqWXLbIzMeAx2rvvwSc2PcjSZJ6wmeKSlIhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFaLboEfEoRGxLCJWR8SqiLiqtv0zEfGbiHih9nZU/48rSdqZes7QNwHXZObRwEnAP0bEMcBcYGlmjgeW1m5LkirSbdAz843MbK29/2dgNXAIcA6woLbbAmBafw0pSereLq2hR0QTcDzwJHBgZr4BndEHDtjJMbMjYnlELO/o6OjdtJKknao76BGxN/CfwNWZ+ad6j8vMuzKzOTObx4wZ05MZJUl1qCvoEdFAZ8wXZuaDtc1rI+Kg2scPAv7YPyNKkupRz1UuAdwNrM7MH2z1oV8AM2vvzwQe6vvxJEn1Gl7HPl8AvgGsiIi22rb5wM3AzyLiUuBV4IL+GVGSVI9ug56ZvwNiJx+e2rfjSJJ6ymeKSlIhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhDLokFcKgS1IhehX0iDgjIp6LiBcjYm5fDSVJ2nU9DnpEDAP+FfgKcAwwPSKO6avBJEm7pjdn6CcCL2bmS5n5F+AB4Jy+GUuStKt6E/RDgNe2ut1e2yZJqsDwXhwbXWzLHXaKmA3Mrt18LyKe68Vjalv7A29VPcQniVuqnkAV2e2/NmFQfX0eVs9OvQl6O3DoVrfHAmu23ykz7wLu6sXjaCciYnlmNlc9h7Q9vzar0Zsll98D4yNiXESMAL4O/KJvxpIk7aoen6Fn5qaIuBz4L2AY8G+ZuarPJpMk7ZLeLLmQmb8CftVHs2jXuZSl3ZVfmxWIzB2+jylJGoR86r8kFcKgS1IhDLqkXomIPSLi81XPIdfQB42IGANcBjSx1TezM3NWVTNJm0XEE5n5uarnGOp6dZWLBtRDwOPAo8BHFc8ibW9JRPwd8GB6llgZz9AHiYhoy8zJVc8hdSUi/gx8ms6TjfV0vjRIZua+lQ42xLiGPng8HBFnVj2E1JXM3Ccz98jMhszct3bbmA8wz9AHia3OgP4CbKxt9gxIu4WICGAGMC4z/zkiDgUOysz/qXi0IcWgS+q1iLgT+Bg4NTOPjohRwJLM/GzFow0pflN0EImIrwFfqt18LDMfrnIeaSt/k5lTIuIpgMx8p/aifRpArqEPEhFxM3AV8Gzt11W1bdLuYGPtx1ImbLnM9uNqRxp6XHIZJCLiGWByZn5cuz0MeCozJ1U7mQQRMQO4CJgCLADOB/4pM/+j0sGGGJdcBpf9gP+rvT+yykGkrWXmwohoAabSec
nitMxcXfFYQ45BHzxuAp6KiGV0/oX5EjCv2pGkbbwA/IlaVyLirzPz1WpHGlpcchlEIuIg4LN0Bv3JzHyz4pEkACLiCuA7wFo6n1y0+YlFLgkOIIM+SETEF4C2zHw/Ii6mc63yR5n5SsWjSUTEi3Re6fJ21bMMZV7lMnjcCXwQEccB1wKvAPdVO5K0xWvAuqqHGOpcQx88NmVmRsQ5wB2ZeXdEzKx6KA1tEfHt2rsvAY9FxC+BDZs/npk/qGSwIcqgDx5/joh5wDeAv61dtthQ8UzSPrW3r9Z+jaj9gto16Ro4rqEPEhHxV8B04PeZ+bvaa2Wckpn3VzyaRERcsP01511tU/8y6Lu52otybf4kRe1t1t7fALwI3JCZSysYTwIgIlozc0p329S/XHLZzWXmPjv7WG3Z5VhgYe2tNKAi4ivAmcAhEXHHVh/aF9hUzVRDl0EfxDLzI+DpiPhx1bNoyFoDLAcuAJ6n83+PH9F5Pfq3KpxrSHLJRVKPRUQD8C/APwAv07kUeChwDzA/Mzfu/Gj1Na9Dl9QbtwKjgMMyc0pmHg8cTudrDd1W6WRDkGfoknosIl4Ajtj+B0PXvr/zh8wcX81kQ5Nn6JJ6I7ePeW3jR3gd+oAz6JJ649mIuGT7jbXXG/pDBfMMaS65SOqxiDgEeBBYD7TQeVb+WWBP4NzMfL3C8YYcgy6p1yLiVGACnVe5rPKJbtUw6JJUCNfQJakQBl2SCmHQJakQBl2SCmHQJakQ/w93tLJrAKtZgwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotUniqueValuesComparation(df_a_json, df_a_other, 'script_tld')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
jsonother
com0.7050270.627121
net0.1508780.190486
ru0.0554860.002650
biz0.0014980.041437
\n", + "
" + ], + "text/plain": [ + " json other\n", + "com 0.705027 0.627121\n", + "net 0.150878 0.190486\n", + "ru 0.055486 0.002650\n", + "biz 0.001498 0.041437" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEHCAYAAABV4gY/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFK9JREFUeJzt3X+QndV93/H31/phNYZCAusOaGWkJgpYLFigRRJWRwLHFDCOFDvEloodM8WoqSsT1zVFNh1KlXbs2K5/NJEzlhNpTDyScJQaZKyOZmLMb0x3BSIgEbmyIsxGxFnWgA0MRiLf/rEr5frqSvvs7l1d3aP3a2Zn7jnPuc/97pXms+ee58eNzESSVJY3tLoASVLzGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAk1s1QuffvrpOX369Fa9vCS1pW3btj2XmR3DjWtZuE+fPp3e3t5WvbwktaWIeLrKOJdlJKlAhrskFchwl6QCtWzNXZKq2L9/P319fbz66qutLuWYmjJlCp2dnUyaNGlUzzfcJR3X+vr6OPnkk5k+fToR0epyjonMZGBggL6+PmbMmDGqfbgsI+m49uqrr3LaaaedMMEOEBGcdtppY/q0YrhLOu6dSMF+0Fh/50rhHhFXRMSuiNgdESsbbP9iRGwf+vlBRLwwpqokSWMy7Jp7REwAVgOXAX1AT0RszsydB8dk5n+sGf9R4IJxqFWSmL7yO03d397PXFVp3Nvf/nYeeuihpr72eKpyQHUusDsz9wBExEZgCbDzCOOXAf+1OeWNTLP/0aH6P7yksrVTsEO1ZZmpwDM17b6hvsNExFnADODuI2xfHhG9EdHb398/0lolqWVOOukknn32WRYuXMjs2bPp6uri/vvvB2DDhg2cd955dHV1cdNNN/3Cc26++Wbe9ra3MX/+fH784x8fs3qrhHujVf08wtilwKbMfL3Rxsxck5ndmdnd0THsfW8k6biyfv16Lr/8crZv387jjz/O7Nmz2bdvHzfddBN3330327dvp6enhzvuuAOAl19+mfnz5/P444+zcOFCvva1rx2zWquEex8wrabdCew7wtilwIaxFiVJx6OLLrqIdevWceutt/LEE09w8skn09PTwyWXXEJHRwcTJ07kmmuu4b777gNg8uTJvPvd7wZgzpw57N2795jVWiXce4CZETEjIiYzGOCb6wdFxNnALwMPN7dESTo+LFy4kPvuu4+pU6fywQ9+kNtuu43MIy1kwKRJkw6d0jhhwgQOHDhwrEodPtwz8wCwAtgKPAV8MzN3RMSqiFhcM3QZsDGP9ptKUht7+umnefOb38z111/Pddddx6OPPsq8efO49957ee6553j99dfZsGEDixYtanWp1W4/kJlbgC11fbfUtW9tXlmS1FirzmCLCO655x4+97nPMWnSJE466SRuu+02zjjjDD796U9z6aWXkpm8613vYsmSJS2p8RfqbdVEu7u7O5v9ZR2eCimV56mnnuKtb31rS2sYGBjgwgsv5OmnK31PRtM0+t0jYltmdg/3XG8/IElHsW/fPi6++GI+8YlPtLqUEfGukJJ0FGeeeSY/+MEPWl3GiDlzl6QCGe6SVCDDXZIKZLhLUoE8oCqpvdx6SpP39+KonvbCCy+wfv16PvKRjwBwzz338PnPf5677rqrmdWNmjN3SRqFF
154ga985StN21+zb01guEtSBV/4whfo6uqiq6uLL33pS6xcuZIf/vCHzJ49mxtvvBGAl156iauvvppzzjmHa6655tB9Z7Zt28aiRYuYM2cOl19+Oc8++ywAl1xyCZ/61KdYtGgRX/7yl5tar8sykjSMbdu2sW7dOh555BEyk3nz5vGNb3yDJ598ku3btwODyzKPPfYYO3bs4Mwzz2TBggU8+OCDzJs3j49+9KPceeeddHR0cPvtt3PzzTezdu1aYPATwL333tv0mg13SRrGAw88wHve8x7e9KY3AfDe97730Bd11Jo7dy6dnZ0AzJ49m71793Lqqafy5JNPctlllwHw+uuvc8YZZxx6zvvf//5xqdlwl6RhVL0H1xvf+MZDjw/e4jczOffcc3n44cZ3Qz/4B6PZXHOXpGEsXLiQO+64g1deeYWXX36Zb33rWyxYsICf/exnwz737LPPpr+//1C479+/nx07dox3yc7cJbWZUZ66OBYXXngh1157LXPnzgXgwx/+MHPmzGHBggV0dXVx5ZVXctVVje8gO3nyZDZt2sQNN9zAiy++yIEDB/jYxz7GueeeO641e8vfYeyd8m+avs9W/OeU2tXxcMvfVvGWv5KkX2C4S1KBDHdJx70T8auZx/o7G+6SjmtTpkxhYGDghAr4zGRgYIApU6aMeh+VzpaJiCuALwMTgD/NzM80GPM+4FYggcczcxyOREo60XR2dtLX10d/f3+rSzmmpkyZcuiCqNEYNtwjYgKwGrgM6AN6ImJzZu6sGTMT+CSwIDOfj4g3j7oiSaoxadIkZsyY0eoy2k6VZZm5wO7M3JOZrwEbgSV1Y64HVmfm8wCZ+Q/NLVOSNBJVwn0q8ExNu2+or9avA78eEQ9GxPeHlnEOExHLI6I3InpPtI9YknQsVQn3aNBXf2RjIjATuARYBvxpRJx62JMy12Rmd2Z2d3R0jLRWSVJFVcK9D5hW0+4E9jUYc2dm7s/MvwV2MRj2kqQWqBLuPcDMiJgREZOBpcDmujF3AJcCRMTpDC7T7GlmoZKk6oYN98w8AKwAtgJPAd/MzB0RsSoiFg8N2woMRMRO4HvAjZk5MF5FS5KOrtJ57pm5BdhS13dLzeMEPj70I0lqMa9QlaQCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgSqFe0RcERG7ImJ3RKxssP3aiOiPiO1DPx9ufqmSpKomDjcgIiYAq4HLgD6gJyI2Z+bOuqG3Z+aKcahRkjRCVWbuc4HdmbknM18DNgJLxrcsSdJYVAn3qcAzNe2+ob56vx0Rfx0RmyJiWlOqkySNSpVwjwZ9Wdf+NjA9M88H/gr4esMdRSyPiN6I6O3v7x9ZpZKkyqqEex9QOxPvBPbVDsjMgcz8+VDza8CcRjvKzDWZ2Z2Z3R0dHaOpV5JUQZVw7wFmRsSMiJgMLAU21w6IiDNqmouBp5pXoiRppIY9WyYzD0TECmArMAFYm5k7ImIV0JuZm4EbImIxcAD4CXDtONYsSRrGsOEOkJlbgC11fbfUPP4k8MnmliZJGi2vUJWkAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqUKVwj4grImJXROyOiJVHGXd1RGREdDevREnSSA0b7hExAVgNXAnMApZFxKwG404GbgAeaXaRkqSRqTJznwvszsw9mfkasBFY0mDcHwCfBV5tYn2SpFGoEu5TgWdq2n1DfYdExAXAtMy862g7iojlEdEbEb39/f0jLlaSVE2VcI8GfXloY8QbgC8C/2m4HWXmmszszszujo6O6lVKkkakSrj3AdNq2p3Avpr2yUAXcE9E7
AXmA5s9qCpJrVMl3HuAmRExIyImA0uBzQc3ZuaLmXl6Zk7PzOnA94HFmdk7LhVLkoY1bLhn5gFgBbAVeAr4ZmbuiIhVEbF4vAuUJI3cxCqDMnMLsKWu75YjjL1k7GVJksbCK1QlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklSgSuEeEVdExK6I2B0RKxts/72IeCIitkfEAxExq/mlSpKqGjbcI2ICsBq4EpgFLGsQ3usz87zMnA18FvhC0yuVJFVWZeY+F9idmXsy8zVgI7CkdkBm/rSm+SYgm1eiJGmkJlYYMxV4pqbdB8yrHxQR/wH4ODAZeEdTqpMkjUqVmXs06DtsZp6ZqzPzV4GbgP/ScEcRyyOiNyJ6+/v7R1apJKmyKuHeB0yraXcC+44yfiPwW402ZOaazOzOzO6Ojo7qVUqSRqRKuPcAMyNiRkRMBpYCm2sHRMTMmuZVwP9rXomSpJEads09Mw9ExApgKzABWJuZOyJiFdCbmZuBFRHxTmA/8DzwofEsWpJ0dFUOqJKZW4AtdX231Dz+/SbXJUkaA69QlaQCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgSqFe0RcERG7ImJ3RKxssP3jEbEzIv46Ir4bEWc1v1RJUlXDhntETABWA1cCs4BlETGrbthjQHdmng9sAj7b7EIlSdVVmbnPBXZn5p7MfA3YCCypHZCZ38vMV4aa3wc6m1umJGkkqoT7VOCZmnbfUN+RXAf8n7EUJUkam4kVxkSDvmw4MOIDQDew6AjblwPLAd7ylrdULFGSNFJVZu59wLSadiewr35QRLwTuBlYnJk/b7SjzFyTmd2Z2d3R0TGaeiVJFVQJ9x5gZkTMiIjJwFJgc+2AiLgA+CqDwf4PzS9TkjQSwy7LZOaBiFgBbAUmAGszc0dErAJ6M3Mz8DngJOAvIgLgR5m5eBzrVru69ZRx2OeLzd+n1OaqrLmTmVuALXV9t9Q8fmeT65IkjYFXqEpSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEqXaGqE9P0ld9p+j73Tmn6LiU14MxdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqUKVwj4grImJXROyOiJUNti+MiEcj4kBEXN38MiVJIzFsuEfEBGA1cCUwC1gWEbPqhv0IuBZY3+wCJUkjV+WukHOB3Zm5ByAiNgJLgJ0HB2Tm3qFt/zgONUqSRqjKssxU4Jmadt9Q34hFxPKI6I2I3v7+/tHsQpJUQZVwjwZ9OZoXy8w1mdmdmd0dHR2j2YUkqYIq4d4HTKtpdwL7xqccSVIzVAn3HmBmRMyIiMnAUmDz+JYlSRqLYcM9Mw8AK4CtwFPANzNzR0SsiojFABFxUUT0Ab8DfDUidoxn0ZKko6v0HaqZuQXYUtd3S83jHgaXayRJxwGvUJWkAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCVznOXNDbTV36n6fvc+5mrmr5PlcOZuyQVyHCXpAIZ7pJUIMNdkgrkAVVJArj1lHHY54vN32dFztwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSpQpXCPiCsiYldE7I6IlQ22vzEibh/a/khETG92oZKk6oYN94iYAKwGrgRmAcsiYlbdsOuA5zPz14AvAn/Y7EIlSdVVmbnPBXZn5p7MfA3YCCypG7ME+PrQ403Ab0RENK9MSdJIVLn9wFTgmZp2HzDvSGMy80BEvAicBjzXjCIlNVDY5fIjMS73x5/S9F22VJVwbzQDz
1GMISKWA8uHmi9FxK4Kr99SAafT7D9S/+3E/VDj+9k8vpfN1Ubv51lVBlUJ9z5gWk27E9h3hDF9ETEROAX4Sf2OMnMNsKZKYceLiOjNzO5W11EK38/m8b1srtLezypr7j3AzIiYERGTgaXA5roxm4EPDT2+Grg7Mw+buUuSjo1hZ+5Da+grgK3ABGBtZu6IiFVAb2ZuBv4M+POI2M3gjH3peBYtSTq6Svdzz8wtwJa6vltqHr8K/E5zSztutNUyUhvw/Wwe38vmKur9DFdPJKk83n5AkgpkuEtSgQx3SSqQ4d5ARJwfEYsj4r0Hf1pdUzuLiBlV+qRjLSJ+GBG/V9d3V6vqaaZKZ8ucSCJiLXA+sAP4x6HuBP53y4pqf38JXFjXtwmY04Ja2lpErKPB1d+Z+W9bUE4J9gOXRsQ84N8N3T9raotragrD/XDzM7P+rpcahYg4BzgXOKXu088/Bwq7k8cxUzurnAK8h8OvGFd1r2Tm+yPiPwP3R8T7aPDHsx0Z7od7OCJmZebOVhdSgLOBdwOnAr9Z0/8z4PqWVNTmMvMva9sRsQH4qxaVU4IAyMzPRsQ2Bi/W/JXWltQcnudeJyIWAt8G/h74OYP/+JmZ57e0sDYWERdn5sOtrqNEEXE28J2h71LQCEXEb2bmt2vaZwEfysxVLSyrKZy5H24t8EHgCf5pzV1jMxAR3wX+RWZ2RcT5wOLM/O+tLqydDH1HwuvASzXdfw/c1JqK2ldEnJOZfwP8XUTUHw8q4oCqM/c6EXF3Zr6j1XWUJCLuBW4EvpqZFwz1PZmZXa2trP1ExKOZWR9GGqGIWJOZyyPie/ziGvvBT+ptnwHO3A/3NxGxnsGlmZ8f7MxMz5YZvV/KzP9b9+VcB1pVTJt7KCIuysyeVhfSzjLz4PdKvAv4CPCvGAz5+4E/aVVdzWS4H+6fMRjq/7qmz1Mhx+a5iPhVhmZIEXE18GxrS2pb7wD+fUTsBV7GY0Jj9XXgp8D/GmovA24D3teyiprEZRmNu4j4lwzece/twPPA3wLXZObTLS2sDQ0d8DuM7+XoRMTjmfm24frakTP3OhHRCfwRsIDBmeYDwO9nZl9LC2tvfwesA77H4GlmP2Xwy13a/oyEY80Qb7rHImJ+Zn4fYOhipgdbXFNTGO6HWwes55/uT/+Bob7LWlZR+7sTeAF4FC+40XEgIp5gcPI2CfjdiPjRUPssoIhrXFyWqRMR2zNz9nB9qs4zY3S8OdLy1kElfEJy5n645yLiA8CGofYyYKCF9ZTgoYg4LzOfaHUhEpQR3sNx5l4nIt4C/DFwMYMf0x4CbsjMH7W0sDYWETuBX2PwQKpX/UrHgOFeJyK+DnwsM58fav8K8Hnvujd6nuEhHXsuyxzu/IPBDpCZP4mIC1pZULszxKVjzy/rONwbIuKXDzaGZu7+EZTUVgytw/1PBg8AbmJwzf19wP9obUmSNDKuuTcQEbMYvMw7gO96b3dJ7cZwl6QCueYuSQUy3CWpQIa7JBXIcJekAhnuklSg/w9NYc34abuLYwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plotTopUsageComparation(df_a_json, df_a_other, 'script_tld', 3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From e1ee1f20a0a14aade8bc1f40080b9461f7ed3bc5 Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 22 Apr 2019 00:13:35 -0300 Subject: [PATCH 22/23] Remove isJson_correlation_domain_and_value.ipynb --- .../isJson_correlation_domain_and_value.ipynb | 922 ------------------ 1 file changed, 922 deletions(-) delete mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb deleted file mode 100644 index f171d76..0000000 --- a/analyses/2019_03_aliamcami_value_analyses/isJson_correlation_domain_and_value.ipynb +++ /dev/null @@ -1,922 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Start Dask" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", - " data = yaml.load(f.read()) or {}\n", - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", - " defaults = yaml.load(f)\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

Client

\n", - "\n", - "
\n", - "

Cluster

\n", - "
    \n", - "
  • Workers: 4
  • \n", - "
  • Cores: 4
  • \n", - "
  • Memory: 8.59 GB
  • \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import dask.dataframe as dd\n", - "from dask.distributed import Client\n", - "\n", - "#Initializing client\n", - "client = Client()\n", - "client" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Objective\n", - "\n", - "The objective of this notebook is to answer two main questions: \n", - " - \"The JSON values are always from the same location or related domains?\" \n", - " - \"Are there a set of location domains that always produces a JSON?\"\n", - "\n", - "To answer this we will use the sample data set produced by the notebook \"isJson_dataPrep.ipynb\" called 'all_json_above_mean.parquet' for first question and 'is_json_above_mean_md5.parquet' for seccond, this contains two extra calculated columns that will be important: 'is_json' and 'location_domain'.\n", - "\n", - "\n", - "OBS.: For \"value\" comparison I will use instead value_md5, because its reliable and faster. Value_md5 is the calculated md5 for the value columns \n", - "OBS2.: To see validation that all biggest values are json please reffer to 'isJson_Sample_Comparasion.ipynb'\n", - "\n", - "### Findings: \n", - "\n", - "On this notebook I was able to validate couple facts about the two proposed questions, which are: \n", - "- One domain produces multiple JSONs\n", - "- One JSON is usually (99.9%) produced by a single domain. \n", - "\n", - "\n", - "- One domain can produce values there are both Json or not, but most produce only one type\n", - "- Most of the domains that produce a single type produces JSON type. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "# Are there a set of location domains that always produces a JSON?\n", - "The dataset used to this analise contains non-json values as well for the sake of proving that one domain may or may not produce only json values." 
- ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['value_md5', 'is_json', 'location_domain'], dtype='object')" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = dd.read_parquet('is_json_above_mean_md5.parquet', engine='pyarrow', columns=['value_md5', 'is_json', 'location_domain'])\n", - "df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_md5is_jsonlocation_domain
0cff77029e3ae45dd439a62987b1d8340Truecanada.ca
19ac0a0a0afb677c8fd985a7c2f4ddbc5Truetmall.com
29ac0a0a0afb677c8fd985a7c2f4ddbc5Truetmall.com
3db64465b639e01993d9212390f057628Falsecoches.net
4db64465b639e01993d9212390f057628Falsecoches.net
\n", - "
" - ], - "text/plain": [ - " value_md5 is_json location_domain\n", - "0 cff77029e3ae45dd439a62987b1d8340 True canada.ca\n", - "1 9ac0a0a0afb677c8fd985a7c2f4ddbc5 True tmall.com\n", - "2 9ac0a0a0afb677c8fd985a7c2f4ddbc5 True tmall.com\n", - "3 db64465b639e01993d9212390f057628 False coches.net\n", - "4 db64465b639e01993d9212390f057628 False coches.net" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "location_domain_group = df.compute().groupby('location_domain')" - ] - }, - { - "cell_type": "code", - "execution_count": 160, - "metadata": {}, - "outputs": [], - "source": [ - "agg = location_domain_group.agg({'value_md5': ['nunique', 'count'], 'is_json': ['sum', 'nunique']})" - ] - }, - { - "cell_type": "code", - "execution_count": 161, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_md5is_json
nuniquecountsumnunique
location_domain
0123movies.com222.01
10010.com288.01
1001freefonts.com20155155.01
10fastfingers.com42828.01
10jqka.com.cn73030.01
\n", - "
" - ], - "text/plain": [ - " value_md5 is_json \n", - " nunique count sum nunique\n", - "location_domain \n", - "0123movies.com 2 2 2.0 1\n", - "10010.com 2 8 8.0 1\n", - "1001freefonts.com 20 155 155.0 1\n", - "10fastfingers.com 4 28 28.0 1\n", - "10jqka.com.cn 7 30 30.0 1" - ] - }, - "execution_count": 161, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "agg.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 178, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1563" - ] - }, - "execution_count": 178, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Count the number of domains that only produce one type of value (json or non-json)\n", - "f1 = agg['is_json']['nunique'] == 1\n", - "agg_1 = agg[f1]\n", - "oneType = len(agg_1['is_json'])\n", - "oneType" - ] - }, - { - "cell_type": "code", - "execution_count": 180, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1226" - ] - }, - "execution_count": 180, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Out of the ones there have only one type of output, these are the ones that have as JSON\n", - "f2 = agg['is_json']['sum'] > 0\n", - "agg_1a = agg[f1 & f2]\n", - "oneType_json = len(agg_1a['is_json'])\n", - "oneType_json" - ] - }, - { - "cell_type": "code", - "execution_count": 185, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 185, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWYAAAFbCAYAAADmwiRlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XmcXFWd9/HP6SV7uCRhSwJSkLBGIGRhG0EWHxkJMgqKgIoLMoo74zgUDy5XZJwgbvggIjjsKDgoMFJICDGCEHYIEFCBhIAEgRDIzdprneePW0k6kKSru6vqd5fv+/WqVzqVSve3afqb0+eee47z3iMiIsnRZB1AREQ2pmIWEUkYFbOISMKomEVEEkbFLCKSMCpmEZGEUTGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgmjYhYRSRgVs4hIwqiYRUQSRsUsIpIwKmYRkYRRMYuIJIyKWUQkYVTMIiIJo2IWEUkYFbOISMKomEVEEkbFLCKSMCpmEZGEUTGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgnTYh1AZEDCYBAwCmgl/v+5ufJrz8e65zywAlgORITRKovIIr1x3nvrDCJvFwbDgN2AHYHxPR5jgW2BbSqPkQP4KF1ARFzUPR+vA4uA54CFwHOE0eoBfByRPlExi614xLsn8E5gUuXXdwIFkjXV9gpxUW8oa/grsIAw6rIMJtmjYpbGCYMhwMHAu4B9iQt4IumeUmsD5gMPAQ+f2XHGvT/+3syFxpkk5VTMUj9hMJi4iA+vPA4CBhsmqrsD2n629DVGAdwL3FN5PLp45oxO02CSKipmqZ14WuIg4Ag2FPEQy0iN1OmbX9qt/ZodN/FHq4E7gFuAWxfPnLGssckkbVTMMjDxqPifgROB9zOwi3Gp9lx57Lz3dPzwkF5e1k08ir4ZuGXxzBnP1z+ZpI2KWfouLuOj2VDGW9kGSoYru957d9j1ycP6+NeeIB5J37x45oxH6xBLUkjFLNWJpynWlfFxqIzf5uSOc566rzxp0gDexd+B3wCXLp4545kaxZIUUjHLloXBPsAZwClAYJwmsbynfff2q10nLYNq8e6AucAlxCNpXTjMGRWzvF28rO3DxIV8sHGaVFjhhz25b/sv96nDu34FuJx4FP1CHd6/JJCKWTYIg3HAF4B/Jb6rTqp0b/ekuz7aec676/ghysDtxKPo0uKZM8p1/FhiLM0L+6VWwmAacCbxKLnVOE0qzS5PrcUUxpY0AcdUHi8WiqVLgIsWz5yxss4fVwxoxJxnYXAQcB5wlHWUtDu0/Scv/91vN67BH3YZ8APg/y2eOUN7eWSIijmPwmB/4kI+xjpKFnR798qE9ut2MIywFDgfuHjxzBlrDXNIjaiY8yQM9gbOBY4HnHGazHixvN39h3X85CDrHMQXCmcClyyeOaPdOoz0n4o5D8JgAvAd4GSStWNbJlzfdcRdxa7T63nhr6+WAP8FXLZ45owO6zDSd/omzbIwGEsYXEa8PeVH0de7LmaVp42yzvAW44GLgGcLxdInrMNI32nEnEVh0ES87O08dIdeXXlP117tV3S2MXiodZYtuAv43OKZM/5qHUSqo2LOmjCYAvwCmGYdJQ9W+8F/ndR+xZ7WOarQAVwAnLd45ow26zCyZfrRNivCYCvC4KfAg6iUG+Zpv/Nr1hmqNAg4B1hQKJbeax1GtkzFnAVh8CHgL8CXiA8elQaZ0z0lbTdpTQBmFYql6wvFkuUSP9kCTWWkWRjsQnyRR+uRjRzVfsGLC/34d1jn6KeIeBT9c93inSwaMadVGHyCeC9flbKRsnfLUlzKEO8WeBFwb6FY2sU6jGygYk6bMBhJGFwLXAmMME6Ta68yapF1hho5CHisUCydYB1EYir
mNIk3G3qUeE2yGHugvOca6ww1FAA3FoqliwvFUqYPzE0DFXMahIEjDL4GzAMmWseR2O3d07N4vuEZwP2FYmk36yB5pmJOujDYFigR7yKmLTkTwnvKfy7vm9Xymgw8WiiWTrEOkldalZFkYXAUcA0w1jqKbGytb312r/arslrMPV0OfFG71jWWRsxJFQb/BtyBSjmRnvU7vmKdoUE+DTxUKJb2tg6SJyrmpAmDVsLgl8AP0dcnseaWJ+dp29RJwH2FYulI6yB5oW/8JAmDMcBs4DTrKLJls7qn5+0nma2APxSKJa0IagAVc1KEwW7A/UCS9vWVTfCe6Gm/867WOQwMAq4pFEtnWwfJOl38S4IwOBj4X3QydSos9cGj09t/PsU6h7GfA19aPHNGt3WQLNKI2VoYfBCYg0o5NR4u766TqeP1zr8rFEvDrINkkYrZUhh8DrgRSPIm6/IWd3RPG26dISGOA/5YKJa2tQ6SNSpmK2HweeIfB/U1SBHv8XPLkydY50iQA4F5hWJJ/01qSKVgIS7ln1nHkL7rpGXxckYm7Yw/axOJd6jbwzpIXzjnznXOvcc6x6aomBtNpZxqC/3Yl60zJNT2wJw0bR/qvf+W9/5O6xybomJuJJVy6t1d3k8bym/eeOJyHl/Ld+qcKzjn/uKcu8w595Rz7g7n3FDn3GTn3P3OuSecczc550ZVXv8n59z5zrkHnXPPOOcO3cz7vdI596HK2zOdc09X3tcPKs/t7JybU3lujnPuHT3+3k+dc/Occ4vWvY9aUjE3iko5E2Z1T9vOOkPC7UJczrX+77Qb8DPv/SRgOXACcDVwlvd+X+BJ4Ns9Xt/ivT8A+Opbnn8b59xo4IPApMr7Oq/yRxcBV1eeuw74aY+/NhZ4F3AsMHOAn9vbqJgbQaWcCd6zZr6fqG1Xe7cHMLtQLNVyLv557/38ytuPEJ9duLX3/q7Kc1cBh/V4/e96vLbQy/teAbQBv3TOHQ+s22f7YOBXlbevIS7idW723pe9908TT+PUlIq53sLgs6iUM2E5I54p06TDbquzL3B7oViq1Z7V7T3e7ga2rvL13UALgHPuCufcfOfcbT1f6L3vAg4Afgt8ALh9M++z5914PfPUfN8UFXM9hcExqJQz47HyxMg6Q8ocAJTqdBNKBLzZY/7448BdW3g93vtPee8ne+83OifTOTcCCLz3txFPfUyu/NE84KTK2x8F7qlV+N6omOslDPYDbgA0wsqI2eWpuhGo7w4FbqrTcVWfAC5wzj1BXKbn9uN9eGAkcGvl/dwFnFn5sy8Dn6o8/3HgKwOPXB3tlVEPYTAOeJD4KrVkxIFtF732KqN18a9/fgOctHjmjMQUjnPu98CPvPdzrbO8lUbMtRYGw4FbUSlnSpdvWqJSHpATgW9Zh1jHOXc5MIwGTk/0hYq5lsKgCfg1sL91FKmtF/z2L1hnyIBvF4qlmq/57Q/v/ae990d57zuts2yKirm2fgy83zqE1N6fy/t0WWfIAAdcVSiWNHDphYq5VsLgi8QXCySDZpWnj7HOkBHDgFvqcANKpqiYayEM3g38xDqG1If3tD9c3iMPJ2I3yk7A9YViSSuWNkPFPFDxOX3XoWVxmbWSoc920jLIOkfGHAH8l3WIpFIxD9wVaAVGpj1Z3nWZdYaM+nqhWDreOkQSqZgHIgy+gi72Zd7s8lSNluvnyrTt49wIKub+CoP9ge9bx5D6m1OesrN1hgwbCVxbKJZarIMkiYq5P8JgBHA98XHukmHd3r3yd7/dOOscGTcNKFqHSBIVc//8DNjdOoTU3xK/jW4saYxvFoqlfa1DJIWKua/C4GPAqdYxpDHuK09qs86QE4OI55tbrYMkgYq5L8LgHcDF1jGkcW4vT+9t31+pnf2Bc6xDJIGKuW9+TnyxQnLAe7ruK++tKavGOke3bKuYqxcGJwHH9Po6yYw1DH6ujcHag7mxWoj308j1hXUVczXCYDRwoXUMaay/+J1fs86
QU/vQywGqWadirs4FgDZdyZk53fvrNns7ZxWKpWnWIayomHsTBocAn7KOIY13R3najtYZcqwZuLRQLNX8oNM0UDFvSRg0E69ZzuX/HHlW9m7ZQj9ed/zZ2p/4rL3cUTFv2efZcGKu5MirjFpknUEAOK9QLOXuAqyKeXPCYDvgu9YxxMYD5T3XWGcQIN67+cxeX5UxKubN+wYQWIcQG7O6p29lnUHWK+btxBMV86bEd/h91jqG2PCe8t3lfSda55D1RgLfsQ7RSCrmTfsm2jkut9poXbiaobrDM1k+UyiW9rIO0Sgq5rcKg4nAJ61jiJ1n/Y6vWGeQt2khR/ufq5jf7jvE/xNITs0tT9byyGQ6tlAsHWkdohFUzD2FwSTgJOsYYuuO7mljrTPIZl1gHaARVMwb+y76b5Jr3hM95Qu7WueQzZpSKJb+2TpEvamE1gmDqcAHrWOIrdcJFoLTVEayfc06QL2pmDc41zqA2Hu4vPtK6wzSq/cUiqX9rEPUk4oZIAz2QnstC3BH97Th1hmkKpkeNauYY1+0DiDJMLc8eYJ1BqnKSYViabx1iHrRsrAw2AodripAh29ZvJyRBescm9O1Yimvl35E96o3ca6JEZOPZqtp/8Kbcy9nzXMP4ppbaNl6B7Y55qs0DRlB20tP88YdF+OaW9nmuK/TOmoc5bZVLL3lfLY78VxcuqfSW4EvAUXrIPWgEXN8M8kI6xBib5Ef+5J1hi1qambUEacx/vRL2OHjP2DloyU6Xn+RIYXJjDvtZ4z79EW0jh5PdP//ALDioZvY9gNns/Vhp7LysdsAWD7veoKDT0x7Ka/z2UKxlMnv3XwXcxg44AvWMSQZ7irv560zbEnLiNEM3iHewqNp8DBax+xE98plDN1lCq4pPmxl8Lg96Fr5OgCuqQXf1YHvasc1tdD55j/oXrmMIe/Yx+xzqLGtgdOsQ9RDvosZ3gvoFGQBYFb3tNTsYNYVvUrHq4sYPG6PjZ5f9cRshu4an8gUHPRhlt1+ESsevoWRU45l+d1Xs/WhH7OIW09fLRRLmTsCLO9zzLroJwB4z5r5fmIqdpQrd6xl6U3fY/RRp9M0eNj656N5N0BTM8P3PhyAQdvvythTfwhA298X0DxiNABLbzkf19TMqCNPo3n4qIbnr7EC8f0HNxrnqKn8jpjDYFe0RE4qljPi2TJNiR95+e4ult70PYbvfTjD9jhk/fOrnpzDmoUPss37//1t88fee6J5NxD808ksv/dXbP2uUxg+6QhWPPL7Rsevl8ydyZnfYoYzyPfnLz08Vp74pnWG3njvWfaHC2kdsxNbHbDhJtW1ix5hxQM3st0J36Kpdcjb/t7qBXMYOmEazUNG4DvbwTWBc/Hb2fDeQrG0jXWIWsrnVEZ8yKqWyMl6s8tTh/X+KlvtS55m9VNzad22wMtXfAmAUYedyht3Xorv7uTVG74BxBcAxxwdz9KVO9tYtWAO258Yn5K21fQPsPSm7+GaW9jmuP+w+URqrwU4EbjYOkitOO8TfSG6PsLgKOBO6xiSHAe2XfTaq4xOzcU/eZt7F8+c8S7rELWS1x/lT7QOIMnR5ZuWqJRT75BCsbSzdYhayV8xx9MY2kVO1nvBb/+idQYZMAecbB2iVvJXzHAEsK11CEmOe8r7dFhnkJo4xTpAreSxmDWNIRu5vTw9U1f0c2yfQrH0TusQtZCvYg6DFjSNIT14T/vD5T12s84hNZOJUXO+ihmOBDQ6kvVWMvTZTloGWeeQmjm5UCylfoemvBWzpjFkIwvKuyyzziA1VQAmW4cYqPwUcxg0AR+wjiHJcmd5qkbL2XOkdYCByk8xwzRgjHUISZbZ5anvsM4gNXeUdYCBylMxv8c6gCRLt3ev/N1vl9njiXLs0EKx1GodYiBUzJJbS/w2L1hnkLoYARxgHWIg8lHMYTAUOKTX10mu3Fee1GadQeom1dMZ+Shm+CdgsHUISZZZ5Wmp3yVeNivVFwDzUsyHWQe
QZPGervvKk1JxYon0y8GFYmmodYj+yksxH2odQJJlDYOfW8vgxO/BLP02CEjtNqDZL+YwaAUOtI4hyfIXv/Nr1hmk7lI7z5z9YoapQGp/pJH6mNO9f+LP95MBO8I6QH/loZgPsg4gyTO7PHVH6wxSd/sViqVU/gOch2LexzqAJEvZu2XP+R0zc9qFbNZgIJU7B+ahmDOxP6vUzquMWmSdQRomld//2S7mMHDA3tYxJFkeKO+5xjqDNIyKOYEKxLdniqw3q3v6VtYZpGFUzAmUyi+K1I/3lP9c3meCdQ5pmFR2QNaLeZJ1AEmWdloXrWKYRsz5MbFQLKVuO4Zei9k5t7tzbo5zbkHl9/s6575R/2g1kcp/LaV+nvE7/sM6gzRUM7CXdYi+qmbEfBlwNtAJ4L1/AjipnqFqSMUsG5lbnpz68+Ckz1LXA9UU8zDv/YNvea6rHmFqKgyagT2tY0iy3NE9bax1Bmm4TBbz6865CYAHcM59CEjDj4M7oa0+pQfviZ7yhV2tc0jDpW7JbEsVr/kCcCmwp3NuCfA88NG6pqoNjYxkI68TLAQ3xTqHNNw46wB91Wsxe+8XAe9xzg0Hmrz3K+sfqya2tw4gyfJIefcV1hnExHbWAfqqmlUZY5xzPwX+DPzJOXehcy4Np03vYB1AkmVW9zTdbJRP21oH6Ktq5pivB5YCJwAfqrx9Qz1D1YiKWTYytzxZN5bk05BCsTTSOkRfVFPMo7333/XeP195nAdsXe9gNaBilvU6fMvi5YzUGX/5larpjGqKea5z7iTnXFPlcSJQqnewGlAxy3qL/Ngl1hnEVOaK+bPAr4D2yuN64N+ccyudc0m+mKKLf7LeXeV9u60ziKlUFXM1qzJSNTfTg0bMst6s7un6hzrfUlXM1azKuNE5d4xzLm0bHukbUQDwnjXz/cSJ1jnEVKpWZlRTtpcQ31DyrHNupnMu+bc5h8FgdACrVCxnxLNlmlJ59pvUTLZGzN77O733HwWmAIuB2c65ec65TznnWusdsJ+SmksMzC9PWG6dQcxlbsRM5YaSTwKfAR4DLiQu6tl1SzYw1dxqLjkxuzx1iHUGMZeqfXN6LTDn3O+Id2m7Bni/937dBkY3OOcerme4AVAxy3pzuqfsYp1BzKWqE6oJ+0vv/W09n3DODfbet3vvp9Up10Cl6osg9dPlm5a8yujx1jnEXKo6oZqpjPM28dx9tQ5SY6n6Ikj9vOC3f9E6gyRCqi7+brbAnHM7AOOBoc65/YF1Jz9sBQxrQLaBUDELAPeU39lpnUESIVWdsKWwRxNf8NsR+CEbinkF8H/rG2vAUvVFkPo5uXnuTsc33/OUdQ6xtZohr8IM6xhV22yBee+vAq5yzp3gvf9tAzPVgpbLCQCDXNcug1JwEprU10jWpmqvlGrWMaetlEEjZhHZWKr2SknbbdbVStUXQUTqLlWdkNViXm0dQEQSJVXzWdVsYjTMOfdN59xlld/v5pw7tv7RBiQt5xKKSGO0Wwfoi2pGzFcQf1IHV37/Epte25wkq6wDiEiiLLMO0BfVFPME7/33gU4A7/1aNiydS6Yw6gA6rGOISGK8bh2gL6op5g7n3FDAAzjnJpCOHwsi6wAikhipKuZqlpV9G7gd2Mk5dx3wT8Q3niTdG6Rsqz8RqZtsFbP3frZz7lHgIOIpjK9479PwSaZqTklE6ipVfVDtjRhDgDcrr9/bOYf3/u76xaqJVH0hRKSu0jCYXK+a/ZjPBz4CPAWUK097QMUsImmRrWIGPgDs4b1PwwW/nl6xDiAiiZGqYq5mVcYi0rkp0ELrACKSCGsJozXWIfqimhHzGmC+c24OPZbJee+/XLdUtfGcdQARSYR/9P6SZKmmmP+38kgbjZhFBOBv1gH6qppivgGYSHzBb6H3vq2+kWrmJaCNeEWJiORX6op5s3PMzrkW59z3iQvuKuBa4O/Oue8755I/5xxGnnh+XETy7a/WAfpqSxf/LgBGA7t476d
67/cHJgBbAz9oRLga0HSGiGSqmI8FTvfer99C03u/AjgDOKbewWpEFwBFJFPF7L33fhNPdlPZ0CgFVMwi+bacMHrVOkRfbamYn3bOnfrWJ51zHyM9/wJpKkMk31J34Q+2vCrjC8DvnHOfBh4hHiVPB4YCH2xAtlrQsfUi+ZaWQeRGNjti9t4v8d4fCJwLLAZeBM713h/gvU/HUeBh9BKQjqwiUg+ZGzED4L3/I/DHBmSplweA461DiIiJx60D9EdWT8nu6X7rACJiogzMsw7RHypmEcmqpwij5dYh+iMPxfww0GUdQkQa7h7rAP2V/WIOo7XAE9YxRKTh/mwdoL+yX8wxTWeI5I+KOeFUzCL58mJluWwq5aWY77MOICINldr5ZchLMYfRc8Q3yIhIPqR2GgPyUsyx31sHEJGG0Yg5JVTMIvnwMinfJydPxfwnYJV1CBGpu1srJxilVn6KOYzagTusY4hI3aXx8OiN5KeYY5rOEMm21cAc6xADlbdiLhFvbCIi2XQHYdRmHWKg8lXMYbSUeBtQEcmm31oHqIV8FXNM0xki2dRORr6/81jMt1gHEJG6mEUYrbAOUQv5K+YwehqYbx1DRGruf6wD1Er+ijl2pXUAEamptWRgmdw6vZ75l1G/Ai4AWq2D1MLfXu/mIzeuXf/7RW+WOfeIwRxeaOFzt7bR1uVpaYKLZwzlgPHN/PbpTr71p3ZGD3Xc/JGhjBnWxMI3ypzzxzau/9Aww89EpN9+k5VpDADnfapvkOm/MLgFOM46Rq11lz3jf7SKBz4znNN/v5YzDxrE+3Zr5bZnO/n+vR386ZPDOeS/VzPrY8O4fkEnbV3wpQMHcfJv13Du4YPZbUyz9acg0h+HEEaZ2UUyr1MZAJdbB6iHOc93M2F0Eztv3YRzsKI9fj5qg3EjHQBNDtq7PWs6Pa3N8OcXuhg7okmlLGn1eJZKGfI7lQHxzSYvA+Osg9TS9Qs6Ofmd8QzNT44ewtHXruHfZ7dR9jDv08MB+Pa7B3P0tWsYN7KJaz84lBNvXMP1J2gKQ1LrF9YBai2/UxkAYfBd4BvWMWqlo9sz7oereOrzw9l+RBNf/kMb7965mRP2buU3T3Vy6SMd3Hnq8I3+zlXzO1je5jlwx2Z+MK+DUUMcF75vCMNandFnIdInq4BxhNFK6yC1lOepDIDLyNAt2n94tospY5vYfkT8Zb3q8Q6O3yv+oejDe7fw4JLujV6/ptNz1eOdfH76IM6e087l/zKUqeOaue6JzoZnF+mnX2etlCHvxRxGLwK3W8eolV/3mMYAGDeyibteiMv4j893s9uYjb/c37+3na8cOIjWZsfaTnDE889rOnP8U5SkzSXWAeohz3PM6/wYOMY6xECt6fTMXtTNL44duv65y94/hK/c3kZXGYa0wKU9/uzllWUefrlMePgQAL528CAO+u/VbD0kXkInkgIPEUaPWoeoh3zPMa8TBg8AB1jHEJE+OY0wyuTqqnxPZWzwPesAItInS4DrrEPUi4o59r/AAusQIlK1mZVTiTJJxQxUzgf7L+sYIlKVJcQrqjJLxbzBDcBC6xAi0qvzszxaBhXzBmHUDZxvHUNEtuhl4FLrEPWmYt7YVcBL1iFEZLMyP1oGFfPGwqgD+KF1DBHZpH+Qg9EyqJg35RLgBesQIvI252fhBOxqqJjfKv7C/4d1DBHZSG5Gy6Bi3rQw+g1wt3UMEVnvbMJobe8vywYV8+Z9lQztPCeSYvOAq61DNJKKeXPC6DEyesqJSIp0A1+o3ASWGyrmLTsHiKxDiOTYJYTRfOsQjaZi3pIweg34rnUMkZxaSoZOGOoLFXPvfgo8Yx1CJIfOIoyWW4ewoGLuTRh1AmdaxxDJmfuBK61DWFExVyOMbgOutY4hkhNlcnjBrycVc/W+iPbREGmEi7N6ZFS1VMzVCqMI+DSQ23/FRRrgWeAs6xDWVMx9EUazgZ9bxxDJqG7gVMJojXUQayrmvvs68Jx1CJE
MOp8wut86RBKomPsq/tf8E+h2bZFaegwIrUMkhYq5P8JoHnCBdQyRjFgNnFxZmiqomAfiW8AT1iFEMuBLhNHfrEMkifNeiwz6LQz2Ah4ERlhHEUmpXxNGp1iHSBqNmAcijP5CvIRORPpuIfA56xBJpGIeqDD6H+BH1jFEUmYFcBxhtMI6SBKpmGvjLOAu6xAiKVEmvtj3tHWQpFIx10IYdQEfRoe4ilTjPyr7z8hm6OJfLYXBvsTH4Ay3jiKSUJcTRqdZh0g6jZhrKYyeAD6G9tMQ2ZR7gDOsQ6SBirnWwuhm4JvWMUQSZjFwPGHUYR0kDTJbzM65eWYfPIz+E7jY7OOLJMsq4hUYS62DpEVmi9l7f4hxhC+izfVFOoGPEEZPWgdJk8wWs3NulXNurHPubufcfOfcAufcoZU/O9k592TlufPf8nf+0zn3uHPufufc9v0OEJ++8Cng5gF/MiLp1E28LE4rMPoos8VccQowy3s/GdgPmO+cGwecDxwJTAamO+c+UHn9cOB+7/1+wN3A6QP66PEyupOAOQN6PyLpUybeW/m31kHSKOvF/BDwKedcCOzjvV8JTAf+5L1f6r3vAq4DDqu8vgO4tfL2I0BhwAnCqB34F+LDJUXywAOfIYx+ZR0krTJdzN77u4lLdwlwjXPuVMBt4a90+g0Lu7uBlpoECaPVwDFoNzrJhzMIoyusQ6RZpovZObcz8Jr3/jLgv4EpwAPAu51z2zjnmoGTacTt1GH0JvBe4jPNRLLqq4TRL6xDpF2Wi9kDhxPPKz8GnABc6L3/B3A2MBd4HHjUe39LQxKF0avEc9t/acjHE2msswijC61DZEEmb8l2zo0hLtydrbNsUhiMAUrAgdZRRGrkG5X1+1IDmRsxV1Zd3Af8wDrLZoXRMuAo4HbrKCID1A38q0q5tjI5Yk6NMGgFriRe1ieSNquJbx4pWQfJmsyNmFMlPnzyY4Dm5SRtXgMOVynXh0bMSREGZwPfs44hUoVngPcRRousg2SVijlJwuA04BdAs3UUkc24j3hDotetg2SZijlpwuBI4HpgW+soIm9xM3AKYbTWOkjWaY45acLoj2y4EUYkKS4ATlApN4ZGzEkVBoOAn6A8g1viAAAEAklEQVQTH8TWCuCThNFN1kHyRMWcdGFwKnAJMNQ6iuTOAuJR8jPWQfJGUxlJF0ZXAwcDugIujXQ1cKBK2YZGzGkRBlsD1wDHWkeRTFsFfJ4wusY6SJ5pxJwWYbQcOA44E2gzTiPZ9BgwRaVsTyPmNAqDPYl/1JxuHUUyoQv4EfBNnWKdDCrmtAqDZuLtS78FtBqnkfR6lPi0kcesg8gGKua0C4N9iA8B0OhZ+mIt8G3gR4RRt3UY2ZiKOQvi0fNXgO8Cw4zTSPLdCXxWe10kl4o5S8JgV+DnxEdYibzVG8DXCKMrrYPIlqmYsygM3kd8C+0k6yiSGL8mPo/vNesg0jsVc1bF0xunAecC2xunETtzic/ie8g6iFRPxZx1YTASOAv4N3Rbd57MB4qE0SzrINJ3Kua8CIMdgf8EPg444zRSP88D3wB+TRjpmzulVMx5Ewb7Exf0+6yjSE0tJV6V8wvdJJJ+Kua8itc/fx04Cd2gkmavARcBPyGMVlqHkdpQMeddGOxEvP/G6cAI4zRSvSeBHwO/IozarcNIbamYJRbvXncG8GVgB+M0smkeuA34MWE0xzqM1I+KWTYWBoOBU4GvAnsbp5HYGuAq4ELC6G/WYaT+VMyyeWEwHfgE8Tz0GOM0efQ34ArgMsLoDesw0jgqZuldfP7gDOKSPgZdLKynN4hPSb+aMNKBvDmlYpa+CYNtgJOJS3qqcZqsWAXcCtwA3KblbqJilv4Lg0nAB4mPuzoA3bjSF6uJL+StK+O1xnkkQVTMUhthsD3xdMcM4CggsA2UON3AQ8Rbbs4G7iOMOm0jSVKpmKX24g2UDgD+T+VxENBimsnGM2wo4rmEUWScR1JCxSz1FwbDgP2
BaT0eu5Otw4DXAguINw96AJhNGL1oG0nSSsUsNuJd76awcVlPIB3z1K8RF3DPxzM6oklqRcUsyRGPrAuVx8493l73+0btK70SWAK8VHmse/t54HHC6JUG5ZCcUjFLeoTBUOKCHguMJN7bY3jl1009BgOdQFePX9uJV0T0fETE5RsXcBitaNjnJLIJKmYRkYTJ0sUXEZFMUDGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgmjYhYRSRgVs4hIwqiYRUQSRsUsIpIwKmYRkYRRMYuIJIyKWUQkYVTMIiIJo2IWEUkYFbOISMKomEVEEkbFLCKSMCpmEZGEUTGLiCSMillEJGFUzCIiCaNiFhFJGBWziEjCqJhFRBJGxSwikjAqZhGRhFExi4gkjIpZRCRhVMwiIgmjYhYRSRgVs4hIwqiYRUQSRsUsIpIwKmYRkYRRMYuIJMz/B5iMTfFQ5PYAAAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "series = pd.Series([(oneType - oneType_json), (oneType_json)], index=['non-json', 'json'], name='One type')\n", - "series.plot.pie(figsize=(6, 6), autopct='%1.0f%%')" - ] - }, - { - "cell_type": "code", - "execution_count": 179, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "294" - ] - }, - "execution_count": 179, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Count the number of domains that only produce one BOTH json and non-json values\n", - "agg_2 = agg[agg['is_json']['nunique'] == 2]\n", - "twoTypes = len(agg_2['is_json'])\n", - "twoTypes" - ] - }, - { - "cell_type": "code", - "execution_count": 187, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 187, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWYAAAFbCAYAAADmwiRlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XecVNXBxvHf2cLSERBRNHqNhVgQKxZsmDcxyaQYo1hi7xiNxHqNbX3VOKZZo1Gj0dcSTdTEmJvEqNgLMRABex2NSBAQhs7s7pz3jzvgosDO7t6Zc++d5/v5zGeX2dmdB3GfPXvm3HOMtRYREYmPOtcBRERkZSpmEZGYUTGLiMSMillEJGZUzCIiMaNiFhGJGRWziEjMqJhFRGJGxSwiEjMqZhGRmFExi4jEjIpZRCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzKmYRkZhRMYuIxIyKWUQkZlTMIiIxo2IWEYkZFbOISMyomEVEYkbFLCISMypmEZGYUTGLiMSMillEJGZUzCIiMaNiFhGJGRWziEjMqJhFRGJGxSwiEjMqZhGRmFExi4jEjIpZRCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzDa4DiHSH5wf1wFrAQKBXuw/Zzzy0/Z+LwHzg41w201LZhCKdZ6z97P+/Iu55ftAD2BzYDFivdFu39HYosDZhGfcHTDeeKg/MAj4uvf3s+zOAN3LZzPRuPIdIp6iYxalSAQ8HtgS2avd2U+L1G9084FXglfa3XDYzw2kqSSUVs1SN5wdNwK7AnsBIwgLehHgVcGfNJSzpV4HJwNO5bOZVt5Ek6VTMUjGl0fDOwBhgb8JS7ukyU5XMBp4BngaeAF7KZTNFp4kkUVTMEhnPDxqBUYQlPIawiHu7zBQTc4AJwCPAI7lsJuc2jsSdilm6xfODnsA3gINLb/u4TZQIbwH3AffkspmprsNI/KiYpdNKI+N9gYOA7wD93CZKtFeBe4Hf5bKZt1yHkXhQMUtZSuuF9
yEcGX+XcKmaROvfwO+Ae3PZzAeuw4g7KmZZI88PdgSOAb4HrOM4Tq2wwPPAPcAduWxmnuM8UmUqZvmc0rK2g4AfEL6YJ+4sBG4Drs5lM287ziJVomKWFTw/WJ+wjI8DhjiOIysrAn8FrsxlMxNch5HKUjELnh9sA5xJOH/c6DiOdGwqcBVwdy6bWeY6jERPxVzDPD/4H+As4Kuus0iXzAR+DVyfy2Y+dh1GoqNirkGeH+wGXAHs7jqLRGIpcA1wuV4oTAcVcw3x/GAL4HLCtceSPp8APwGu0xRHsqmYa0DpRb2LgaOAerdppAreBy4E7tQeHcmkYk4xzw8GAD5wGitvIi+1YSpwTi6b+bvrINI5KuYUKq1DPgX4MTDIcRxxbwJwdi6bmeQ6iJRHxZwynh/sBfyGcKN5keUscDtwRi6b+cR1GFkzFXNKeH7QH/gpcALdO2pJ0m0W8KNcNnOX6yCyeirmFPD84BvAjcAGrrNIYvwdGKe9oeNJxZxgnh8MJrwC7DDXWSSRFgHnEF6goiKIERVzQnl+cCBwHdrxTbpvAnBMLpt533UQCamYE8bzg3WB6wn3RBaJygLCFwZvdh1EVMyJ4vnBPoR79GrnN6mUPwJH5bKZ+a6D1DIVcwJ4fmAI5wIvRVfuSeW9CXwvl8287DpIrVIxx1zp6r3b0f4WUl2LgBNy2czdroPUIhVzjHl+MAJ4AF0sIu5cSzj33OI6SC1RMceU5weHE+6129t1Fql5zwFjc9nMdNdBaoWKOWY8P+hBuDZ5nOssIu3MBA7OZTNPuA5SC+pcB5BPlZbCPYVKWeJnKPCo5wdnuA5SCzRijgnPDzYHHgY8x1FEOnIVcLquFqwcFXMMeH6wC/AXYLDrLCJluoPwasFW10HSSFMZjnl+8C3gMVTKkiyHA3/0/EAHMFSAitkhzw+OJLzSSisvJIm+CTxcWmsvEVIxO+L5wanAb9GVfJJsewBPen4w1HWQNFExO+D5wXmEx81rQ3tJg5HAs54fbOw6SFroxb8q8/wgS7jvhUjazAD2zWUz01wHSToVcxV5fnAx4bHyImk1G9g9l8284TpIkqmYq8Tzgx8CV7vOIVIFHwCjc9nMh66DJJWKuQo8P/g+4bpPzSlLrXgN2COXzcxxHSSJVMwV5vlBBvgT0OA6i0iV/RP4ci6bWeg6SNJoVUYFeX6wO/AHVMpSm0YBD5Q25pJOUDFXiOcHIwkvs9aVUVLLvgLc4fmBuqYT9B+rAjw/2AT4O6ArokRgLPAr1yGSRMUcsdLWnY8A67rOIhIjJ3l+8L+uQySFXvyLkOcHjcDjwGjXWURi6vBcNnOn6xBxpxFztH6GSllkTW7y/GB71yHiTiPmiHh+cDDwO9c5RBLgA2DHXDYzy3WQuFIxR8Dzg62AiUAf11lEEuIJ4CvaaH/VNJXRTZ4f9AceQKUs0hl7A5e7DhFXKubuuw3Y3HUIkQQ6w/ODb7sOEUcq5m7w/OAc4Luuc4gklAFu8/zAcx0kbjTH3EWeH+wD/AOdQCLSXf8k3PCo4DpIXGjE3AWeH6xNuAJDpSzSfaPQfPNKVMxdcw2wjusQIiky3vODXV2HiAtNZXRS6cWKB13nEEmh14DtctnMMtdBXIv1iNkYE6t9XD0/WAu4wXUOkZTaAh29BsS8mGPoF8Aw1yFEUuxszw+2cx3CtdgXszFmPWPMU8aYl4wxLxtj9ijdf4gxZlrpvivaPX6hMeYyY8wUY8wLxpihUeTw/OArwDFRfC0RWa0G4FbPD2r6cInYFzNwKPCwtXZbYCTwkjFmGHAFsA+wLbCTMWa/0uP7AC9Ya0cCTwHHdzeA5wd9gZu7+3VEpCzbAme7DuFSEor5ReBoY0wzMMJauwDYCXjCWjvLWtsK3AXsWXp8gfDkEIBJgBdBhiuAjSL4OiJSngs9P9jCdQhXYl/M1tqnCEt3OnCHMeYI1nzadIv9dKlJG908b8/zgz2Bcd35GiLSaU3ALbV6JFXs/
9LGmI2Aj621NwO3ANsT7uS2lzFmbWNMPXAI8GTUz13a+P4m1vyDQEQqY1fgZNchXIhtMRtjGoBlhLtQvWSM+TfwPeBqa+0M4FzC00KmAJOttZVYWzwOGF6Brysi5bmotINjTYntBSbGmJHAzdbaUS6ev7Rm+W1gsIvnF5EVLstlM+e7DlFNsRwxG2NOItyLwuU/xgWolEXi4EelQ45rRiyL2Vr7a2vtltbaf7h4fs8Pvgic4uK5ReRzegPNrkNUUyyLOQYuA3q4DiEiKxzr+UHNvN6jYv4Mzw9GAge5ziEiK2kAfuI6RLWomD/vMrQ8TiSO9vf8YBfXIapBxdyO5wejgYzrHCKyWld0/JDkUzGv7DLXAURkjfb0/OCbrkNUmoq5xPODnYG9XOcQkQ6lfk2zivlTP3IdQETKsnPa55pVzIDnB18gvNxbRJJhvOsAlaRiDp1CN3ehE5Gq+l5pQJVKNV/Mnh/0AU5wnUNEOqWBFF+dW/PFDBwFrOU6hIh02vGeH/R2HaISarqYPT8wwGmuc4hIlwwEjnQdohJqupgJLybZzHUIEemy00oDrFSp9WLWEjmRZBsOfN11iKjVbDF7frA14SnbIpJsqZuOrNliBg53HUBEIvGVtC2dq8liLs1JaWtPkXQwwPddh4hSTRYz4em7G7kOISKROcx1gCjVajEf7DqAiERqK88PtnUdIio1V8yeH9QDY13nEJHIpWbUXIv7Q+wNDHUdorPmv/gnFk75BxhoHOKx9jfGYxrCYwk/eeTXLJz2KBuefl/42EkPsfClv1Hffwjr7H8+pr6RpR++wuI3nmPQl493+dcQqaQDgTNdh4hCzY2YSeA0RuuC2cyf9BDrHnklw469HopFFr32FADLZrxFcdmilR6/cMrDrHfMdfQYuglL3puMtZb8s/cwYPQhLuKLVMuGnh+Mch0iCjVVzJ4fNAL7u87RJcU2bGsBW2zDti6jvu8gbLGNuU/cylp7H/35x7e1YVuWYeoaWPTKBHptsiP1PftWP7dIdR3gOkAUaqqYgX2BQa5DdFZDv7XpP+q7TL/haD687nBMU296bbw9Cyb/hd6b7kxD35X/Sv1H7c+MO86guDhP0/pbsOjlx+i3nY4ylJqQin3Va22OOZFrl9uWLmTxWxNZ/6RbqGvqw6wHsyx8+TEWv/4sQw+9/HOP77v1PvTdOryocd4zd9Nvh2+z5N1JLHr5Mer7D2HgPsdiTK39TJYa8UXPD7bPZTOTXQfpjpr57ixdVPI11zm6YmnuJRoGDKW+9wBMfQO9N9+Vec/cTcu8j5h+4/F8eMMx2JZlTL9x5Rf2WhfMofDft+i92S7kn7+Htb9zTvhCYG6Ko7+JSFV8w3WA7upwxGyMOdBa+4eO7kuArYG1XYfoiob+Qyh89AbFlqWYhiaWvj+F/jvtR/8dvrXiMR/88gDWP/HmlT5v3tN3stYe4Qoi21IAY8AYbOuyquYXqbIxwKWuQ3RHOSPmc8u8L+7GuA7QVU3DhtN7+Ghm3DaeGbf+AKyl38g1D/4LM98BoMfQTQDou81XmXHLKRRmvkOvjXeoeGYRh3b1/KCH6xDdYay1q/6AMV8n/JVgLHBvuw/1B7a01iZqWYrnB38CvuM6h4hUxV65bOYp1yG6ak0j5o+AfwFLgUntbn8mXN2QGJ4f1AF7uc4hIlWT2N+QYQ1zzNbaKcAUY8xd1trWKmaqhG3RuX4itWQMcLHrEF1VzhzzW8aYdz97q3iyaCX6p6eIdNounh/0dB2iq8pZx7xju/d7El6PnrSLNFTMIrWliXB738ddB+mKDkfM1to57W7TrbVXkaAjmTw/aAD2dJ1DRKousQOyctYxb9/uj3WEI+h+FUsUvR1IVl4RiUZ6ixn4Rbv3W4EcydrPeLTrACLixM6eHzTlspnEXVHVYTFbaxP7U6ckNacaiEinNALDgamug3RWh3PMxpjBxphrjDGTjTGTjDFXG2MGVyNcREa6DiAizmzpO
kBXlLNc7h5gFuF2egeU3r93jZ8RE6X9l7/kOoeIOLOV6wBdUc4c8yBr7SXt/nypMWa/SgWK2BZAoq+ZF5FuSWQxlzNiftwYc7Axpq50GwsElQ4WkW1cBxARp1I7lXEicDewrHS7BzjdGLPAGDO/kuEisIXrACLi1KaeHzS5DtFZ5azKSPIa4OGuA4iIU/UkcGVGOasyHivnvpjSC38ikrjpjNWOmI0xPYHewNrGmIGAKX2oPzCsCtm6pbTV56auc4iIc4l7AXBNUxknAuMJS7j9wYbzgV9VMlREPMKNTESktqWnmK21VwNXG2NOtdZeW8VMUfmC6wAiEgsbuw7QWeWsY84bY4747J3W2v+rQJ4oreM6gIjEwhDXATqrnGLeqd37PYEvE05tqJhFJAnSV8zW2lPb/9kYMwC4o2KJopO4fwwRqYgenh8MyGUzeddBylXOBSaftRjYLOogFaARs4gsl6iBWjkb5T8E2NIf6wmvpvt9JUNFJFH/ECJSUUOAt12HKFc5c8w/b/d+K/C+tfbDCuWJkkbMIrJcogZq5Zz59yTwOuHxTAOBQqVDRUTFLCLLJaoPyrkkeyzwT8LTsccCE40xB1Q6WAQS9RNSRCoqUX1QzlTGecBO1tqPAYwxQ4BHgfsqGaw7PD+oBwa5ziEisZGoYi5nVUbd8lIumVPm57k0mE/39hARSVQxlzNi/rsx5mHgd6U/HwT8tXKRItHoOoCIxEpv1wE6o5wLTM4yxuwP7E44Cr3JWvvHiicTEYlOvesAnVHOiBlr7QPAAxXOIiJSKWV1XVzEfa64qzS/LCLtJWrEnNZiFhFpL1Ej5kSFFemsk+sffHZcw5/Xcp1D3FpEz5mQcR2jbOXslTEaaAY2Kj3eANZa+8XKRusWTWUIALe2fW378Q33z+xhWj3XWcSdfiz5yHWGzihnKuMW4JeEqzJ2AnZk5T2aRWJrKU29TmoZn7d2xUZcUpuKrgN0RjnFnLfW/s1a+7G1ds7yW8WTdY9GzLLChOL2I/9tN33GdQ5xqs11gM4op5gfN8b8zBizqzFm++W3iicTidDhhXO3bbV1ifp1ViKVlM3XgPJe/Nu59HbHdvdZYJ/o40Sm1XUAiZdF9Op3VsuJr1/Z44ZhrrOIE4k5vQTKu/JvTDWCRCzuUy3iwB+Le+w0rvjn5zavm76b6yxSdfNcB+gMY+2qXxMxxhxmrb3TGHP6qj5urf1lRZN1k+cH8wn3kBZZYSDzP5nUNK5YZ+zarrNIVV1Mc77ZdYhyrWmOuU/pbb/V3OJutusAEj9z6T/o0tbD3nSdQ6ouUSPm1U5lWGtvLL29uHpxIjUL2Nh1CImfW9u+vtsxDX+buIGZvXPHj5aUSEcxL2eM6QkcC2wF9Fx+v7X2mArmisIs1wEkvg5cdtFGzzWdmjeGAa6zSFUkqpjLWS53B7AusC/wJLABsKCSoSKiYpbVmsHgdX/V9p2prnNI1aSumDe11l4ALLLW3k54wfmIysaKhOaYZY1+3nrQHnNsv3+7ziFVkaiVWuUUc0vp7TxjzNbAAMCrWKLoaMQsHTqgcNFga1nsOodU3PuuA3RGOcV8kzFmIHAB8GfgVeCnFU0VDRWzdOg9O2zDu9q+/KLrHFJRc2nOz3cdojM6LGZr7W+stXOttU9aa79orV3HWvvraoTrJhWzlOWC1qP3WGB7veI6h1RMznWAzipnVcZawBGE0xcrHm+t/WHlYkViuusAkgyWurqDC+c3/aXHeQVj6OE6j0Qu5zpAZ5UzlfFXwlKeBkxqd4s7XUQgZXvFbrxpUNzledc5pCJyrgN01movyV7xAGMmW2sTuZuc5wcfAuu7ziHJ0EBry7Sm43K9TGEz11kkUqfRnL/GdYjOKGsdszHmeGPMesaYQctvFU8WDY2apWytNDQeVTi7xdpk7d0rHcq5DtBZ5RRzAfgZ8DyfTmP8q5KhIvSG6wCSLBPtlls+XRyhTfXT5R3XATqrnGI+nfAiE89au
3HpFufz/tp73XUASZ4TWk4f1WLrE7XuVVZrKQkcoJVTzK9AYhfgv+w6gCTPUpp6ndxy2lydE5gKr9CcT9zBGeWcYNIGvGSMeRxYtvzOBCyXA5jiOoAk0yPFHbedYjd5elvzzh6us0i3JPKS+3JGzH8CLgOeI1nL5chlM7OB/7rOIcl0WOHckW22bobrHNItL7kO0BXlHC11uzGmB7B56a43rLUta/qcmJlKuDueSKcspHf/c1qPf/3njTeu5zqLdFkii7nDEbMxZm/gLeBXwPXAm8aYPSucK0qazpAuu69tr1FvF4c95zqHdEmRhH7/lzOV8Qvgq9bavay1exLuy3xlZWNFSt9U0i1jCxcML1qTqG0jBYB3aM4vdB2iK8op5kZr7YrlJtbaN4HGykWK3JOEPzlFuuQTBgzOth6SuCVXkpjrLT6nnGL+lzHmFmPM3qXbzSTkxT+AXDYzl3CeWaTLbmr75m4f2UH/dJ1DOuVJ1wG6qpxiHke4lvmHwGmE+zGfVMlQFfCE6wCSfAcuu+gL1pKofX1r3OOuA3RVOfsxLyM89+9Ea+13rbVXlu5LksT+A0l8TGfIeje0fTuRLybVoI9ozid2r5zVFrMJNRtjZhNe2vyGMWaWMebC6sWLzFNonlki8NPWg3b/xPZL5BKsGpPowdiaRszjgdHATtbawdbaQcDOwGhjzI+qki4iuWxmHgldzyhxY8yBhQsHWssS10k+65gHl7DOzxaw9fUrL0S4dmKB4dctZKvrF3L2I0sBePaDVra5YSE73byQtz8Jxyzzllr2vXMRHW0FnBCpLeYjgEOste8tv8Na+y5wWOljSfOE6wCSDu/Y9Te6p21M7F4IPGrbRv5+WO+V7nv8vVYefKOFqSf14ZWT+3LmbuEBLb94vsD9Y3vxk316csOLBQAueXIZP969CWNM1bNXQGqLudFaO/uzd1prZ5Gs5XLLPeE6gKTHea3H7r7Q9nzVdY729tyogUG9Vi7VG/5VwN+9iaaG8P51+oTf8o31sKQVFrdYGuvhnU+KTF9QZC+vnO1zYu8DmvPvug7RHWsq5kIXPxZXT4E2QJdoFKmrP7RwXqO1xHp7gjfnFHn6/VZ2/s1C9rptES9OD78Fzt29iRMeWspVEwucMqoH501YyiVjmhynjcw/XAforjUV80hjzPxV3BYAI6oVMCq5bCYPTHSdQ9Jjqt1ks78VRz3rOseatBZh7lJ44dg+/OwrPRl732KstWy7bj0vHNeHx4/sw7tziwzrV4cFDrpvMYc9sISZCxP9WvkfXQfortUWs7W23lrbfxW3ftbaJE5lAPzedQBJl9NaTtltqW1823WO1dmgv2H/LRowxjBq/XrqDMxe/OmLe9ZaLn1qGRfs2cTFTy7j4r2bOGybRq6ZmMRfigGYDzzqOkR3lXOBSZr8Hi2bkwi10NDjmJazllkbz/+v9vtSIxPeC/eJf3NOG4U2WLv3p/PQt09pIbNZAwN7GRa3QJ0Jb4tjPUGzRn+hOZ/YnyrL1VQx57KZGcDTrnNIujxX3Hqr54tbOv//6pD7F7PrLYt4Y06RDX65gFsmFzhmu0benWvZ+vqFHHzfEm7fr9eKVReLWyy3T2nh5J3ClRqn79KD7/1+Cec+tpRxOyX1l2IecB0gCiYlaxbL5vnBScANrnNIuvRi2eKpTcfNbjRtG7rOUsOWAGvTnE/qUXgr1NSIueR+IHFngEm8LaGp9yktp2prULceTkMpQw0Wcy6bmQVMcJ1D0ufh4qjtphU3fsZ1jhp2v+sAUam5Yi6513UASadDCz8e0WaNzpmsvoWE55OmQq0W8x9J5kUyEnML6DPgvNZj33edowbdm9TTSlalJou5tHl+4q8Okni6p22fnd8trvu86xw15jeuA0SpJou55C7XASS9xhYu3LRozSeuc9SIl2nOv+A6RJRquZjvBz5yHULSaTZrDflp60Gvuc5RI25xHSBqNVvMuWymBbjedQ5Jr1+3fXv0f
+3AF13nSLnlJyylSs0Wc8mNwFLXISS9Dig0r28tC1znSLE/0ZxP3frxmi7mXDYzG801SwV9aIcMu7kt82/XOVLsRtcBKqGmi7nkKtcBJN1+0nroHnNtXx3iGr1JNOcTfVLJ6tR8MeeymZeBx1znkDQzZmzhwgHWatosYle4DlApNV/MJVe7DiDp9pbdwPtD2146qCE6b5OiS7A/S8Uc+gvhP7RIxfitx+++yPbUErpo/JzmfCz3wI6CihnIZTMWuMZ1Dkm3InX13y/8uC7u5wQmwEzgdtchKknF/KnfAp87FVwkSi/ZTYf/o7jDc65zJNw1NOdTPV+vYi7JZTMLSfGLCRIfp7b8cJdltvEd1zkSagE1cGGYinllv0KXaUuFFWhsOrblzMVxPScw5q6iOT/PdYhKUzG3k8tmlgCXus4h6fdMccSIiXYLbarfOXOAn7sOUQ0q5s/7DfCe6xCSfscUztqhxdZ/6DpHglxOc36+6xDVoGL+jNLmRue7ziHpt5iefca3/GCm6xwJ8R/CqcaaoGJetd8B2hVMKi4o7rLDK8WNNKXRsR+nfSVGeyrmVSitaz7DdQ6pDYcUzhvRZo1Gzqs3iS5sNmaM8YwxL3fi8eONMb3b/dnZUVUq5tXIZTNPE54NKFJR8+k74ILWo/W6xuqdQXPeVuF5xgO9O3xUFaiY1+wc0FVaUnl3t/3PLu8X10nV8UgRuZfm/JPd+PwGY8ztxpipxpj7jDG9jTFfNsb82xgzzRhzqzGmyRjzQ2AY8LgxZsWOdcaYy4wxU4wxLxhjhnb7b1MmFfMa5LKZt4DLXeeQ2nBg4aJNipa5rnPEyDzCUWx3DAdustZuA8wHTgduAw6y1o4AGoBx1tprCK9hGGOtHVP63D7AC9bakcBTwPHdzFI2FXPHLgNecR1C0u9jBg75ZeuBr7rOESPn0pz/bze/xn+stc+W3r8T+DLwnrX2zdJ9twN7ruZzC4QbnEE4z+11M0vZVMwdyGUzBeBY0FVaUnnXtX139Md2rX+5zhEDzxHN6STdmZtusdYu//w2wtF1VaiYy5DLZiaiPZulSg4oXLSutThbERADLcCJEb3gt6ExZtfS+4cAjwKeMWbT0n2HA8vnsBcA/SJ4zm5TMZfvfEAbz0jFfWCHbnBL29cnu87h0M9pzpe9zK0DrwFHGmOmAoOAK4GjgT8YY6YR/ib869JjbwL+1v7FP1fMpyN16YjnB2OACa5zSC2w9qWmE6atZRZt4zpJlb0DjKA5v8R1EJc0Yu6EXDbzOHCz6xxSC4w5qHBBP2tZ5jpJFbUBR9Z6KYOKuSvOAqa7DiHp94bdcOMHins87zpHFV1Kc/7Zjh+WfirmTsplM3lgnOscUhvObjlh98W26XXXOargOeAS1yHiQsXcBbls5iHgWtc5JP3aqG84rHAu1tLqOksF5YHv05xvcx0kLlTMXXcGUEu/Zoojk+3mX5pQ3C7Nv+KPozmfcx0iTrQqoxs8P1gfmAys4zqLpFsThaVTm46b0WRaN3adJWJ30Jw/wnWIuNGIuRty2cx04GDCV5NFKmYZPXoe33LGAmu7dSVb3LwF/MB1iDhSMXdTaQmdTjyRinuqOHKbf9nhT7vOEZH5wLdpzi9wHSSOVMzRuAJ40HUISb+jCmdv32rrkn5OYBE4lOZ8Law26RIVcwRKJ54cCbztOouk2yJ69f1Ry8nd3XHNtfNozgeuQ8SZijkipfXN+wOLXWeRdHuouNuOrxW/kNRVGvfQnM+6DhF3KuYI5bKZacAxdG+rQZEOHVI4f8s2a2a5ztFJkwm/P6QDKuaI5bKZe4HTXOeQdJtHv4EXtx6RpKmzmcB+2gejPCrmCshlM9cCl7rOIen2f2377vpBMs4JzANfozn/H9dBkkIXmFSQ5wc3ACe5ziHpNZRPPn6h6ZQmYxjgOstqLAH2pTmflmV+VaERc2X9ALjPdQhJr5kMWufqtv2nuc6xGq3AWJVy52nEXGGeH/QA/
kp4CKRIRbzYNG7SEJPfwXWOdixwBM35O10HSSKNmCusdJjrfoAO2JSKOaBw0VBrWeQ6Rzt+f6fyAAAGJklEQVTjVcpdp2Kuglw2sxD4BvCG6yySTu/bdTe4ve2rk1znKPlfmvPXuA6RZJrKqCLPDzYkPDNwE9dZJH0MxeJLTSe8MsAsHuEwxgU057UiqZs0Yq6iXDbzAbAH8KrrLJI+lrq6gwsX9HZ4TuDpKuVoqJirLJfNzAD2IrwKSiRSr9mNNnmwuFu11zZb4CSa81dW+XlTS1MZjnh+MAAIgNGus0i6NNDaMrXpuHd7m8LwKjxdG3A0zfk7qvBcNUMjZkdKmx59lbCcRSLTSkPjkQW/aG3FD3BoAQ5SKUdPxexQLptZTLiU7reus0i6vGi/tMUTxZHPVPAp5gPfojl/fwWfo2ZpKiMmPD+4DPix6xySHk0Ulk5rOu6/PUyrF/GXzhGW8ssRf10p0Yg5JnLZzHmEl3Cn+Zh6qaJl9Oh5Usv4fMTnBD4HjFIpV5aKOUZy2cz1hJduz3SdRdJhQnH7kZPtZlHtVXEXsA/N+aTtA504msqIIc8PhgF/AHZznUWSrw9LFkxpOn5BgykO6+KXsMBFNOcviTKXrJ5GzDGUy2Y+AvYGrnUcRVJgEb36ndVy4vQufvpCwpUXKuUq0og55jw/+D5wE9DbdRZJtod7nP3s8LoPO7Nu/iXCbTvfqlQmWTUVcwJ4fjACeADY1HUWSa6BzP9kUtO4tjpjh5Tx8OsJL7F2dXl3TdNURgKUDnndEfiz6yySXHPpP+jS1sM6OicwDxxAc/4HKmV3VMwJUbpScD/gdMLjekQ67da2r+/6oV174mo+/E9gO1004p6mMhLI84NNgd8QboYk0inrMee/zzWd2qvdOYFtwM+AC2nOtziMJiUaMSdQLpt5GxhDeEHKQsdxJGFmMHjda9v2W35O4CvArjTnz1Upx4dGzAnn+cFGwM3AV1xnkURpeaTHmRduVvfRlZpLjh8Vc0p4fnAs8AuI7TH2Eh8vACeUXlSWGNJURkrksplbgC2Bh1xnkdiaD5wCjFYpx5tGzCnk+cH+wOXA5q6zSCxY4F7gzFw209UrAKWKVMwp5flBA3A8cBEw1HEccedR4JxcNqOjzBJExZxynh/0Bc4EzgD6Oo4j1TMJ8HPZzKOug0jnqZhrhOcHQwlHz8cDDY7jSOW8A5wP3JvLZvTNnVAq5hrj+cHmhPPP+7vOIpGaCVwC3JTLZrQeOeFUzDXK84NdCI+y+iZgHMeRrptNuD3sL3PZjC42SgkVc43z/GALwjnow4AejuNI+aYBVwN35bKZpa7DSLRUzAKA5wfrAT8knIMe7DiOrFoRCICrctnMBNdhpHJUzLISzw96AocApwLbOY4joQXAb4FrctnMO67DSOWpmGW1PD8YDZxMuN2oTlCpvneA64Bbc9nMfNdhpHpUzNKh0lrobwMHA/uiuehK+gj4PeGVehO15K02qZilUzw/GAh8j7Ckx6D9VqLwMXAfYRk/k8tmio7ziGMqZukyzw/WBQ4knJPeBS2764w5hOc43gs8kctm2hznkRhRMUskSvtC7wvsA+yN9udYldcJ9674K/CoLgSR1VExS0V4frAlYUmPISzqQU4DufEh8ARhGT+qnd2kXCpmqTjPD+qAbfi0qEcDA52Gip4FXgWeAZ4mnCt+320kSSoVszhRuqBla2Crz9z6u8xVBgvkCEu4/e21XDazwGEuSREVs8SK5wdfICzo5aW9EbAeMIzqlfZSYBbhPhT/4fMFvLhKOaRGqZglMTw/6E1Y0kMJLxsfTDh3PYhwamT5dqam3VuzivtaCVdFLC/f9m9n5bKZRRX9i4h0QMUsIhIzujhARCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzKmYRkZhRMYuIxIyKWUQkZlTMIiIxo2IWEYkZFbOISMyomEVEYkbFLCISMypmEZGYUTGLiMSMillEJGZUz
CIiMaNiFhGJGRWziEjMqJhFRGJGxSwiEjMqZhGRmFExi4jEjIpZRCRmVMwiIjGjYhYRiRkVs4hIzKiYRURiRsUsIhIzKmYRkZhRMYuIxIyKWUQkZlTMIiIxo2IWEYkZFbOISMyomEVEYkbFLCISMypmEZGYUTGLiMSMillEJGZUzCIiMfP/yJL5lUvL2ZgAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "series = pd.Series([(oneType), (twoTypes)], index=['Json', 'both'], name='Domain output')\n", - "series.plot.pie(figsize=(6, 6), autopct='%1.0f%%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "Most of the domains has only one type of value output, but not all of them. 16% have outputs that can be json and non-json\n", - "\n", - "Out of the ones that has one value type, 78% is json. \n", - "\n", - ">Are there a set of location domains that always produces a JSON?\n", - "\n", - "Yes, there is a set that always produces the value as a valid JSON, but not all of them. There are also the ones there never produces JSON and some that produces both. \n", - "\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": 189, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "location_domain\n", - "twitter.com 5594\n", - "petsmart.com 2313\n", - "cdiscount.com 1835\n", - "debenhams.com 1229\n", - "mediamarkt.de 1094\n", - "Name: value_md5, dtype: int64" - ] - }, - "execution_count": 189, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Using the above methoed I could tell that the domains did not have only one \n", - "#output, but I could not find a way to tell the output type. \n", - "#Thats why I decided to calculate by hand as you propably notice, sorry. 
\n", - "\n", - "location_domain_group_unique_md5 = location_domain_group['value_md5'].nunique()\n", - "location_domain_group_unique_md5.sort_values(ascending=False).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "## The JSON values are always from the same location or related domains?\n", - "For \"value\" comparison I will use value_md5 instead, because its reliable and faster\n", - "\n", - "* value_md5 is the calculated md5 for the value columns" - ] - }, - { - "cell_type": "code", - "execution_count": 191, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['value_md5', 'location_domain', 'value_len'], dtype='object')" - ] - }, - "execution_count": 191, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = dd.read_parquet('all_json_above_mean.parquet', columns=['value_md5','location_domain', 'value_len'])\n", - "df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 192, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_md5location_domainvalue_len
0cff77029e3ae45dd439a62987b1d8340canada.ca3713
19ac0a0a0afb677c8fd985a7c2f4ddbc5tmall.com103878
29ac0a0a0afb677c8fd985a7c2f4ddbc5tmall.com103878
3983f2d6827a86b128a02cf7442c94af1coches.net1686
4b2ad4d7452aeed3df181b1501cc20231coches.net1686
\n", - "
" - ], - "text/plain": [ - " value_md5 location_domain value_len\n", - "0 cff77029e3ae45dd439a62987b1d8340 canada.ca 3713\n", - "1 9ac0a0a0afb677c8fd985a7c2f4ddbc5 tmall.com 103878\n", - "2 9ac0a0a0afb677c8fd985a7c2f4ddbc5 tmall.com 103878\n", - "3 983f2d6827a86b128a02cf7442c94af1 coches.net 1686\n", - "4 b2ad4d7452aeed3df181b1501cc20231 coches.net 1686" - ] - }, - "execution_count": 192, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 196, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/anaconda3/envs/overscripted/lib/python3.6/site-packages/distributed/worker.py:2791: UserWarning: Large object of size 1.89 MB detected in task graph: \n", - " (\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
location_domain
nunique
value_md5
000599fa6f59053c67e6ccbef137a0d21
0005e12de9897336bf5c7e352a8075681
00076462ead16ac77a1d56745584fd5b1
0007a2345e42bca1d5cac86e356bb87b1
000b0b6b104a36cbc6f31b923e1b31a71
\n", - "" - ], - "text/plain": [ - " location_domain\n", - " nunique\n", - "value_md5 \n", - "000599fa6f59053c67e6ccbef137a0d2 1\n", - "0005e12de9897336bf5c7e352a807568 1\n", - "00076462ead16ac77a1d56745584fd5b 1\n", - "0007a2345e42bca1d5cac86e356bb87b 1\n", - "000b0b6b104a36cbc6f31b923e1b31a7 1" - ] - }, - "execution_count": 207, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "aggmd.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 210, - "metadata": {}, - "outputs": [], - "source": [ - "f1 = aggmd['location_domain']['nunique'] > 1\n", - "aggf = aggmd[f1]" - ] - }, - { - "cell_type": "code", - "execution_count": 215, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "35746" - ] - }, - "execution_count": 215, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "unique_values_count = len(aggmd)\n", - "unique_values_count" - ] - }, - { - "cell_type": "code", - "execution_count": 218, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(35, 35711)" - ] - }, - "execution_count": 218, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "values_multiple_origin = len(aggf)\n", - "values_single_origin = unique_values_count - values_multiple_origin \n", - "(values_multiple_origin, values_single_origin )" - ] - }, - { - "cell_type": "code", - "execution_count": 219, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 219, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAFbCAYAAAAurs6zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd8ZVW9/vHPN9PCFDbNkRkQASleyiBNQS+9eKUNKlLkR1VA4KJwwSvlCktApag4CCJdmh1w6KI0B5Q6FEGBQUHKDKCUTTKTmUly1u+PdTLJhJST5Jyz9tn7eb9eeZ2ck+TkmUzyZGXttdc27z0iIpIvTbEDiIhI9ancRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHIXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOSQyl1EJIdU7iIiOaRyFxHJIZW7iEgOqdxFRHJI5S4ikkMqdxGRHFK5i4jkkMpdRCSHVO4iIjmkchcRySGVu4hIDqncRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHIXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOSQyl1EJIdU7iIiOaRyFxHJIZW7iEgOjY4dQGREXNIMrASMA8aUX8aWb7u+vzv7eGkFXselbfWOLFIP5r2PnUHk/VwyCvggsAowtcft1F6PLT/Cz9QCvF5+mdfj9d4vb+LSzhF+LpG6UblLXKHE1wc2K79sDHwYmAyMipistxIwB5gNPF6+nY1L34maSqQfKnepH5c0AevSXeSbAxsB42PGGqGX6Fn2ofBfj5pIBJW71JJLVga2IZT45oRR+aSomerjdULRPwTcBjyGS/WDJnWlcpfqcsl6wB7AdOATgMUNlAnzgFuBW4Df49IFkfNIAajcZWTCnPknCWU+HVgrbqDMWwjcQyj6W3Dpy5HzSE6p3GXoXDIB2JlQ5rsSliLK8PwFuJlQ9g/h0lLkPJITKnepTFhPvhewD7Aj0Bw3UC69AVwLXIZLn40dRhqbyl0GFubQjwAOYORryqVyfwIuA36FS+fHDiONR+Uu7xdG6V8glPqnIqcpuhbg58CFuPSp2GGkcajcpZtLpgBHE0pd8+jZcx9wPjBTZ8vKYFTuAi7ZBDgO2JuwL4tk28vAjwlz82/FDiPZpHIvMpfsBHwT2Cp2FBmWVuA84Hu49L3YYSRbVO5FFEbqZxNWvUjjews4C7gAly6MHUayQeVeJC5ZE/g2YTmjzhzNn9eA04ErcGlH7DASl8q9CFzyAeBUwoHSMZHTSO3NIfx//1J72hSXyj3PwpmkxwMnUIwNu2RpTwAn49LbYweR+lO555FLRgOHE0ZvH4ycRuKbBZyESx+IHUTqR+WeNy7ZDfgBsHbsKJI5lwPH49I0dhCpPZV7XrhkOWAGcGDsKJJprwGH49LbYgeR2lK554FL/ouwD8kqsaNIw7gaOFaXCcwvlXsjc8kkwhTMl2NHkYY0D/gKLr0pdhCpPpV7o3LJDsAVwGqxo0jD+zlwjLYyyBeVe6MJyxvPBb6CTkSS6nkDOBqXXh87iFSHyr2RuGRr4EpgzdhRJLd+TSj5f8UOIiOjcm8ELhlL2Avma2i0LrU3F/gcLn0odhAZPpV71rlkMnADumiG1NciwsHWn8YOIsOjcs8yl2wE3IQOmko8MwgnPuniIA1G5Z5VLvkscA0wIXYUKby7gL1x6duxg0jlmmIHkD645JvA9ajYJRt2AB7BJRvEDiKV08g9S1yyDGHt+r6xo4j0oRU4CJfeEDuIDE7lnhUuWQX4LbBZ7CgiA/DAGYDTXvHZpnLPApd8nFDsU2JHEanQTOAAXNoSO4j0TeUem0v2I0zFNMeOIjJEjwM749J/xw4i76cDqjG55CjgOlTs0pg2Bu7FJSvHDiLvp3KPxSXHAheiM06lsa0PzMIlOhcjY1TuMbjk68B5sWOIVMlahIL
/SOwg0k3lXm8uOQU4J3YMkSpbjVDw68YOIoHKvZ7CyUlnxo4hUiNTgLtxyVqxg4hWy9RPmIrRiF2K4BVgG1z6YuwgRaZyrweXHA1cEDuGSB39E9gal74cO0hRqdxrzSWHEi5erVUxUjT/IBT8a7GDFJHKvZbCCUrXomMbUlzPAFvqTNb6U7nXiku2ImyVOiZ2FJHIbgX2wKWl2EGKRCPKWggndFyPil0EYFfCZSKljjRyrzaXjAfuJ5yaLSLdDsalV8UOURQauVfflajYRfpyCS75ZOwQRaFyryaXnAzsHTuGSEaNBW7EJR+OHaQINC1TLS7ZjbDHtX5higzsKeCTuHR+7CB5piKqBpf8B2HrXn09RQY3DbgWl+jcjxpSGY2US5YjjNiXjR1FpIHsifZZqilNy4yES0YR1vB+OnYUkQa1Py79WewQeaSR+8ichYpdZCQuwSVrxg6RRxq5D5dLdgLujB1DJAdmAdvqDNbq0sh9OFwyEbg0dgyRnNgK+FrsEHmjch+ecwGt1RWpnu/oKk7VpWmZoXLJ9sAf0Ba+ItX2EPApXNoZO0geaOQ+FGE65nJU7CK18Ang67FD5IXKfWjOAlaPHUIkx76FSzaIHSIPNC1TKZdsA9yDRu0itTYb+AQu7YgdpJFp5F6JsI3vFajYRephE+D/YododCr3ynwX0IkWIvVzCi7ZJHaIRqZpmcGEy+Xdh0btIvX2F2BjrZ4ZHo3cB+KSsWh1jEgsGwKHxA7RqFTuAzsCWDt2CJEC+xYuWSZ2iEakcu9PWNOugzoicU0Fjo0dohGp3Pt3LDA5dggR4Ru4ZMXYIRqNyr0v4RtJZ8qJZEOC/ooeMpV7305CV1YSyZKjcMnqsUM0EpV7by5ZFTg6dgwRWcpYdFm+IVG5v99pQHPsECLyPl/EJRvHDtEoVO49hf2kta5WJJsMODt2iEahcl/amcCo2CFEpF874ZIdY4doBCr3Li7ZDPh87BgiMiiN3iugcu/2HbTNgEgj2ASX7Bw7RNap3IHyQZqdYscQkYodFztA1qncg6/GDiAiQ/JpXPIfsUNkmcrdJSsB+8aOISJDYsDXYofIMpU7HI7WtYs0ogO150z/il3uLhkNHBk7hogMyzKEwZn0odjlDp8FVo0dQkSG7QhcUvQe61PRvyhHxA4gIiPyYeDTsUNkUXHL3SVrAtvHjiEiI6apmT4Ut9zhS+ikJZE82A2XTI0dImuKWe4uGQUcHDuGiFTFaMJgTXooZrnDLoRrM4pIPnwZl+gv8R6KWu6Hxg4gIlW1GrBp7BBZUrxyd8l44L9ixxCRqtszdoAsKV65w47ojFSRPJoeO0CWFLHcd48dQERqYoPyEmehaOUeDrjsGjuGiNSMpmbKilXu4YDLlNghRKRmNDVTVrRy15SMSL59SjtFBip3EcmTUejnHChSubtkFWDj2DFEpOY0NUORyh12ix1AROpiZ1yyTOwQsancRSRvxhPOZym0YpR7+C2+Q+wYIlI3hZ+aKUa5h2Iv/J9pIgWyVewAsRWl3LeJHUBE6mptXJLEDhFTUcp9k9gBRKSujILvElmUctcSSJHi2Sx2gJgGLXcz+5yZzTGz1MzeM7MWM3uvHuGqwiVrAMvHjiEidbd57AAxja7gfc4Bdvfe/63WYWpEUzIixaSR+yDeaOBiB5W7SFGtjktWih0ilkpG7o+a2S+B3wKLuh703t9Qs1TVpXIXKa7NgDtih4ihknJfFlgA7NzjMQ+o3EUk6zZH5d437/0h9QhSE2GzsMmxY4hINIWdd++33M3sf73355jZjwgj9aV4779a02TVoVG7SLEVdsXMQCP3roOoj9YjSI2o3EWKbQoumYpL58YOUm/9lrv3/uby7VX1i1N1hT5DTUQAmAao3Hszs5t5/7RMShjRX+y9X1iLYFWyYewAIhLdqrEDxFDJOvd/AK3ApeWX94A3gHXK97PJJQasEjuGiEQ3NXaAGCpZCrmx937rHvdvNrM/eu+
3NrNnahWsClYExsQOISLRFXKQV8nI/QNmtlrXnfLrXWd9La5JqupYOXYAEcmEQpZ7JSP344H7zezvhG001wCOMrMJQJYPtk6JHUBEMkHl3hfv/W1mtjbwUUK5P9vjIOoPaxluhDRyFxEoaLn3Oy1jZtuXbz8H7Ap8BFgT2KX8WNap3EUEYCVcMjZ2iHobaOS+DXA3sHsfb2uEvWU0LSMiEGYcpgD/jB2kngY6iek0M2sCbvfe/6qOmapFI3cR6bIKBSv3AVfLeO9LwH/XKUu1aeQuIl0KN+9eyVLI35vZCWb2ITNboeul5slGTiN3EelSuHKvZCnkoeXbo3s85gkHV7NMI3cR6aJy7817v0Y9glSVS5qBJHYMEcmM5WIHqLcBy93MJhNG7OsTRut/BS703r9Zh2wjsXzsACKSKYVbCjnQOvdPAY+U714NXFt+/eHy27JMe8qISE/Ryt3MWsu3q5vZF3s8vpmZnT/Ix65uZk8P5/MOdED1+8Ce3vvTvPc3ee9neu9PA/YEftDjk19mZusN55OPJPggRtXgOavm0JltTD63hQ1+3LrksbfbPDtdM5+1f9TKTtfM5522sMuy956v3r6Qtc5vYdpFrcye1wnAc//uZNNLWtnoJ638+ZUOADpKnh2vns+C9vddOEukT3e80MG6F7Sy1vktnHX/IgD2v2EB0y5q5eS7unfzPuO+Rcx8tj1WzGrIwsh9dWBJuXvvH63lFe0GKvdlvfeP937Qe/8EMKnH/S977/9ai3AjUMmB4mgO/tgY7vh/45d67Kz7F7HDGqOZc8xEdlhj9JIftNtf6GDO253MOWYil+zezJG3tgFw8WPtnLVDM7/5wjJ8789h/7aLHmnngGljGD/G6vsPkobUWfIcfVsbt+8/nr8ePZGfP93OU2+EwcNTR05k1sudpAs981pKPDy3k+kfbeg/iIcdvjwIfbY8kH3azK4zsx3N7AEzm2NmHzczZ2Yn9PiYp81s9V5PdRawlZk9YWbHmdm2ZnZL+f2dmV1jZneXn/OwPnKMMrNzzewRM3vKzI4YKPdA5W5mtnyvByaY2Z3AmuXw+5jZvWa2WfntrWb2bTN70sweNLMPlh//SPn+I2Z2etefKSMJPohMj9y3/vBoVlhm6QKe+VwHB20Uvv8O2mgMv30ujMZnPtvBgdPGYmZssepo3l0I81pKjGmCtg7PgnYY0wTvLvTc/Hw7B27U0D+AUkcPv9bJWis0sebyTYwdZey7/hhufb6DtnYoec/iTs+oJjj1nkWcvu242HFHaqQj97WAGYSrOn2UMAL/T+AE4OQKn+NEYJb3/mPe+/P6ePs0wlYvWwKnmlnvfei/BKTe+80J14Y9zMz6XfAy0Aj3PODO8m+j2eXHjgM+BnzNe3+xmSXAkT0+ZgLwoPf+FDM7BzgMOJPwRZnhvf+5mX2ln8+3JLiZjQMeMLM7vfcvDpCxP5ku97680VpiyqTwu3bKpCbenF8C4LUWz4eS7l8Eqy5rvNbiOfrjYznwxjYWdcLFuzVz+n2LOGWrcZhp1J5l3uMJixNKfdwueb08sVYC67rf4+MMX35f3/12wMqP4cG873679+X7PR9/8q22SZOaS5NSP/4VjzGm2ZZ77NXF48c3j/LrXtg2cfr6E9+67/Xm995c1Dl5ysorvTTP4z1NlD+ekreSxwjPbZSWft13va38+JLHyrdWomnJx5V819ualjxPKbwPpfLrnqbyY+ZLNFkpvL/3mHV6837JY13P00T5Ofx8ml8Z4fzHi977vwCUr2Nxl/fem9lfCNMtT4zs6QGY6b1vA9rM7B7g472ed2dgmpntVb6fAGsDfXbkQNsPXGJmc4Ez6F4t8yLhm2pNM9vKez+rV5ksBm4pv/4YsFP59S0Jc/UAPwO+18enHFLwQTRcufenr9lzA1ZLmrj34AkAvPB2ibktJT66UhMH3NjG4k7PGduNY50Vc/NlGBHv+yzS3oXaVX4975dftx5lCz68z5J
C7Xnry4W89GPW9XHZOhjiMfA04UeBZzSlptHmm368x7LzgDcBdr/mndUu3H3Zuefe9a/Vnnmzo3nbNca2Hr3FhHeA8I1YLTUek7TS/MoIn2JRj9dLPe6XCD3awdIzIc3D+By9vz963zfgGO/97yp5sgHnpr33t9Bd1uHZw9mpuwDfLU/R9NTuve8K1DnY8/cypOCD6KzCc9TVByc2Ma8ljN7ntZSYPCF8n6w6yXgl7f4/fvU9z9RJS/8knHL3Qs7cbhznP7SY/Tccw+rLGd+6bxHXfW7pef2iMlvyQzfgb7vwVe2rf7PVydWy0Uod3PhMB5OsbXmAtoWLWW9FmGRtKwDMfLad7VYzxpba1p2bLuaOL45n6yvnL/vVzWxqox3XmUTbqzX+FC8BuwGY2SaE61701kKP45V9mG5m3yXMgGxLmMbpOZ30O+BIM7vbe99uZusAr3nv5/f1ZJVsP7BEeQ5ogff+WsLoe5MKP/RB4PPl1/ft5326go8pf651yhcEGY6GO6y/xzqjuerJEPuqJ9uZvm74vbjHuqO5+qnFeO958NUOknEsmb4BuO+lDlaZ1MTaK45iQTs0GYwyWNBwXwGpt81XGcWct0q8+E6JxZ2eXzzTzh7l77v2Ts+Mhxbz9U+NZUF798C65GFxww2dgDCyrqXrgRXM7AnCVPXzfbzPU0BH+ZjkcX28/WHgVkJfnuG9n9vr7ZcRzjWaXV5leDEDDKCHuqpkQ+BcMysRCvRI+p5i6e1Y4FozO74cPu3jfS4jzF3NtjDX8y+6p3KGKtPVtt/1C7j3pU7+vcCz6g9a+Na24zjxP8ey92/auPzxdlZLjF9/IYy6d1l7NLfN6WCtH7Uyfoxx5fRlljyP954zZy3iV3uF9z180zHsf0MbHSW4aNfh/FUoRTK6ybhgl2Y+fe0COr3n0I+NZf3J4Y+bCx9ZzEEbhZVX0z7YhAc2vKiVXdYazXLNjTVqLxt2uXvvXwI26HH/4H7etnM/Hz+xfNsO7NDrzff2eP157/3h/X3u8kaOJ1PhAVzrnkWpHTMbD7SVD0DsC+znvZ9es0/oklWBkc6xiUh+XI9L9xr83eIwMwe0eu8rGSxXZNCRe3k543eAqd77z5RPWNrSe3/5ED7PpsAF5RH5u3RvRlYrmR65i0jd1XpaZkS8967az1nJnPtPCfPhXWsunydMs1TMez/Le7+R936a935r7/0LQ4s5ZCp3EelpcewA9VZJua9UvhJTWM7lfQfZX43yHnld4iAiw/Gv2AHqrZJyn29mK1IuSzPbgr4PiGaHSzso4H+miPRrXuwA9VbJapn/AW4CPmJmDwAfADJ7YKKHucDk2CFEJBNU7r1572eb2TbAuoTlrs+Vl/Rk3VzCVgkiIr3XjOdeJatlDuz10CZmhvf+6hplqpbC/WeKSL80cu/D5j1ebyYswp9NuIBHlqncRaSLyr037/0xPe+Xd4K8pmaJqkflLiIAC3BptheB1MCQ9pYpW0DYrTHrVO4iAgUctUNlc+43071mvAlYD/hVLUNVicpdREDl3q+eex10AP/03td6+8xqULmLCKjc++a9v68eQWrgDcKZtLpihUixFXKg1++cu5m1mNl7fby0mNl79Qw5LC4tEQpeRIpNI/eevPcDXTGkUbxG94ZnIlJML8cOEEPFF+sws8n0uC6g974RvmBPs/Q6fREpntmxA8Qw6FJIM9vDzOYQLlR9H+FagbfXOFe1PBo7gIhE1ULfl7zLvUrWuZ8BbEG4BNQahDNUH6hpqup5LHYAEYnqCVxayO2/Kyn3du/9W0CTmTV57++hcTbkehJduEOkyAo7wKuk3N81s4nAH4HrzGwGGb9k1RIuXQg8EzuGiESjch/AdKANOA64A/g7sHstQ1VZYf9zRaS4P/8DrXO/wMw+6b2f773v9N53eO+v8t6fX56maRQ6qCpSTK3Ac7FDxDLQyH0O8H0ze8nMzjazRpln703lLlJMT5RPZiykfsvdez/
De78lsA3wNnClmf3NzE41s3XqlnDknqKAVz4XkWKub+8y6Jy79/6f3vuzvfcbA18EPgv8rebJqsWliwknM4lIsRR2vh0qO4lpjJntbmbXEU5eeh74fM2TVZemZkSKp9Dl3u/2A2a2E7AfsCvwMPAL4HDv/fw6ZaumQv8nixTQAuDZ2CFiGmhvmZOBnwEneO/frlOeWrk/dgARqat7cWln7BAxDXRAdTvv/aU5KHZw6V+Bf8SOISJ1c3PsALEN5xqqjeqm2AFEpG5U7rED1JHKXaQYZuPS12KHiK1I5T4LeCd2CBGpOQ3kKFK5u7QDuC12DBGpucJPyUCRyj3Qb3SRfHsVlxb6zNQuRSv329FWBCJ5dkvsAFlRrHJ3aQtwb+wYIlIz+uu8rFjlHsyMHUBEaqIVuDt2iKwoYrnrN7tIPv0ely6KHSIrilfuLn0VeDx2DBGpOg3ceiheuQc3xA4gIlW1CJX7Uopa7lcBhd5USCRnrseljb8PVhUVs9xd+grhYt8ikg+Xxg6QNcUs9+Di2AFEpCrm4NJ7Y4fImiKX+23Aq7FDiMiIXRY7QBYVt9zDRv5XxI4hIiPSDvw0dogsKm65B5ehA6sijewGXPpm7BBZVOxyDwdWb4wdQ0SGbUbsAFlV7HIPfhg7gIgMyyO49M+xQ2SVyt2lDwCPxo4hIkOmUfsAVO6BvklEGss84FexQ2SZyj34JeGbRUQaw0W4tD12iCxTuQPlbxLNvYs0hneBC2KHyDqVe7cfAXNjhxCRQZ2NS3Wx+0Go3Lu4tA1wsWOIyIDmomNkFVG5L+0K4NnYIUSkX6eXB2IyCJV7T2FLglNixxCRPj0PXB47RKNQuffm0huAB2PHEJH3+SYu7YgdolGo3Pv2jdgBRGQpjwG/jh2ikajc++LSPxK2BBaRbDgRl/rYIRqJyr1/JwKl2CFEhLtw6R9ih2g0Kvf+uPQvwLWxY4gUnCcMtGSIVO4DO5VwVXURieM3uFQb+w2Dyn0gLv0n4cxVEam/BcBJsUM0KpX74E4DXogdQqSATsalf48dolGp3Afj0gXAIejgqkg9zQLOjx2ikancK+HS+9F+FiL1EgZUWvo4Iir3yp0CPBc7hEgBnKTpmJFTuVcqbFZ0MJqeEamlP6JFDFWhch8Klz4IfD92DJGcWgAcqumY6lC5D903gb/FDiGSQ5qOqSKV+1C5dBFwENAZO4pIjmg6pspU7sPh0keAc2LHEMkJTcfUgMp9+BzwdOwQIjlwoqZjqs+81y/LYXPJhsCfgQmxo4g0qF/j0r1jh8gjjdxHIuwceQBh5zoRGZonCMuLpQZU7iPl0hsJu0eKSOXeBKaXt/eQGtC0TLW45BfAPrFjiDSAxcD2uPSB2EHyTCP36jmEcJ1HERnYUSr22lO5V0vYnmBP4PXYUUQy7Ee49PLYIYpA5V5NLn2VUPC6epPI+90F/E/sEEWhcq82lz4EHBY7hkjG/B3YG5d2xA5SFCr3WnDpNcC5sWOIZEQLsAcufTt2kCJRudfOicCtsUOIRFYC9self40dpGhU7rXi0hKwL+EMVpEi8oQ9Y26OHaSIVO615NJW4DPAo7GjiERwFC69KnaIolK515pLU2Bn4MnYUUTq6Dhc+pPYIYpM5V4PLn0H2BF4JnYUkTo4CZf+MHaIolO514tL/w3sADwbO4pIDZ2GS8+KHUK0t0z9uWQy8Adgw9hRRKrsRFx6duwQEqjcY3DJisDvgE1jRxGpkmNx6YzYIaSbyj0WlyTA7cCWsaOIjIAnrIrRwdOM0Zx7LN2raO6LHUVkmDqBL6nYs0nlHlP3OvhfxI4iMkQpsBsuvTJ2EOmbpmWywiUnAWeiX7iSfc8T9op5LnYQ6Z/KPUtcshtwHbBs7Cgi/bgT2AeXvhs7iAxMo8QscektwBbAC7GjiPThPGAXFXtj0Mg9i1yyPPBLYKfYUUQI1zz9iubXG4tG7lkUtiv4DGGkJBLTG8B2KvbGo5F71rnkYOA
nwLjISaR4Hgem49JXYgeRoVO5NwKXbAHcCKwcO4oUxq+Bg3HpgthBZHhU7o3CJVOBywjTNSK10gIcj0svjR1ERkbl3mhccghhLj6JHUVy5w+EM05fjh1ERk7l3ohcsgpwCbBL7CiSC63ACbj04thBpHpU7o0sHGw9D1guchJpXHcTRusvxQ4i1aWlkI3MpT8F1gdujZxEGk8rcBSwo4o9nzRyzwuXHAjMQKN4Gdy9wKG49MXYQaR2NHLPC5deTRjF3xI7imTWfOC/ge1V7PmnkXseuWQf4NvAR2JHkUwoAdcAp2olTHGo3PPKJWOAw4BvopOfiuxm4GRc+nTsIFJfKve8c8kE4Fjg62htfJH8CfgGLr0/dhCJQ+VeFC5ZATiJMOfaHDmN1M4zhJH6TbGDSFwq96JxyarAacAhwKjIaaR6XiH8v16FS0uxw0h8Kveicsm6hIOun48dRUbkbeA7wIW4dGHsMJIdKveic8lmwPHAXsDoyGmkci8BFwCX4dI0chbJIJW7BGG/miOBw4EPRE4j/fsj4WS1mbi0M3YYyS6VuyzNJc3AfsAxwMaR00jQRrjs4gxc+kTsMNIYVO7SvzBlcxih7CdFTlNETwGXAtfqotQyVCp3GVxYK78Poei3iJwm7+YTRumX4NKHYoeRxqVyl6FxyXrAnsDuwMfR/kTV8BZhZ8+bgN/h0tbIeSQHVO4yfC6ZDOxKKPqdgQlxAzWU5wllfhPwJx0clWpTuUt1uGQcsD2h6HcDPhQ3UOZ0An+mq9Bd+lzkPJJzKnepDZd8jFD0uwObUMyzYd8EZhE277oVl/47ch4pEJW71J5LlgGmAZsSin5Twt7zY2LGqrJ/AY8Bj5ZvH8Olr8SNJEWmcpc4wjTOhnSX/Sbl++NixqpQV5F3vTyqIpesUblLdoQ96NcnlPxUYEr5ZeUer0+scYoSobznAvN63b4KPKULXkgjULlLY3HJRPou/eUIe+OMKt92vTQRDma2Ax29bt9h6fKeC7yBSzvq9w8SqQ2Vu4hIDukEFBGRHFK5i4jkkMpdRCSHVO4iIjmkchcRySGVu4hIDqncRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHLksqqgAAAApklEQVQXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOSQyl1EJIdU7iIiOaRyFxHJIZW7iEgOqdxFRHJI5S4ikkMqdxGRHFK5i4jkkMpdRCSHVO4iIjmkchcRySGVu4hIDqncRURySOUuIpJDKncRkRxSuYuI5JDKXUQkh1TuIiI5pHIXEckhlbuISA6p3EVEckjlLiKSQyp3EZEcUrmLiOTQ/wem8SZLamobjQAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "series = pd.Series([(values_multiple_origin), (values_single_origin)], index=['multiple', 'single'], name='Value Origin')\n", - "series.plot.pie(figsize=(6, 6), autopct='%1.0f%%')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Conclusion\n", - "The absolute most values have only one origin. Only 35 occurencies of the same value are found to have more than one domain origin. \n", - "\n", - ">The JSON values are always from the same location or related domains?\n", - "\n", - "Almost, 0,097% of the values have multiple origins, but 99,9% is only produced by one domain. \n", - "\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 3b8091515f70b446a9aa34cdef245fabbadda24d Mon Sep 17 00:00:00 2001 From: Camila Resende Date: Mon, 22 Apr 2019 01:37:56 -0300 Subject: [PATCH 23/23] Add isJson_Script_Domain_Output.ipynb and update readme --- .../README.md | 12 +- .../isJson_Script_Domain_Output.ipynb | 245 ++++++++++++++++++ 2 files changed, 252 insertions(+), 5 deletions(-) create mode 100644 analyses/2019_03_aliamcami_value_analyses/isJson_Script_Domain_Output.ipynb diff --git a/analyses/2019_03_aliamcami_value_analyses/README.md b/analyses/2019_03_aliamcami_value_analyses/README.md index d7195a9..0039d9f 100644 --- a/analyses/2019_03_aliamcami_value_analyses/README.md +++ b/analyses/2019_03_aliamcami_value_analyses/README.md @@ -29,13 +29,15 @@ The top 46745 gratest 
value_len are valid JSONs, that is 9.35% of the filtered s --- ## Correlation of location_domain and value -- One domain produces multiple JSONs -- One JSON is usually (99.9%) produced by a single domain. +- One domain can produce a single type of output (31%). +- 99% of the domains with a single type of output do not produce JSON. -### -- One domain can produce values there are both Json or not, but most produce only one type -- Most of the domains that produce a single type produces JSON type. +- 31% of all domains can produce JSON. +- Only 0,016% of all the domains will always have JSON as output, and less than half of them will always have the same JSON. + + +- One JSON is usually (83.09%) produced by a single script domain. --- diff --git a/analyses/2019_03_aliamcami_value_analyses/isJson_Script_Domain_Output.ipynb b/analyses/2019_03_aliamcami_value_analyses/isJson_Script_Domain_Output.ipynb new file mode 100644 index 0000000..64b75e3 --- /dev/null +++ b/analyses/2019_03_aliamcami_value_analyses/isJson_Script_Domain_Output.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start Dask" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/envs/overscripted/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. 
Please read https://msg.pyyaml.org/load for full details.\n", + " data = yaml.load(f.read()) or {}\n" + ] + } + ], + "source": [ + "import dask.dataframe as dd\n", + "from dask.diagnostics import ProgressBar\n", + "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "import numpy as np\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Objective\n", + "\n", + "The objective of this notebook is to answer two main questions: \n", + " - \"Are there a set of location domains that always produces a JSON?\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "To answer \"Are there a set of location domains that always produces a JSON?\", YES, but it amounts to only 0.16%, and\n", + "about 31% of all the domains can produce both types of values, json and non-json, so, we can say that only 31% of all the domains are capable of getting JSONs, the other 69% will never get a JSON value. \n", + "\n", + "---\n", + "\n", + "There are 11185 different script domains, 93.60% of those appear in multiple rows across the dataset. \n", + "\n", + "Most domains only have one type of value output, either they are json or not-json.\n", + "For the domains that have one type of output, 99% of the time they get the same keys_md5, but it may not be very accurate because every non-json value has an empty key, and these are thus all equal, so, after filtering to valid jsons we have only 19 domains with unique output && valid jsons, out of this, 63% are always the same json." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DIR = 'sample_0_prep/'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['is_json', 'location_domain', 'script_domain', 'keys_md5'], dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = dd.read_parquet(DIR + 's0_domains_isJson_jsonKeys_md5_TLD.parquet',\n", + " engine='pyarrow',\n", + " columns=['is_json', 'location_domain', 'script_domain', 'keys_md5'])\n", + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "# Is there a set of script domains that always produces JSON?\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[########################################] | 100% Completed | 3.5s\n", + "The total number of different script_domain is 11185\n" + ] + } + ], + "source": [ + "with ProgressBar():\n", + " group_by_script_domain = df.compute().groupby(['script_domain'])\n", + " group_by_script_domain_len = len(group_by_script_domain)\n", + " print(\"The total number of different {} is {}\".format('script_domain', group_by_script_domain_len))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "agg = group_by_script_domain.agg({'is_json': ['nunique', 'sum'],\n", + " 'location_domain': ['nunique'],\n", + " 'keys_md5': ['nunique']})" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are a total of 10469(93.60%) JSONs that appear in multiple rows\n" + ] + } + ], + "source": [ + "appear_multiple_times = 
agg['is_json'][group_by_script_domain['is_json'].count() > 1]\n", + "appear_multiple_times_len = len(appear_multiple_times)\n", + "agg_len = len(agg['is_json'])\n", + "print('There are a total of {0}({1:0.2f}%) JSONs that appear in multiple rows'.format(\n", + " appear_multiple_times_len, \n", + " appear_multiple_times_len*100/agg_len))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_unique(agg, column, title=''):\n", + " agg_len = len(agg[column])\n", + " x = agg[agg[column]['nunique'] == 1]\n", + " x_len = len(x)\n", + " print(title + '{0} ({1:0.2f}%) unique {2},\\n{3} ({4:0.2f}%) multiple {2}'.format(\n", + " x_len,\n", + " x_len*100/agg_len,\n", + " column, \n", + " agg_len - x_len,\n", + " (agg_len - x_len) * 100 / agg_len\n", + " ))\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "JSON data:\n", + "7697 (68.82%) unique is_json,\n", + "3488 (31.18%) multiple is_json\n", + "\n", + "KEYS data: out of the unique jsons\n", + "7690 (99.91%) unique keys_md5,\n", + "7 (0.09%) multiple keys_md5\n" + ] + } + ], + "source": [ + "unique_is_json = get_unique(agg, 'is_json', 'JSON data:\\n')\n", + "unique_json_key = get_unique(unique_is_json, 'keys_md5', '\\nKEYS data: out of the unique jsons\\n')\n", + "# This may not be very accurate: every non-JSON value has an empty key, so all of them compare equal" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There are only 19 domains with unique valid json output\n", + "\n", + "KEYS data: out of the unique valid jsons\n", + "12 (63.16%) unique keys_md5,\n", + "7 (36.84%) multiple keys_md5\n" + ] + } + ], + "source": [ + "#Filter to only valid jsons\n", + "unique_is_json_jsons = unique_is_json[unique_is_json['is_json']['sum'] > 0]\n", + 
"print(\"There are only {} domains with unique valid json output\".format(len(unique_is_json_jsons)))\n", + "unique_key_jsons = get_unique(unique_is_json_jsons, 'keys_md5', '\\nKEYS data: out of the unique valid jsons\\n')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}