Skip to content

Help: Can not use grid search on Kaggle #555

Open
@gaojianan

Description

@gaojianan

I used pip to install pytorch_tabular on Kaggle and tried to use grid search to find good hyperparameters.

!pip install pytorch_tabular
import gc
import numpy as np
import pickle
import pandas as pd
import random
import pytorch_tabular as pt
import warnings
import pprint
random.seed(191)
np.random.seed(191)
class parameter:
    """Container for run settings derived from the data and the batch size.

    The class attributes are defaults; the constructor overrides ``epochs``,
    ``inputsize`` and ``outputsize`` on each instance and adds the
    batch-dependent step counts.
    """

    # Class-level defaults (shared until an instance overrides them).
    epochs = 64
    timesteps = 30
    decaysteps = 224
    inputsize = 100
    outputsize = 32
    learningrate = 0.01
    alpha = 1e-4
    decayto = 1e-7

    def __init__(self, data, n, batchsize):
        """Derive the per-run settings.

        Args:
            data: Object exposing ``shape``; ``shape[1]`` becomes ``inputsize``.
            n: Number of training rows.
            batchsize: Rows per batch.
        """
        # Every step count below is derived from this one ratio.
        batches = n / batchsize

        self.batchsize = batchsize
        self.epochs = round(batches)
        self.inputsize = data.shape[1]
        self.outputsize = 1
        self.k = 9
        self.num_regressors = 512
        self.steps_per_epoch = int(batches)
        self.validation_steps = int(batches / 3)
def poly_description(y1, j):
    """Describe each window of ``j`` consecutive columns by a cubic fit.

    The columns of ``y1`` are split into consecutive, non-overlapping windows
    of width ``j``.  For every row and every window a degree-3 polynomial is
    fitted over ``j`` evenly spaced abscissae in ``[-j/2, j/2]``.  Columns left
    over after the last full window are ignored (they cannot be fitted against
    the ``j``-point abscissa).

    Args:
        y1: 2-D array; windows are taken along axis 1.
        j: Window width in columns; must be positive and not exceed
            ``y1.shape[1]``.

    Returns:
        Array of shape ``(y1.shape[0], 4 * (y1.shape[1] // j))``.  Each window
        contributes four columns, highest-degree coefficient first.

    Raises:
        ValueError: If ``j`` is not positive or ``y1`` has fewer than ``j``
            columns.
    """
    if j <= 0:
        raise ValueError('window width j must be positive, got {}'.format(j))
    total = y1.shape[1]
    n_windows = total // j
    if n_windows == 0:
        raise ValueError(
            'need at least {} columns for one window, got {}'.format(j, total)
        )

    x1 = np.linspace(j / (-2), j / 2, j)
    fits = []
    for start in range(0, n_windows * j, j):
        fits.append(np.polyfit(x1, y1[:, start:start + j].T, deg=3).T)
        # Carriage-return progress indicator: fraction of columns consumed.
        if start == 0:
            print('\r0', end='\r')
        else:
            print('\r{} '.format(start / total), end='\r')
    # Concatenate once instead of growing the result inside the loop.
    return np.hstack(fits)
def preprocess(spectrum):
    """Turn raw spectra into a feature matrix plus matching column labels.

    Features per row, in output order: cubic-fit coefficients from
    ``poly_description`` for window widths 25, 20, 10 and 5, followed by one
    final column holding the grid value at the position of the row maximum.

    Args:
        spectrum: 2-D array with one spectrum per row and one channel per
            column.

    Returns:
        ``(features, labels)``: ``features`` is a 2-D array and ``labels`` a
        list of column names.  Coefficient labels look like ``N<windows>D<i>a<k>``
        for the full windows of each width, and the last label is ``'Edge0'``.
    """
    n_channels = spectrum.shape[1]

    # The grid needs one point per *channel* (column): ``argmax(axis=1)``
    # yields column indices, so a grid sized by the row count would map the
    # same peak position to a different value depending on how many rows a
    # split has (and could index out of range).
    # NOTE(review): 7115.0-7167.8 is presumably the energy axis of the
    # spectra -- confirm units against the dataset.
    grid = np.linspace(7115.0, 7167.8, n_channels)
    peak = grid[np.argmax(spectrum, axis=1)].reshape(-1, 1)

    # Fit in ascending window order, then stack widest-window-first so the
    # columns line up with the labels generated below.
    fits = [poly_description(spectrum, j) for j in (5, 10, 20, 25)]
    ansx = np.hstack(fits[::-1] + [peak])

    index = []
    for j in (25, 20, 10, 5):
        n_windows = n_channels // j
        for i in range(n_windows):
            mid = 'N{}D{}'.format(n_windows, i)
            index.extend([mid + 'a3', mid + 'a2', mid + 'a1', mid + 'a0'])
    index.append('Edge0')
    return ansx, index
def csh(train_file, test_file, batch_size):
    """Load the pickled data sets and return standardised DataFrames.

    The training pickle is shuffled, split 75/25 into train and validation
    rows, featurised with ``preprocess`` and standardised using the mean and
    sample standard deviation of the training part.  The test pickle is
    featurised and standardised with those same statistics.

    Args:
        train_file: Path to a pickle whose item 0 holds the spectra and item 1
            the targets.
        test_file: Path to a pickle with the same layout.
        batch_size: Batch size forwarded to ``parameter``.

    Returns:
        Tuple ``(pr, train, validation, test, ymean, ystd, y_test, n, columns)``:
        the ``parameter`` instance, three DataFrames with a ``y`` target
        column, the target mean and standard deviation as Python scalars, the
        unscaled test targets, the number of training rows and the feature
        column names.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(train_file, 'rb') as handle:
        tv = pickle.load(handle)

    n = round(0.75 * tv[1].shape[0])

    # Replay the same RNG state for both in-place shuffles so that spectra and
    # targets are permuted identically (assuming they have equal length).
    rng_state = np.random.get_state()
    np.random.shuffle(tv[0])
    np.random.set_state(rng_state)
    np.random.shuffle(tv[1])

    x_train, columns = preprocess(tv[0][:n, :].copy())
    x_validation, _ = preprocess(tv[0][n:, :].copy())
    pr = parameter(x_train, n, batch_size)

    # Standardisation statistics come from the training rows only.
    xmean = np.mean(x_train, axis=0, dtype='float64')
    xstd = np.std(x_train, axis=0, dtype='float64', ddof=1)
    ymean = np.mean(tv[1][:n], axis=0, dtype='float64')
    ystd = np.std(tv[1][:n], axis=0, dtype='float64', ddof=1)

    def scale_x(values):
        return (values - xmean) / xstd

    def scale_y(values):
        return (values - ymean) / ystd

    def to_frame(features, target):
        return pd.DataFrame(features, columns=columns).assign(y=target)

    x_train = scale_x(x_train)
    y_train = scale_y(tv[1][:n])
    x_validation = scale_x(x_validation)
    y_validation = scale_y(tv[1][n:])

    with open(test_file, 'rb') as handle:
        te = pickle.load(handle)
    x_test, _ = preprocess(te[0].copy())
    x_test = scale_x(x_test)
    y_test = scale_y(te[1][:])

    train = to_frame(x_train, y_train)
    validation = to_frame(x_validation, y_validation)
    test = to_frame(x_test, y_test)

    return (
        pr,
        train,
        validation,
        test,
        ymean.item(),
        ystd.item(),
        te[1][:],
        n,
        columns,
    )
BATCHSIZE = 4096

# Load both pickles, featurise them and standardise with training statistics.
(pr, train, validation, test, ymean, ystd, y_test, n, columns) = csh(
    '/kaggle/input/materials-project-fe-xanes-elements-k-average/materials project Fe-XANES-K-average-combine_train_clean_constantx.pickle',
    '/kaggle/input/materials-project-fe-xanes-elements-k-average/materials project Fe-XANES-K-average-combine_test_clean_constantx.pickle',
    BATCHSIZE,
)
gc.enable()

# Every engineered feature is continuous; the target column is 'y'.
data_config = pt.config.DataConfig(
    target=['y'],
    continuous_cols=columns,
    validation_split=0,
)

# Batch size and epoch budget come from the `parameter` instance.
trainer_config = pt.config.TrainerConfig(
    batch_size=pr.batchsize,
    max_epochs=pr.epochs,
    min_epochs=8,
    auto_lr_find=True,
    early_stopping='valid_loss',
    early_stopping_mode='min',
    early_stopping_patience=8,
    load_best=True,
    progress_bar='none',
    accelerator='auto',
)

optimizer_config = pt.config.OptimizerConfig(
    optimizer='AdamW',
    lr_scheduler_monitor_metric='valid_loss',
)

# No additional layer in head, just a mapping layer to output_dim.
head_config = pt.models.common.heads.LinearHeadConfig(
    layers="",
    dropout=0.0,
    initialization="kaiming",
).__dict__

# target_range is expressed in standardised units: the raw bounds 0 and 6,
# each widened by 2**-16, are shifted and scaled with ymean / ystd.
model_config = pt.models.GANDALFConfig(
    task='regression',
    head='LinearHead',
    head_config=head_config,
    loss='SmoothL1Loss',
    target_range=[
        (
            (0 - 2 ** (-16) - ymean) / ystd,
            (6 + 2 ** (-16) - ymean) / ystd,
        )
    ],
)

# Grid of GANDALF hyper-parameters to explore (3 x 2 combinations).
search_space = {
    "model_config__gflu_stages": [3, 6, 9],
    'model_config__gflu_dropout': [0.0, 0.2],
}

tuner = pt.tabular_model_tuner.TabularModelTuner(
    data_config=data_config,
    model_config=model_config,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config,
)
# tune() returns ONE namedtuple (trials_df, best_params, best_score,
# best_model); unpacking it into two names raises
# "ValueError: too many values to unpack", so keep it whole.
# NOTE(review): trials are scored on `test`, and the `validation` frame
# returned by csh() is never used -- confirm this is intended, since it means
# hyper-parameters are selected on the test set.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    result = tuner.tune(
        train=train,
        validation=test,
        search_space=search_space,
        strategy="grid_search",
        metric="mean_squared_error",
        mode='min',
        return_best_model=True,
        progress_bar=False,
        verbose=True
    )
best_model = result.best_model
#print("Best Score: ", result.best_score)
# Persist only the plain search results.  The namedtuple itself also holds the
# fitted model and its class is defined as a class attribute, so it does not
# pickle reliably; the model is saved separately below.
with open('result.pickle', 'wb') as f:
    pickle.dump(
        {
            'trials_df': result.trials_df,
            'best_params': result.best_params,
            'best_score': result.best_score,
        },
        f,
    )
#pprint.pprint(result.best_params)
best_model.save_model("best_model", inference_only=True)

I got asyncio.exceptions.CancelledError in the log after this version failed to run.

2520.6s | 128 | Traceback (most recent call last):
-- | -- | --
2520.6s | 129 | File "/usr/local/lib/python3.10/dist-packages/nbclient/client.py", line 762, in _async_poll_output_msg
2520.6s | 130 | msg = await ensure_async(self.kc.iopub_channel.get_msg(timeout=None))
2520.6s | 131 | File "/usr/local/lib/python3.10/dist-packages/nbclient/util.py", line 96, in ensure_async
2520.6s | 132 | result = await obj
2520.6s | 133 | File "/usr/local/lib/python3.10/dist-packages/jupyter_client/channels.py", line 308, in get_msg
2520.6s | 134 | ready = await self.socket.poll(timeout_ms)
2520.6s | 135 | asyncio.exceptions.CancelledError
2520.6s | 136 |  
2520.6s | 137 | During handling of the above exception, another exception occurred:
2520.6s | 138 |  
2520.6s | 139 | Traceback (most recent call last):
2520.6s | 140 | File "/usr/lib/python3.10/asyncio/tasks.py", line 456, in wait_for
2520.6s | 141 | return fut.result()
2520.6s | 142 | asyncio.exceptions.CancelledError
2520.6s | 143 |  
2520.6s | 144 | The above exception was the direct cause of the following exception:
2520.6s | 145 |  
2520.6s | 146 | Traceback (most recent call last):
2520.6s | 147 | File "/usr/local/lib/python3.10/dist-packages/nbclient/client.py", line 735, in _async_poll_for_reply
2520.6s | 148 | await asyncio.wait_for(task_poll_output_msg, self.iopub_timeout)
2520.6s | 149 | File "/usr/lib/python3.10/asyncio/tasks.py", line 458, in wait_for
2520.6s | 150 | raise exceptions.TimeoutError() from exc
2520.6s | 151 | asyncio.exceptions.TimeoutError
2520.6s | 152 |  
2520.6s | 153 | During handling of the above exception, another exception occurred:
2520.6s | 154 |  
2520.6s | 155 | Traceback (most recent call last):
2520.6s | 156 | File "<string>", line 1, in <module>
2520.6s | 157 | File "/usr/local/lib/python3.10/dist-packages/papermill/execute.py", line 116, in execute_notebook
2520.6s | 158 | nb = papermill_engines.execute_notebook_with_engine(
2520.6s | 159 | File "/usr/local/lib/python3.10/dist-packages/papermill/engines.py", line 48, in execute_notebook_with_engine
2520.7s | 160 | return self.get_engine(engine_name).execute_notebook(nb, kernel_name, **kwargs)
2520.7s | 161 | File "/usr/local/lib/python3.10/dist-packages/papermill/engines.py", line 370, in execute_notebook
2520.7s | 162 | cls.execute_managed_notebook(nb_man, kernel_name, log_output=log_output, **kwargs)
2520.7s | 163 | File "/usr/local/lib/python3.10/dist-packages/papermill/engines.py", line 442, in execute_managed_notebook
2520.7s | 164 | return PapermillNotebookClient(nb_man, **final_kwargs).execute()
2520.7s | 165 | File "/usr/local/lib/python3.10/dist-packages/papermill/clientwrap.py", line 45, in execute
2520.7s | 166 | self.papermill_execute_cells()
2520.7s | 167 | File "/usr/local/lib/python3.10/dist-packages/papermill/clientwrap.py", line 72, in papermill_execute_cells
2520.7s | 168 | self.execute_cell(cell, index)
2520.7s | 169 | File "/usr/local/lib/python3.10/dist-packages/nbclient/util.py", line 84, in wrapped
2520.7s | 170 | return just_run(coro(*args, **kwargs))
2520.7s | 171 | File "/usr/local/lib/python3.10/dist-packages/nbclient/util.py", line 62, in just_run
2520.7s | 172 | return loop.run_until_complete(coro)
2520.7s | 173 | File "/usr/lib/python3.10/asyncio/base_events.py", line 649, in run_until_complete
2520.7s | 174 | return future.result()
2520.7s | 175 | File "/usr/local/lib/python3.10/dist-packages/nbclient/client.py", line 949, in async_execute_cell
2520.7s | 176 | exec_reply = await self.task_poll_for_reply
2520.7s | 177 | File "/usr/local/lib/python3.10/dist-packages/nbclient/client.py", line 739, in _async_poll_for_reply
2520.7s | 178 | raise CellTimeoutError.error_from_timeout_and_cell(
2520.7s | 179 | nbclient.exceptions.CellTimeoutError: A cell timed out while it was being executed, after 4 seconds.
2520.7s | 180 | The message was: Timeout waiting for IOPub output.
2520.7s | 181 | Here is a preview of the cell contents:
2520.7s | 182 | -------------------
2520.7s | 183 | ['!pip install pytorch_tabular', 'import gc', 'import numpy as np', 'import pickle', 'import pandas as pd']
2520.7s | 184 | ...
2520.7s | 185 | ["with open('result.pickle','wb') as f:", '    pickle.dump(result,f)', '    f.close()', '#pprint.pprint(result.best_params)', 'best_model.save_model("best_model", inference_only=True)']
2520.7s | 186 | -------------------
2520.7s | 187 |  
2523.2s | 188 | /usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py:2915: FutureWarning: --Exporter.preprocessors=["remove_papermill_header.RemovePapermillHeader"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.
2523.2s | 189 | warn(
2523.2s | 190 | [NbConvertApp] Converting notebook __notebook__.ipynb to notebook
2523.6s | 191 | [NbConvertApp] Writing 27376 bytes to __notebook__.ipynb
2524.9s | 192 | /usr/local/lib/python3.10/dist-packages/traitlets/traitlets.py:2915: FutureWarning: --Exporter.preprocessors=["nbconvert.preprocessors.ExtractOutputPreprocessor"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.
2524.9s | 193 | warn(
2524.9s | 194 | [NbConvertApp] Converting notebook __notebook__.ipynb to html
2525.7s | 195 | [NbConvertApp] Writing 323460 bytes to __results__.html
2527.7s | 196 | 0  0.05   0.1   0.15   0.2   0.25   0.3   0.35   0.4   0.45   0.5   0.55   0.6   0.65   0.7   0.75   0.8   0.85   0.9   0.95   0  0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   0  0.2   0.4   0.6   0.8   0  0.25   0.5   0.75   0  0.05   0.1   0.15   0.2   0.25   0.3   0.35   0.4   0.45   0.5   0.55   0.6   0.65   0.7   0.75   0.8   0.85   0.9   0.95   0  0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   0  0.2   0.4   0.6   0.8   0  0.25   0.5   0.75   0  0.05   0.1   0.15   0.2   0.25   0.3   0.35   0.4   0.45   0.5   0.55   0.6   0.65   0.7   0.75   0.8   0.85   0.9   0.95   0  0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8   0.9   0  0.2   0.4   0.6   0.8   0  0.25   0.5   0.75

How to solve this problem?

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions