|
41 | 41 | },
|
42 | 42 | "outputs": [],
|
43 | 43 | "source": [
|
44 |
| - "%pip install \"comet_ml>=3.31.5\" \"ray[air]>=2.1.0\" \"transformers>=4.43.0\" \"accelerate>=0.12.0\" \"datasets\" \"sentencepiece\" scipy \"scikit-learn\" protobuf \"torch>=1.3\" evaluate" |
| 44 | + "%pip install \"comet_ml>=3.49.0\" \"ray[air]>=2.1.0\" \"transformers>=4.43.0\" \"accelerate>=0.12.0\" \"datasets\" \"sentencepiece\" scipy \"scikit-learn\" protobuf \"torch>=1.3\" evaluate" |
45 | 45 | ]
|
46 | 46 | },
|
47 | 47 | {
|
|
62 | 62 | "outputs": [],
|
63 | 63 | "source": [
|
64 | 64 | "import comet_ml\n",
|
65 |
| - "import comet_ml.integration.ray\n", |
66 | 65 | "\n",
|
67 | 66 | "comet_ml.init()"
|
68 | 67 | ]
|
|
101 | 100 | "\n",
|
102 | 101 | "import ray.train.huggingface.transformers\n",
|
103 | 102 | "from ray.train import ScalingConfig, RunConfig\n",
|
104 |
| - "from ray.train.torch import TorchTrainer" |
| 103 | + "from ray.train.torch import TorchTrainer\n", |
| 104 | + "import comet_ml.integration.ray\n", |
| 105 | + "from comet_ml.integration.ray import comet_worker" |
105 | 106 | ]
|
106 | 107 | },
|
107 | 108 | {
|
|
164 | 165 | "metadata": {},
|
165 | 166 | "outputs": [],
|
166 | 167 | "source": [
|
| 168 | + "@comet_worker\n", |
167 | 169 | "def train_func(config):\n",
|
168 | 170 | " from comet_ml import get_running_experiment\n",
|
169 |
| - " from comet_ml.integration.ray import comet_worker_logger\n", |
170 |
| - "\n", |
171 |
| - " with comet_worker_logger(config) as experiment:\n", |
172 |
| - " small_train_dataset, small_eval_dataset = get_dataset()\n", |
173 |
| - "\n", |
174 |
| - " # Model\n", |
175 |
| - " model = AutoModelForSequenceClassification.from_pretrained(\n", |
176 |
| - " \"google-bert/bert-base-cased\", num_labels=5\n", |
177 |
| - " )\n", |
178 |
| - "\n", |
179 |
| - " # Evaluation Metrics\n", |
180 |
| - " metric = evaluate.load(\"accuracy\")\n", |
181 |
| - "\n", |
182 |
| - " def compute_metrics(eval_pred):\n", |
183 |
| - " logits, labels = eval_pred\n", |
184 |
| - " predictions = np.argmax(logits, axis=-1)\n", |
185 |
| - "\n", |
186 |
| - " experiment = comet_ml.get_running_experiment()\n", |
187 |
| - " if experiment:\n", |
188 |
| - " experiment.log_confusion_matrix(predictions, labels)\n", |
189 |
| - "\n", |
190 |
| - " return metric.compute(predictions=predictions, references=labels)\n", |
191 |
| - "\n", |
192 |
| - " # Hugging Face Trainer\n", |
193 |
| - " training_args = TrainingArguments(\n", |
194 |
| - " do_eval=True,\n", |
195 |
| - " do_train=True,\n", |
196 |
| - " eval_strategy=\"epoch\",\n", |
197 |
| - " num_train_epochs=config[\"epochs\"],\n", |
198 |
| - " output_dir=\"./results\",\n", |
199 |
| - " overwrite_output_dir=True,\n", |
200 |
| - " per_device_eval_batch_size=4,\n", |
201 |
| - " per_device_train_batch_size=4,\n", |
202 |
| - " report_to=[\"comet_ml\"],\n", |
203 |
| - " seed=SEED,\n", |
204 |
| - " )\n", |
205 |
| - " trainer = Trainer(\n", |
206 |
| - " model=model,\n", |
207 |
| - " args=training_args,\n", |
208 |
| - " train_dataset=small_train_dataset,\n", |
209 |
| - " eval_dataset=small_eval_dataset,\n", |
210 |
| - " compute_metrics=compute_metrics,\n", |
211 |
| - " )\n", |
212 |
| - "\n", |
213 |
| - " # Report Metrics and Checkpoints to Ray Train\n", |
214 |
| - " callback = ray.train.huggingface.transformers.RayTrainReportCallback()\n", |
215 |
| - " trainer.add_callback(callback)\n", |
216 |
| - "\n", |
217 |
| - " # Prepare Transformers Trainer\n", |
218 |
| - " trainer = ray.train.huggingface.transformers.prepare_trainer(trainer)\n", |
219 |
| - "\n", |
220 |
| - " # Start Training\n", |
221 |
| - " trainer.train()\n", |
222 |
| - "\n", |
223 |
| - " comet_ml.get_running_experiment().end()" |
| 171 | + "\n", |
| 172 | + " small_train_dataset, small_eval_dataset = get_dataset()\n", |
| 173 | + "\n", |
| 174 | + " # Model\n", |
| 175 | + " model = AutoModelForSequenceClassification.from_pretrained(\n", |
| 176 | + " \"google-bert/bert-base-cased\", num_labels=5\n", |
| 177 | + " )\n", |
| 178 | + "\n", |
| 179 | + " # Evaluation Metrics\n", |
| 180 | + " metric = evaluate.load(\"accuracy\")\n", |
| 181 | + "\n", |
| 182 | + " def compute_metrics(eval_pred):\n", |
| 183 | + " logits, labels = eval_pred\n", |
| 184 | + " predictions = np.argmax(logits, axis=-1)\n", |
| 185 | + "\n", |
| 186 | + " experiment = comet_ml.get_running_experiment()\n", |
| 187 | + " if experiment:\n", |
| 188 | + " experiment.log_confusion_matrix(predictions, labels)\n", |
| 189 | + "\n", |
| 190 | + " return metric.compute(predictions=predictions, references=labels)\n", |
| 191 | + "\n", |
| 192 | + " # Hugging Face Trainer\n", |
| 193 | + " training_args = TrainingArguments(\n", |
| 194 | + " do_eval=True,\n", |
| 195 | + " do_train=True,\n", |
| 196 | + " eval_strategy=\"epoch\",\n", |
| 197 | + " num_train_epochs=config[\"epochs\"],\n", |
| 198 | + " output_dir=\"./results\",\n", |
| 199 | + " overwrite_output_dir=True,\n", |
| 200 | + " per_device_eval_batch_size=4,\n", |
| 201 | + " per_device_train_batch_size=4,\n", |
| 202 | + " report_to=[\"comet_ml\"],\n", |
| 203 | + " seed=SEED,\n", |
| 204 | + " )\n", |
| 205 | + " trainer = Trainer(\n", |
| 206 | + " model=model,\n", |
| 207 | + " args=training_args,\n", |
| 208 | + " train_dataset=small_train_dataset,\n", |
| 209 | + " eval_dataset=small_eval_dataset,\n", |
| 210 | + " compute_metrics=compute_metrics,\n", |
| 211 | + " )\n", |
| 212 | + "\n", |
| 213 | + " # Report Metrics and Checkpoints to Ray Train\n", |
| 214 | + " callback = ray.train.huggingface.transformers.RayTrainReportCallback()\n", |
| 215 | + " trainer.add_callback(callback)\n", |
| 216 | + "\n", |
| 217 | + " # Prepare Transformers Trainer\n", |
| 218 | + " trainer = ray.train.huggingface.transformers.prepare_trainer(trainer)\n", |
| 219 | + "\n", |
| 220 | + " # Start Training\n", |
| 221 | + " trainer.train()\n", |
| 222 | + "\n", |
| 223 | + " comet_ml.end()" |
224 | 224 | ]
|
225 | 225 | },
|
226 | 226 | {
|
|
240 | 240 | " scaling_config = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu)\n",
|
241 | 241 | " config = {\"use_gpu\": use_gpu, \"epochs\": 2}\n",
|
242 | 242 | "\n",
|
243 |
| - " callback = comet_ml.integration.ray.CometTrainLoggerCallback(\n", |
244 |
| - " config, project_name=\"comet-example-ray-train-hugginface-transformers\"\n", |
245 |
| - " )\n", |
246 |
| - "\n", |
247 | 243 | " ray_trainer = TorchTrainer(\n",
|
248 | 244 | " train_func,\n",
|
249 | 245 | " scaling_config=scaling_config,\n",
|
250 | 246 | " train_loop_config=config,\n",
|
251 |
| - " run_config=RunConfig(callbacks=[callback]),\n", |
252 | 247 | " )\n",
|
| 248 | + " comet_ml.integration.ray.comet_ray_train_logger(\n", |
| 249 | + " ray_trainer, project_name=\"comet-example-ray-train-hugginface-transformers\"\n", |
| 250 | + " )\n", |
| 251 | + "\n", |
253 | 252 | " result = ray_trainer.fit()"
|
254 | 253 | ]
|
255 | 254 | },
|
|
278 | 277 | "\n",
|
279 | 278 | "train(num_workers, use_gpu=False, epochs=5)"
|
280 | 279 | ]
|
281 |
| - }, |
282 |
| - { |
283 |
| - "cell_type": "code", |
284 |
| - "execution_count": null, |
285 |
| - "metadata": {}, |
286 |
| - "outputs": [], |
287 |
| - "source": [] |
288 | 280 | }
|
289 | 281 | ],
|
290 | 282 | "metadata": {
|
|
0 commit comments