diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml
index 9477002a91..bcb51d6b58 100644
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@@ -51,7 +51,7 @@ jobs:
         run: |
           pip install --upgrade pip uv
           uv pip install git+https://github.com/huggingface/doc-builder
-          uv pip install .[quality] nncf openvino neural-compressor[pt]>3.4 diffusers accelerate
+          uv pip install .[quality] nncf openvino neural-compressor[pt]>3.4 diffusers accelerate datasets

       - name: Make documentation
         shell: bash
diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
index d476884aa9..41aa8ce9bd 100644
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -38,7 +38,7 @@ jobs:
         run: |
           pip install --upgrade pip uv
           uv pip install git+https://github.com/huggingface/doc-builder
-          uv pip install .[quality] nncf openvino neural-compressor[pt]>3.4 diffusers accelerate
+          uv pip install .[quality] nncf openvino neural-compressor[pt]>3.4 diffusers accelerate datasets

       - name: Make documentation
         shell: bash
diff --git a/optimum/intel/openvino/quantization.py b/optimum/intel/openvino/quantization.py
index b7b692b225..5b2ee4ee66 100644
--- a/optimum/intel/openvino/quantization.py
+++ b/optimum/intel/openvino/quantization.py
@@ -23,7 +23,6 @@
 from pathlib import Path
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

-import datasets
 import nncf
 import numpy as np
 import openvino
@@ -360,7 +359,15 @@ def build_from_quantization_config(self, config: OVQuantizationConfigBase) -> OV
                     streaming=dataset_metadata["streaming"],
                 )
             elif isinstance(config.dataset, list) and all(isinstance(it, str) for it in config.dataset):
-                dataset = datasets.Dataset.from_list([{"text": it} for it in config.dataset])
+                if not is_datasets_available():
+                    raise ValueError(
+                        DATASETS_IMPORT_ERROR.format("OVCalibrationDatasetBuilder.build_from_quantization_config")
+                    )
+
+                from datasets import Dataset
+
+                dataset = Dataset.from_list([{"text": it} for it in config.dataset])
+
             else:
                 raise ValueError(
                     "Please provide dataset as one of the accepted dataset labels or as a list of strings."
diff --git a/setup.py b/setup.py
index b141677055..2c2add705f 100644
--- a/setup.py
+++ b/setup.py
@@ -30,9 +30,7 @@
     "torch>=2.1",
     "optimum-onnx==0.0.*",
     "transformers>=4.45,<4.56",
-    "datasets>=1.4.0",
     "setuptools",
-    "scipy",
 ]

 TESTS_REQUIRE = [
@@ -68,7 +66,7 @@
 EXTRAS_REQUIRE = {
     "nncf": ["nncf>=2.18.0"],
     "openvino": ["nncf>=2.18.0", "openvino>=2025.1.0", "openvino-tokenizers>=2025.1.0"],
-    "neural-compressor": ["neural-compressor[pt]>=3.4.1", "accelerate", "transformers<4.46"],
+    "neural-compressor": ["neural-compressor[pt]>=3.4.1", "accelerate", "transformers<4.46", "datasets"],
     "ipex": ["intel-extension-for-pytorch>=2.8", "transformers>4.54,<4.56", "accelerate"],
     "diffusers": ["diffusers"],
     "quality": QUALITY_REQUIRE,
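
The quantization.py hunk drops the module-level `import datasets` and instead imports `Dataset` lazily inside the branch that handles a list of strings, guarded by `is_datasets_available()` and `DATASETS_IMPORT_ERROR`. Below is a minimal sketch of that optional-dependency pattern, not the library's actual implementation: the helper `build_text_calibration_dataset` is hypothetical, and the importlib-based availability check and error-message wording are assumptions about how the guard utilities behave.

```python
# Sketch of the lazy-import guard adopted in quantization.py (assumed implementation).
import importlib.util


def is_datasets_available() -> bool:
    # True when the optional `datasets` package can be imported in this environment.
    return importlib.util.find_spec("datasets") is not None


# Assumed error template; the real DATASETS_IMPORT_ERROR text may differ.
DATASETS_IMPORT_ERROR = (
    "{0} requires the `datasets` library but it was not found in your environment. "
    "You can install it with: pip install datasets"
)


def build_text_calibration_dataset(texts):
    # Hypothetical helper mirroring the diff: fail with a clear message if the
    # optional dependency is missing, otherwise import it only when needed.
    if not is_datasets_available():
        raise ValueError(DATASETS_IMPORT_ERROR.format("build_text_calibration_dataset"))

    from datasets import Dataset

    return Dataset.from_list([{"text": t} for t in texts])
```

This keeps `datasets` out of the base install (it is removed from setup.py's core requirements and added only where it is actually needed, e.g. the `neural-compressor` extra and the documentation workflows) while still giving users a clear installation hint when they pass raw strings as a calibration dataset.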