Merged
Changes from all commits
8 changes: 0 additions & 8 deletions .azure-pipelines/scripts/models/update_yaml_config.py
@@ -134,14 +134,6 @@ def update_yaml_config_tuning(
     prev_strategy = tuning_config.get("strategy", {})
     strategy_name = prev_strategy.get("name", None)
     prev_strategy.update({"name": strategy})
-    if strategy == "sigopt":
-        prev_strategy.update(
-            {
-                "sigopt_api_token": strategy_token,
-                "sigopt_project_id": "lpot",
-                "sigopt_experiment_name": "lpot-tune",
-            }
-        )
     if strategy == "hawq":
         prev_strategy.update({"loss": "CrossEntropyLoss"})
     print(f"Changed {strategy_name} to {strategy}")
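For context, a minimal runnable sketch of what the trimmed-down strategy update now does. The standalone wrapper function and the assumed shape of `tuning_config` are illustrations only; the body mirrors the diff above.

```python
# Hypothetical standalone version of the strategy-update logic after this PR:
# the sigopt token/project fields are no longer injected; only the hawq
# special case remains.
def update_strategy(tuning_config: dict, strategy: str) -> dict:
    prev_strategy = tuning_config.get("strategy", {})
    strategy_name = prev_strategy.get("name", None)
    prev_strategy.update({"name": strategy})
    if strategy == "hawq":
        prev_strategy.update({"loss": "CrossEntropyLoss"})
    tuning_config["strategy"] = prev_strategy
    print(f"Changed {strategy_name} to {strategy}")
    return tuning_config

# Example: switching an existing config from "basic" to "hawq"
cfg = {"strategy": {"name": "basic"}}
update_strategy(cfg, "hawq")  # prints: Changed basic to hawq
```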
2 changes: 1 addition & 1 deletion .github/checkgroup.yml
@@ -36,7 +36,7 @@ subprojects:
     paths:
       - "neural_compressor/common/**"
       - "neural_compressor/torch/**"
-      - "examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/**"
+      - "examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/**"
      - "setup.py"
      - "requirements_pt.txt"
      - ".azure-pipelines/scripts/models/**"
159 changes: 159 additions & 0 deletions .github/workflows/pr-link-scan.yml
@@ -0,0 +1,159 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Check hyperlinks and relative path validity

permissions:
  contents: read

on:
  pull_request:
    branches: [master]
    types: [opened, reopened, ready_for_review, synchronize]

jobs:
  check-the-validity-of-hyperlinks-in-README:
    runs-on: ubuntu-latest
    steps:
      - name: Clean Up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout Repo
        uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2
        with:
          fetch-depth: 0

      - name: Check the Validity of Hyperlinks
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          cd ${{github.workspace}}
          delay=1
          fail="FALSE"
          merged_commit=$(git log -1 --format='%H')
          changed_files="$(git diff --name-status --diff-filter=ARM $BASE_SHA ${merged_commit} | awk '/\.md$/ {print $NF}')"
          if [ -n "$changed_files" ]; then
            for changed_file in $changed_files; do
              url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file") || true
              if [ -n "$url_lines" ]; then
                for url_line in $url_lines; do
                  url=$(echo "$url_line" | cut -d '(' -f2 | cut -d ')' -f1 | sed 's/\.git$//')
                  path=$(echo "$url_line" | cut -d':' -f1 | cut -d'/' -f2-)
                  if [[ "$url" == "https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html" || "$url" == "https://ai.cloud.intel.com/" ]]; then
                    echo "Link $url from ${{github.workspace}}/$path needs to be verified by a real person."
                  else
                    sleep $delay
                    response=$(curl -L -s -o /dev/null -w "%{http_code}" -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" -H "Accept-Language: en-US,en;q=0.5" "$url") || true
                    if [ "$response" -ne 200 ]; then
                      echo "**********Validation of $url failed ($response), try again**********"
                      response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") || true
                      if [ "$response_retry" -eq 200 ]; then
                        echo "*****Retry succeeded*****"
                      else
                        echo "******Retry of $url failed ($response_retry), adding simulated browser request******"
                        response_browser=$(curl -s -o /dev/null -w "%{http_code}" -A "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" -H "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8" -H "Accept-Language: en-US,en;q=0.5" "$url") || true
                        if [ "$response_browser" -eq 200 ]; then
                          echo "*****Retry succeeded*****"
                        else
                          echo -e "::error:: Invalid link ($response_retry) from ${{github.workspace}}/$(echo "$url_line" | cut -d':' -f1): $url"
                          fail="TRUE"
                        fi
                      fi
                    fi
                  fi
                done
              fi
            done
          else
            echo "No changed .md file."
          fi

          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          else
            echo "All hyperlinks are valid."
          fi
        shell: bash

  check-the-validity-of-relative-path:
    runs-on: ubuntu-latest
    steps:
      - name: Clean up Working Directory
        run: sudo rm -rf ${{github.workspace}}/*

      - name: Checkout Repo
        uses: actions/checkout@1e31de5234b9f8995739874a8ce0492dc87873e2
        with:
          fetch-depth: 0

      - name: Checking Relative Path Validity
        env:
          REPO_NAME: ${{ github.event.pull_request.head.repo.full_name }}
          HEAD_REF: ${{ github.event.pull_request.head.ref }}
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          cd ${{github.workspace}}
          delay=1
          fail="FALSE"
          branch="https://github.com/$REPO_NAME/blob/$HEAD_REF"

          merged_commit=$(git log -1 --format='%H')
          changed_files="$(git diff --name-status --diff-filter=ARM $BASE_SHA ${merged_commit} | awk '/\.md$/ {print $NF}')"
          png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r . | grep -Ev 'http' | grep -Ev 'shape=' | grep -Ev 'mailto:inc.maintainers@intel.com')
          if [ -n "$png_lines" ]; then
            for png_line in $png_lines; do
              refer_path=$(echo "$png_line" | cut -d':' -f1 | cut -d'/' -f2-)
              png_path=$(echo "$png_line" | cut -d '(' -f2 | cut -d ')' -f1)

              if [[ "${png_path:0:1}" == "/" ]]; then
                # absolute path
                check_path=$(echo "${png_path:1}" | cut -d '#' -f1)
              else
                # relative path
                check_path=${refer_path}
                relative_path=$(echo "$png_path" | cut -d '#' -f1)
                if [ -n "$relative_path" ]; then check_path=$(dirname "$refer_path")/$relative_path; fi
              fi

              if [ -e "$check_path" ]; then
                real_path=$(realpath $check_path)
                if [[ "$png_path" == *#* ]]; then
                  if [ -n "$changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
                    url_dev=$branch$(echo "$real_path" | sed 's|.*/neural-compressor||')#$(echo "$png_path" | cut -d '#' -f2)
                    sleep $delay
                    response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
                    if [ "$response" -ne 200 ]; then
                      echo "**********Validation failed ($response), try again**********"
                      response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev")
                      if [ "$response_retry" -eq 200 ]; then
                        echo "*****Retry succeeded*****"
                      else
                        echo -e "::error:: Invalid path ($response_retry) from ${{github.workspace}}/$refer_path: $png_path"
                        fail="TRUE"
                      fi
                    else
                      echo "Validation succeeded: $png_line"
                    fi
                  fi
                fi
              else
                echo -e "::error:: ${{github.workspace}}/$refer_path:$png_path does not exist."
                fail="TRUE"
              fi
            done
          fi

          if [[ "$fail" == "TRUE" ]]; then
            exit 1
          else
            echo "All relative paths are valid."
          fi
        shell: bash
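For readers who want to reuse this check outside CI, here is a rough, stdlib-only Python equivalent of the hyperlink job's retry ladder. This is a sketch, not part of the PR: the workflow itself shells out to curl, and the single-file entry point below stands in for the git-diff-derived file list.

```python
# Stdlib-only sketch of the hyperlink check: extract markdown links, then try
# browser-UA request, plain retry, and browser-UA again before failing.
import re
import time
import urllib.error
import urllib.request

BROWSER_UA = ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
              "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
LINK_RE = re.compile(r"\]\((https?://[^)]+)\)")  # markdown links, as in the grep

def status(url: str, browser: bool = False) -> int:
    """Final HTTP status for url (redirects followed, like curl -L); 0 on network error."""
    headers = {"User-Agent": BROWSER_UA} if browser else {}
    try:
        with urllib.request.urlopen(urllib.request.Request(url, headers=headers), timeout=30) as resp:
            return resp.status
    except urllib.error.HTTPError as err:
        return err.code
    except Exception:
        return 0

def link_ok(url: str) -> bool:
    # same ladder as the job: browser-UA request, plain retry, browser-UA again
    return (status(url, browser=True) == 200
            or status(url) == 200
            or status(url, browser=True) == 200)

def check_file(md_path: str, delay: float = 1.0) -> bool:
    ok = True
    text = open(md_path, encoding="utf-8").read()
    for url in LINK_RE.findall(text):
        time.sleep(delay)  # throttle, mirroring the workflow's sleep
        if not link_ok(url.removesuffix(".git")):
            print(f"::error:: Invalid link from {md_path}: {url}")
            ok = False
    return ok

if __name__ == "__main__":
    # stand-in for the workflow's changed-files list
    raise SystemExit(0 if check_file("README.md") else 1)
```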
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -134,7 +134,7 @@ repos:
         exclude: |
           (?x)^(
               examples/.*(txt|patch)|
-              examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json|
+              examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json|
               neural_compressor/torch/algorithms/fp8_quant/internal/diffusion_evaluation/SR_evaluation/imagenet1000_clsidx_to_labels.txt|
               neural_compressor/evaluation/hf_eval/datasets/cnn_validation.json|
               neural_compressor/torch/algorithms/fp8_quant/.+|
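As a quick sanity check on the updated exclude entry, a hedged Python sketch; the pattern below is an excerpt of the block above, not the full list, and pre-commit evaluates these as Python regexes with `(?x)` verbose mode ignoring the whitespace.

```python
import re

# Excerpted exclude pattern: verbose mode, anchored at the path start.
exclude = re.compile(r"""(?x)^(
    examples/.*(txt|patch)|
    examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt\.json
)""")

old_path = "examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json"
new_path = "examples/deprecated/" + old_path[len("examples/"):]
assert exclude.match(new_path) is not None   # relocated file stays excluded
assert exclude.match(old_path) is None       # pre-move path no longer matches
```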
9 changes: 4 additions & 5 deletions README.md
@@ -19,12 +19,12 @@ Intel® Neural Compressor aims to provide popular model compression techniques
 as well as Intel extensions such as [Intel Extension for TensorFlow](https://github.com/intel/intel-extension-for-tensorflow) and [Intel Extension for PyTorch](https://github.com/intel/intel-extension-for-pytorch).
 In particular, the tool provides the key features, typical examples, and open collaborations as below:

-* Support a wide range of Intel hardware such as [Intel Gaudi AI Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi-overview.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/flex-series.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html) with extensive testing;
+* Support a wide range of Intel hardware such as [Intel Gaudi AI Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/overview.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/overview.html) with extensive testing;
 support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testing; support NVidia GPU for some WOQ algorithms like AutoRound and HQQ.

 * Validate popular LLMs such as [LLama2](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), with automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies

-* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)
+* Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst)

 ## What's New
 * [2025/10] [MXFP8 / MXFP4 quantization](./docs/source/3x/PT_MXQuant.md) experimental support
@@ -115,8 +115,8 @@ model = load(
     <td colspan="2" align="center"><a href="./docs/source/3x/design.md#architecture">Architecture</a></td>
     <td colspan="2" align="center"><a href="./docs/source/3x/design.md#workflows">Workflow</a></td>
     <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/latest/docs/source/api-doc/apis.html">APIs</a></td>
-    <td colspan="1" align="center"><a href="./docs/source/3x/llm_recipes.md">LLMs Recipes</a></td>
-    <td colspan="1" align="center"><a href="./examples/3.x_api/README.md">Examples</a></td>
+    <td colspan="1" align="center"><a href="./docs/source/llm_recipes.md">LLMs Recipes</a></td>
+    <td colspan="1" align="center"><a href="./examples/README.md">Examples</a></td>
   </tr>
 </tbody>
 <thead>
@@ -190,7 +190,6 @@ model = load(

 ## Additional Content

-* [Release Information](./docs/source/releases_info.md)
 * [Contribution Guidelines](./docs/source/CONTRIBUTING.md)
 * [Legal Information](./docs/source/legal_information.md)
 * [Security Policy](SECURITY.md)
4 changes: 2 additions & 2 deletions docs/source/3x/PT_MixedPrecision.md
@@ -18,7 +18,7 @@ The 4th Gen Intel® Xeon® Scalable processor supports FP16 instruction set
 Further details can be found in the [Intel AVX512 FP16 Guide](https://www.intel.com/content/www/us/en/content-details/669773/intel-avx-512-fp16-instruction-set-for-intel-xeon-processor-based-products-technology-guide.html) published by Intel.

 The latest Intel Xeon processors deliver the flexibility of Intel Advanced Matrix Extensions (Intel AMX), an accelerator that improves the performance of deep learning (DL) training and inference, making it ideal for workloads like NLP, recommender systems, and image recognition. Developers can code AI functionality to take advantage of the Intel AMX instruction set, and they can code non-AI functionality to use the processor instruction set architecture (ISA). Intel has integrated the Intel® oneAPI Deep Neural Network Library (oneDNN), its oneAPI DL engine, into PyTorch.
-Further details can be found in the [Intel AMX Document](https://www.intel.com/content/www/us/en/content-details/785250/accelerate-artificial-intelligence-ai-workloads-with-intel-advanced-matrix-extensions-intel-amx.html) published by Intel.
+Further details can be found in the [Intel AMX Document](https://www.intel.com/content/www/us/en/content-details/785250/accelerate-artificial-intelligence-workloads-with-intel-advanced-matrix-extensions.html) published by Intel.

 <p align="center" width="100%">
     <img src="./imgs/data_format.png" alt="Architecture" height=230>
@@ -107,5 +107,5 @@ best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config

 ## Examples

-Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/mixed_precision
+Users can also refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/cv/mixed_precision
 ) on how to quantize a model with Mixed Precision.
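To orient readers, a minimal sketch of the mixed-precision autotune flow the hunk above references. The import names follow the 3.x `neural_compressor.torch.quantization` API as this doc uses it; `build_torch_model`, the evaluation function, and the exact `autotune` keyword names are assumptions for illustration.

```python
# Hedged sketch of mixed-precision autotuning: try fp16/bf16/fp32 candidates
# and keep the best model according to a user-supplied metric.
import torch
from neural_compressor.torch.quantization import MixedPrecisionConfig, TuningConfig, autotune

def build_torch_model() -> torch.nn.Module:
    # placeholder for a real model
    return torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU())

def eval_fn(model) -> float:
    # stand-in metric; a real run would measure accuracy on a dataset
    with torch.no_grad():
        return float(model(torch.randn(4, 16)).mean())

custom_tune_config = TuningConfig(config_set=[MixedPrecisionConfig(dtype=["fp16", "bf16", "fp32"])])
best_model = autotune(model=build_torch_model(), tune_config=custom_tune_config, eval_fn=eval_fn)
```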
4 changes: 1 addition & 3 deletions docs/source/3x/PT_SmoothQuant.md
@@ -46,7 +46,7 @@ run_fn(prepared_model)
 q_model = convert(prepared_model)
 ```

-To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant).
+To get more information, please refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant).


## Validated Models
@@ -99,8 +99,6 @@ A list of models that achieved a <1% accuracy drop is shown below.
 | databricks/dolly-v2-3b* | 0.6297 | 0.6247 | alpha=0.5, Ipex 2.1 |
 | tiiuae/falcon-7b-instruct | 0.6437 | 0.6392 | alpha=0.7, Pytorch |

-Please refer to the step-by-step [instruction](../../examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/ipex/README.md) for details.
-
 Please note that for models with an asterisk (*), we have set all add ops to FP32 during the quantization step to achieve desirable results.


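The first hunk above shows only the tail of the SmoothQuant flow (`run_fn` and `convert`); for completeness, a hedged sketch of the whole prepare/calibrate/convert sequence. API names follow the 3.x `neural_compressor.torch.quantization` module as this doc presents it; the toy model and calibration `run_fn` are placeholders, and the real path runs through IPEX on a supported model.

```python
# Hedged sketch of SmoothQuant: prepare with an alpha, calibrate with
# representative data, then convert to the quantized model.
import torch
from neural_compressor.torch.quantization import SmoothQuantConfig, prepare, convert

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU(), torch.nn.Linear(8, 2))
example_inputs = torch.randn(1, 8)

quant_config = SmoothQuantConfig(alpha=0.5)  # alpha balances activation/weight scales
prepared_model = prepare(model, quant_config=quant_config, example_inputs=example_inputs)

def run_fn(m):
    # calibration: feed representative data through the prepared model
    for _ in range(4):
        m(torch.randn(1, 8))

run_fn(prepared_model)
q_model = convert(prepared_model)
```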
4 changes: 2 additions & 2 deletions docs/source/3x/PT_StaticQuant.md
@@ -68,7 +68,7 @@ q_model = convert(prepared_model)

 #### Model Examples

-Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex) on how to quantize a new model.
+Users could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex) on how to quantize a new model.


### Static Quantization with PT2E Backend
@@ -105,4 +105,4 @@ opt_model = torch.compile(q_model)

 #### Model Examples with PT2E

-Users could refer to [cv examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/cv/static_quant) and [llm examples](https://github.com/intel/neural-compressor/blob/master/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e) on how to quantize a new model.
+Users could refer to [cv examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/cv/static_quant) and [llm examples](https://github.com/intel/neural-compressor/blob/master/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e) on how to quantize a new model.
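A compact, hedged sketch of the PT2E flow these examples cover, ending in the `torch.compile(q_model)` call shown in the hunk header. The export helper is the `neural_compressor.torch` wrapper as this doc family uses it; the toy model, default config, and calibration loop are placeholders.

```python
# Hedged end-to-end sketch of PT2E static quantization: export, prepare,
# calibrate, convert, then compile the quantized model.
import torch
from neural_compressor.torch.export import export
from neural_compressor.torch.quantization import StaticQuantConfig, prepare, convert

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())
example_inputs = (torch.randn(2, 8),)

exported_model = export(model=model, example_inputs=example_inputs)
prepared_model = prepare(exported_model, quant_config=StaticQuantConfig())
for _ in range(4):                   # calibration with representative data
    prepared_model(*example_inputs)
q_model = convert(prepared_model)

opt_model = torch.compile(q_model)   # compile the quantized model, as above
```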