     PrefixTuningConfig,
     PromptEncoderConfig,
     RandLoraConfig,
-    RoadConfig,
     TaskType,
     VeraConfig,
     create_arrow_model,

@@ -1722,226 +1721,6 @@ def test_causal_lm_training_multi_gpu_4bit_randlora(self):
             # assert loss is not None
             assert trainer.state.log_history[-1]["train_loss"] is not None
 
-    @pytest.mark.single_gpu_tests
-    def test_causal_lm_training_8bit_road(self):
-        r"""
-        Same as test_causal_lm_training but with RoAd
-        """
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model = AutoModelForCausalLM.from_pretrained(
-                self.causal_lm_model_id,
-                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-                device_map="auto",
-            )
-
-            tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
-            model = prepare_model_for_kbit_training(model)
-
-            config = RoadConfig(
-                variant="road_1",
-                target_modules=["q_proj", "v_proj"],
-                task_type="CAUSAL_LM",
-            )
-
-            model = get_peft_model(model, config)
-
-            data = load_dataset("ybelkada/english_quotes_copy")
-            data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
-
-            trainer = Trainer(
-                model=model,
-                train_dataset=data["train"],
-                args=TrainingArguments(
-                    per_device_train_batch_size=4,
-                    gradient_accumulation_steps=4,
-                    warmup_steps=2,
-                    max_steps=3,
-                    learning_rate=1e-3,
-                    fp16=True,
-                    logging_steps=1,
-                    output_dir=tmp_dir,
-                ),
-                data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
-            )
-            model.config.use_cache = False
-            trainer.train()
-
-            model.cpu().save_pretrained(tmp_dir)
-
-            assert "adapter_config.json" in os.listdir(tmp_dir)
-            assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
-
-            # assert loss is not None
-            assert trainer.state.log_history[-1]["train_loss"] is not None
-
-    @pytest.mark.single_gpu_tests
-    def test_causal_lm_training_4bit_road(self):
-        r"""
-        Same as test_causal_lm_training_4bit but with RoAd
-        """
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model = AutoModelForCausalLM.from_pretrained(
-                self.causal_lm_model_id,
-                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
-                device_map="auto",
-            )
-
-            tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
-            model = prepare_model_for_kbit_training(model)
-
-            config = RoadConfig(
-                variant="road_1",
-                target_modules=["q_proj", "v_proj"],
-                task_type="CAUSAL_LM",
-            )
-
-            model = get_peft_model(model, config)
-
-            data = load_dataset("ybelkada/english_quotes_copy")
-            data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)
-
-            trainer = Trainer(
-                model=model,
-                train_dataset=data["train"],
-                args=TrainingArguments(
-                    per_device_train_batch_size=4,
-                    gradient_accumulation_steps=4,
-                    warmup_steps=2,
-                    max_steps=3,
-                    learning_rate=1e-3,
-                    fp16=True,
-                    logging_steps=1,
-                    output_dir=tmp_dir,
-                ),
-                data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
-            )
-            model.config.use_cache = False
-            trainer.train()
-
-            model.cpu().save_pretrained(tmp_dir)
-
-            assert "adapter_config.json" in os.listdir(tmp_dir)
-            assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
-
-            # assert loss is not None
-            assert trainer.state.log_history[-1]["train_loss"] is not None
-
-    @pytest.mark.multi_gpu_tests
-    def test_causal_lm_training_multi_gpu_8bit_road(self):
-        r"""
-        Same as test_causal_lm_training_multi_gpu but with RoAd
-        """
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model = AutoModelForCausalLM.from_pretrained(
-                self.causal_lm_model_id,
-                device_map=DEVICE_MAP_MAP[self.causal_lm_model_id],
-                quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-            )
-
-            assert set(model.hf_device_map.values()) == set(range(device_count))
-            assert {p.device.index for p in model.parameters()} == set(range(device_count))
-
-            model = prepare_model_for_kbit_training(model)
-
-            setattr(model, "model_parallel", True)
-            setattr(model, "is_parallelizable", True)
-
-            config = RoadConfig(
-                variant="road_1",
-                target_modules=["q_proj", "v_proj"],
-                task_type="CAUSAL_LM",
-            )
-
-            model = get_peft_model(model, config)
-
-            data = load_dataset("Abirate/english_quotes")
-            data = data.map(lambda samples: self.tokenizer(samples["quote"]), batched=True)
-
-            trainer = Trainer(
-                model=model,
-                train_dataset=data["train"],
-                args=TrainingArguments(
-                    per_device_train_batch_size=4,
-                    gradient_accumulation_steps=4,
-                    warmup_steps=2,
-                    max_steps=3,
-                    learning_rate=1e-3,
-                    fp16=True,
-                    logging_steps=1,
-                    output_dir=tmp_dir,
-                ),
-                data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
-            )
-            model.config.use_cache = False
-            trainer.train()
-
-            model.cpu().save_pretrained(tmp_dir)
-
-            assert "adapter_config.json" in os.listdir(tmp_dir)
-            assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
-
-            # assert loss is not None
-            assert trainer.state.log_history[-1]["train_loss"] is not None
-
-    @pytest.mark.multi_gpu_tests
-    def test_causal_lm_training_multi_gpu_4bit_road(self):
-        r"""
-        Same as test_causal_lm_training_multi_gpu_4bit but with RoAd
-        """
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model = AutoModelForCausalLM.from_pretrained(
-                self.causal_lm_model_id,
-                device_map=DEVICE_MAP_MAP[self.causal_lm_model_id],
-                quantization_config=BitsAndBytesConfig(load_in_4bit=True),
-            )
-
-            assert set(model.hf_device_map.values()) == set(range(device_count))
-            assert {p.device.index for p in model.parameters()} == set(range(device_count))
-
-            model = prepare_model_for_kbit_training(model)
-
-            setattr(model, "model_parallel", True)
-            setattr(model, "is_parallelizable", True)
-
-            config = RoadConfig(
-                variant="road_1",
-                target_modules=["q_proj", "v_proj"],
-                task_type="CAUSAL_LM",
-            )
-
-            model = get_peft_model(model, config)
-
-            data = load_dataset("Abirate/english_quotes")
-            data = data.map(lambda samples: self.tokenizer(samples["quote"]), batched=True)
-
-            trainer = Trainer(
-                model=model,
-                train_dataset=data["train"],
-                args=TrainingArguments(
-                    per_device_train_batch_size=4,
-                    gradient_accumulation_steps=4,
-                    warmup_steps=2,
-                    max_steps=3,
-                    learning_rate=1e-3,
-                    fp16=True,
-                    logging_steps=1,
-                    output_dir=tmp_dir,
-                ),
-                data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
-            )
-            model.config.use_cache = False
-            trainer.train()
-
-            model.cpu().save_pretrained(tmp_dir)
-
-            assert "adapter_config.json" in os.listdir(tmp_dir)
-            assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
-
-            # assert loss is not None
-            assert trainer.state.log_history[-1]["train_loss"] is not None
-
     @pytest.mark.single_gpu_tests
     def test_causal_lm_training_lora_resize_embeddings_trainable_tokens(self):
         r"""
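All four deleted tests exercised the same recipe: load the base model quantized with bitsandbytes, run prepare_model_for_kbit_training, attach a RoAd adapter via RoadConfig and get_peft_model, and train for a few steps with Trainer. Below is a minimal standalone sketch of that recipe, condensed from the removed code; the model id is a placeholder assumption (the tests read it from self.causal_lm_model_id), and the 4-bit variants simply pass load_in_4bit=True instead.

```python
import os
import tempfile

from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)

from peft import RoadConfig, get_peft_model, prepare_model_for_kbit_training

model_id = "facebook/opt-350m"  # placeholder assumption; the tests use self.causal_lm_model_id

# Load the base model in 8-bit and prepare it for k-bit training.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = prepare_model_for_kbit_training(model)

# Attach the RoAd adapter to the attention projections.
config = RoadConfig(variant="road_1", target_modules=["q_proj", "v_proj"], task_type="CAUSAL_LM")
model = get_peft_model(model, config)

data = load_dataset("ybelkada/english_quotes_copy")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

with tempfile.TemporaryDirectory() as tmp_dir:
    trainer = Trainer(
        model=model,
        train_dataset=data["train"],
        args=TrainingArguments(
            per_device_train_batch_size=4,
            gradient_accumulation_steps=4,
            warmup_steps=2,
            max_steps=3,
            learning_rate=1e-3,
            fp16=True,
            logging_steps=1,
            output_dir=tmp_dir,
        ),
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )
    model.config.use_cache = False
    trainer.train()

    # Saving the adapter should produce the PEFT config and safetensors weights.
    model.cpu().save_pretrained(tmp_dir)
    assert "adapter_config.json" in os.listdir(tmp_dir)
```

The multi-GPU variants differed only in loading with an explicit device_map spanning all GPUs, asserting the resulting parameter placement, and setting model_parallel / is_parallelizable on the model before training.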