update documentation

JosephSefara · JosephSefara · commit f8b3d50824f9 · 2023-11-16T22:20:10.000+02:00
diff --git a/README.md b/README.md
@@ -64,7 +64,7 @@ nltk.download('averaged_perceptron_tagger')
 Use gensim to load a pre-trained word2vec model, such as [Google News from Google Drive](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit).
 ```python
 import gensim
-model = gensim.models.Word2Vec.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True)
+model = gensim.models.KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin', binary=True)
 ```
 You can also use gensim to load Facebook's Fasttext [English](https://fasttext.cc/docs/en/english-vectors.html) and [Multilingual models](https://fasttext.cc/docs/en/crawl-vectors.html)
 ```
@@ -103,6 +103,11 @@ There are three types of augmentations which can be used:
 ```python
 from textaugment import Word2vec
 ```
+- fasttext 
+
+```python
+from textaugment import Fasttext
+```
 
 - wordnet 
 ```python
@@ -112,17 +117,20 @@ from textaugment import Wordnet
 ```python
 from textaugment import Translate
 ```
-#### Word2vec-based augmentation
+#### Fasttext/Word2vec-based augmentation
 
 [See this notebook for an example](https://github.com/dsfsi/textaugment/blob/master/examples/word2vec_example.ipynb)
 
 **Basic example**
 
 ```python
->>> from textaugment import Word2vec
+>>> from textaugment import Word2vec, Fasttext
 >>> t = Word2vec(model='path/to/gensim/model'or 'gensim model itself')
 >>> t.augment('The stories are good')
 The films are good
+>>> t = Fasttext(model='path/to/gensim/model'or 'gensim model itself')
+>>> t.augment('The stories are good')
+The films are good
 ```
 **Advanced example**
 
@@ -131,8 +139,11 @@ The films are good
 >>> v = False # verbose mode to replace all the words. If enabled, runs has no effect. Used in this paper (https://www.cs.cmu.edu/~diyiy/docs/emnlp_wang_2015.pdf)
 >>> p = 0.5 # The probability of success of an individual trial. (0.1<p<1.0), default is 0.5. Used by the geometric distribution to select words from a sentence.
 
->>> t = Word2vec(model='path/to/gensim/model'or'gensim model itself', runs=5, v=False, p=0.5)
->>> t.augment('The stories are good')
+>>> word = Word2vec(model='path/to/gensim/model'or'gensim model itself', runs=5, v=False, p=0.5)
+>>> word.augment('The stories are good', top_n=10)
+The movies are excellent
+>>> fast = Fasttext(model='path/to/gensim/model'or'gensim model itself', runs=5, v=False, p=0.5)
+>>> fast.augment('The stories are good', top_n=10)
 The movies are excellent
 ```
 #### WordNet-based augmentation
@@ -155,7 +166,7 @@ In the afternoon, John is walking to town
 >>> p = 0.5 # The probability of success of an individual trial. (0.1<p<1.0), default is 0.5. Used by the geometric distribution to select words from a sentence.
 
 >>> t = Wordnet(v=False ,n=True, p=0.5)
->>> t.augment('In the afternoon, John is going to town')
+>>> t.augment('In the afternoon, John is going to town', top_n=10)
 In the afternoon, Joseph is going to town.
 ```
 #### RTT-based augmentation
@@ -183,7 +194,7 @@ one of its synonyms chosen at random.
 ```python
 >>> from textaugment import EDA
 >>> t = EDA()
->>> t.synonym_replacement("John is going to town")
+>>> t.synonym_replacement("John is going to town", top_n=10)
 John is give out to town
 ```
 
diff --git a/examples/aeda_example.ipynb b/examples/aeda_example.ipynb
@@ -91,7 +91,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.15"
+   "version": "3.7.7"
   }
  },
  "nbformat": 4,
diff --git a/examples/eda_example.ipynb b/examples/eda_example.ipynb
@@ -22,7 +22,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -39,19 +39,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "John is going to townspeople\n"
+      "John is choke to town\n"
      ]
     }
    ],
    "source": [
-    "output = t.synonym_replacement(\"John is going to town\")\n",
+    "output = t.synonym_replacement(\"John is going to town\", top_n=10)\n",
     "print(output)"
    ]
   },
@@ -65,14 +65,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "John is going st john to town\n"
+      "John is going to lead town\n"
      ]
     }
    ],
@@ -91,14 +91,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "John to going is town\n"
+      "John is to going town\n"
      ]
     }
    ],
@@ -117,14 +117,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "going to town\n"
+      "John going to town\n"
      ]
     }
    ],
diff --git a/examples/fasttext_example.ipynb b/examples/fasttext_example.ipynb
@@ -79,8 +79,8 @@
    "outputs": [],
    "source": [
     "from textaugment import Word2vec\n",
-    "t = Word2vec(model = model.wv)\n",
-    "output = t.augment('The stories are good')"
+    "t = Word2vec(model = model)\n",
+    "output = t.augment('The stories are good', top_n=10)"
    ]
   },
   {
@@ -132,9 +132,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.3"
+   "version": "3.7.7"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/examples/word2vec_example.ipynb b/examples/word2vec_example.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -170,7 +170,7 @@
    "source": [
     "from textaugment import Word2vec\n",
     "t = Word2vec(model=model)\n",
-    "output = t.augment('The stories are good')"
+    "output = t.augment('The stories are good', top_n=10)"
    ]
   },
   {

Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@`
`91`	`91`	`"name": "python",`
`92`	`92`	`"nbconvert_exporter": "python",`
`93`	`93`	`"pygments_lexer": "ipython3",`
`94`		`- "version": "3.8.15"`
	`94`	`+ "version": "3.7.7"`
`95`	`95`	`}`
`96`	`96`	`},`
`97`	`97`	`"nbformat": 4,`
Original file line number	Diff line number	Diff line change
`@@ -22,7 +22,7 @@`
`22`	`22`	`},`
`23`	`23`	`{`
`24`	`24`	`"cell_type": "code",`
`25`		`- "execution_count": 7,`
	`25`	`+ "execution_count": 2,`
`26`	`26`	`"metadata": {},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`@@ -39,19 +39,19 @@`
`39`	`39`	`},`
`40`	`40`	`{`
`41`	`41`	`"cell_type": "code",`
`42`		`- "execution_count": 8,`
	`42`	`+ "execution_count": 3,`
`43`	`43`	`"metadata": {},`
`44`	`44`	`"outputs": [`
`45`	`45`	`{`
`46`	`46`	`"name": "stdout",`
`47`	`47`	`"output_type": "stream",`
`48`	`48`	`"text": [`
`49`		`- "John is going to townspeople\n"`
	`49`	`+ "John is choke to town\n"`
`50`	`50`	`]`
`51`	`51`	`}`
`52`	`52`	`],`
`53`	`53`	`"source": [`
`54`		`- "output = t.synonym_replacement(\"John is going to town\")\n",`
	`54`	`+ "output = t.synonym_replacement(\"John is going to town\", top_n=10)\n",`
`55`	`55`	`"print(output)"`
`56`	`56`	`]`
`57`	`57`	`},`
`@@ -65,14 +65,14 @@`
`65`	`65`	`},`
`66`	`66`	`{`
`67`	`67`	`"cell_type": "code",`
`68`		`- "execution_count": 9,`
	`68`	`+ "execution_count": 4,`
`69`	`69`	`"metadata": {},`
`70`	`70`	`"outputs": [`
`71`	`71`	`{`
`72`	`72`	`"name": "stdout",`
`73`	`73`	`"output_type": "stream",`
`74`	`74`	`"text": [`
`75`		`- "John is going st john to town\n"`
	`75`	`+ "John is going to lead town\n"`
`76`	`76`	`]`
`77`	`77`	`}`
`78`	`78`	`],`
`@@ -91,14 +91,14 @@`
`91`	`91`	`},`
`92`	`92`	`{`
`93`	`93`	`"cell_type": "code",`
`94`		`- "execution_count": 10,`
	`94`	`+ "execution_count": 5,`
`95`	`95`	`"metadata": {},`
`96`	`96`	`"outputs": [`
`97`	`97`	`{`
`98`	`98`	`"name": "stdout",`
`99`	`99`	`"output_type": "stream",`
`100`	`100`	`"text": [`
`101`		`- "John to going is town\n"`
	`101`	`+ "John is to going town\n"`
`102`	`102`	`]`
`103`	`103`	`}`
`104`	`104`	`],`
`@@ -117,14 +117,14 @@`
`117`	`117`	`},`
`118`	`118`	`{`
`119`	`119`	`"cell_type": "code",`
`120`		`- "execution_count": 11,`
	`120`	`+ "execution_count": 6,`
`121`	`121`	`"metadata": {},`
`122`	`122`	`"outputs": [`
`123`	`123`	`{`
`124`	`124`	`"name": "stdout",`
`125`	`125`	`"output_type": "stream",`
`126`	`126`	`"text": [`
`127`		`- "going to town\n"`
	`127`	`+ "John going to town\n"`
`128`	`128`	`]`
`129`	`129`	`}`
`130`	`130`	`],`
Original file line number	Diff line number	Diff line change
`@@ -79,8 +79,8 @@`
`79`	`79`	`"outputs": [],`
`80`	`80`	`"source": [`
`81`	`81`	`"from textaugment import Word2vec\n",`
`82`		`- "t = Word2vec(model = model.wv)\n",`
`83`		`- "output = t.augment('The stories are good')"`
	`82`	`+ "t = Word2vec(model = model)\n",`
	`83`	`+ "output = t.augment('The stories are good', top_n=10)"`
`84`	`84`	`]`
`85`	`85`	`},`
`86`	`86`	`{`
`@@ -132,9 +132,9 @@`
`132`	`132`	`"name": "python",`
`133`	`133`	`"nbconvert_exporter": "python",`
`134`	`134`	`"pygments_lexer": "ipython3",`
`135`		`- "version": "3.7.3"`
	`135`	`+ "version": "3.7.7"`
`136`	`136`	`}`
`137`	`137`	`},`
`138`	`138`	`"nbformat": 4,`
`139`		`- "nbformat_minor": 2`
	`139`	`+ "nbformat_minor": 4`
`140`	`140`	`}`
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`},`
`13`	`13`	`{`
`14`	`14`	`"cell_type": "code",`
`15`		`- "execution_count": null,`
	`15`	`+ "execution_count": 1,`
`16`	`16`	`"metadata": {`
`17`	`17`	`"colab": {},`
`18`	`18`	`"colab_type": "code",`
`@@ -170,7 +170,7 @@`
`170`	`170`	`"source": [`
`171`	`171`	`"from textaugment import Word2vec\n",`
`172`	`172`	`"t = Word2vec(model=model)\n",`
`173`		`- "output = t.augment('The stories are good')"`
	`173`	`+ "output = t.augment('The stories are good', top_n=10)"`
`174`	`174`	`]`
`175`	`175`	`},`
`176`	`176`	`{`