@@ -587,13 +587,111 @@ trainer.fit(
)
```

- **7. Tabular with a multi-target loss**
+ **7. A two-tower model**
+
+ This is a popular model in the context of recommendation systems. Let's say we
+ have a tabular dataset formed by triples (user features, item features,
+ target). We can create a two-tower model where the user and item features are
+ passed through two separate models and then "fused" via a dot product.
+
+ <p align="center">
+ <img width="350" src="docs/figures/arch_7.png">
+ </p>
+
+
+ ```python
+ import numpy as np
+ import pandas as pd
+
+ from pytorch_widedeep import Trainer
+ from pytorch_widedeep.preprocessing import TabPreprocessor
+ from pytorch_widedeep.models import TabMlp, WideDeep, ModelFuser
+
+ # Let's create the interaction dataset
+ # user_features dataframe
+ np.random.seed(42)
+ user_ids = np.arange(1, 101)
+ ages = np.random.randint(18, 60, size=100)
+ genders = np.random.choice(["male", "female"], size=100)
+ locations = np.random.choice(["city_a", "city_b", "city_c", "city_d"], size=100)
+ user_features = pd.DataFrame(
+     {"id": user_ids, "age": ages, "gender": genders, "location": locations}
+ )
+
+ # item_features dataframe
+ item_ids = np.arange(1, 101)
+ prices = np.random.uniform(10, 500, size=100).round(2)
+ colors = np.random.choice(["red", "blue", "green", "black"], size=100)
+ categories = np.random.choice(["electronics", "clothing", "home", "toys"], size=100)
+
+ item_features = pd.DataFrame(
+     {"id": item_ids, "price": prices, "color": colors, "category": categories}
+ )
+
+ # Interactions dataframe
+ interaction_user_ids = np.random.choice(user_ids, size=1000)
+ interaction_item_ids = np.random.choice(item_ids, size=1000)
+ purchased = np.random.choice([0, 1], size=1000, p=[0.7, 0.3])
+ interactions = pd.DataFrame(
+     {
+         "user_id": interaction_user_ids,
+         "item_id": interaction_item_ids,
+         "purchased": purchased,
+     }
+ )
+ user_item_purchased = interactions.merge(
+     user_features, left_on="user_id", right_on="id"
+ ).merge(item_features, left_on="item_id", right_on="id")
+
+ # Users
+ tab_preprocessor_user = TabPreprocessor(
+     cat_embed_cols=["gender", "location"],
+     continuous_cols=["age"],
+ )
+ X_user = tab_preprocessor_user.fit_transform(user_item_purchased)
+ tab_mlp_user = TabMlp(
+     column_idx=tab_preprocessor_user.column_idx,
+     cat_embed_input=tab_preprocessor_user.cat_embed_input,
+     continuous_cols=["age"],
+     mlp_hidden_dims=[16, 8],
+     mlp_dropout=[0.2, 0.2],
+ )
+
+ # Items
+ tab_preprocessor_item = TabPreprocessor(
+     cat_embed_cols=["color", "category"],
+     continuous_cols=["price"],
+ )
+ X_item = tab_preprocessor_item.fit_transform(user_item_purchased)
+ tab_mlp_item = TabMlp(
+     column_idx=tab_preprocessor_item.column_idx,
+     cat_embed_input=tab_preprocessor_item.cat_embed_input,
+     continuous_cols=["price"],
+     mlp_hidden_dims=[16, 8],
+     mlp_dropout=[0.2, 0.2],
+ )
+
+ two_tower_model = ModelFuser([tab_mlp_user, tab_mlp_item], fusion_method="dot")
+
+ model = WideDeep(deeptabular=two_tower_model)
+
+ trainer = Trainer(model, objective="binary")
+
+ trainer.fit(
+     X_tab=[X_user, X_item],
+     target=interactions.purchased.values,
+     n_epochs=1,
+     batch_size=32,
+ )
+ ```
+
+ **8. Tabular with a multi-target loss**

This one is "a bonus" to illustrate the use of multi-target losses, more than
actually a different architecture.

<p align="center">
- <img width="200" src="docs/figures/arch_7.png">
+ <img width="200" src="docs/figures/arch_8.png">
</p>
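
As a rough, non-authoritative sketch of what a multi-target loss can look like (plain PyTorch; the class name, target layout and weighting below are invented for illustration and are not pytorch_widedeep's own multi-target losses): one prediction head emits two columns, the first is scored against a binary target, the second against a continuous target, and the two terms are summed.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class BinaryPlusRegressionLoss(nn.Module):
    """Toy multi-target loss: BCE on the first output column, MSE on the second."""

    def __init__(self, regression_weight: float = 1.0):
        super().__init__()
        self.regression_weight = regression_weight

    def forward(self, preds: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        # preds and targets have shape (batch_size, 2):
        # column 0 -> binary target (logit vs. {0, 1}), column 1 -> continuous target
        binary_loss = F.binary_cross_entropy_with_logits(preds[:, 0], targets[:, 0])
        regression_loss = F.mse_loss(preds[:, 1], targets[:, 1])
        return binary_loss + self.regression_weight * regression_loss


# Quick check on a random batch of 8 samples with two targets each
preds = torch.randn(8, 2)
targets = torch.cat([torch.randint(0, 2, (8, 1)).float(), torch.randn(8, 1)], dim=1)
print(BinaryPlusRegressionLoss(regression_weight=0.5)(preds, targets).item())
```

The gist of this "bonus" setup is simply that the model's prediction dimension matches the number of targets and each output column gets its own loss term.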