scikit-learn
diff --git a/‎dev/_downloads/010337852815f8103ac6cca38a812b3c/plot_roc_crossval.py
Lines changed: 33 additions & 23 deletions b/‎dev/_downloads/010337852815f8103ac6cca38a812b3c/plot_roc_crossval.py
Lines changed: 33 additions & 23 deletions
diff --git a/‎dev/_downloads/02192f99342d6d1323161978b6c80bfc/plot_ols_ridge.zip
0 Bytes b/‎dev/_downloads/02192f99342d6d1323161978b6c80bfc/plot_ols_ridge.zip
0 Bytes
diff --git a/‎dev/_downloads/02fe21a1f5d14bd1ebbe34aa905d9bf6/plot_multilabel.zip
0 Bytes b/‎dev/_downloads/02fe21a1f5d14bd1ebbe34aa905d9bf6/plot_multilabel.zip
0 Bytes
diff --git a/‎dev/_downloads/0358b1921962fd7c6f5a94c5abc91476/plot_hashing_vs_dict_vectorizer.zip
0 Bytes b/‎dev/_downloads/0358b1921962fd7c6f5a94c5abc91476/plot_hashing_vs_dict_vectorizer.zip
0 Bytes
diff --git a/‎dev/_downloads/038d1885eb5f5ea53aca42da3031fe38/plot_document_clustering.zip
0 Bytes b/‎dev/_downloads/038d1885eb5f5ea53aca42da3031fe38/plot_document_clustering.zip
0 Bytes
diff --git a/‎dev/_downloads/03c018a16384c69f3a89e473650d57ee/plot_svm_margin.zip
0 Bytes b/‎dev/_downloads/03c018a16384c69f3a89e473650d57ee/plot_svm_margin.zip
0 Bytes
diff --git a/‎dev/_downloads/04b9a7769df5b331f4d94e1d065b4311/plot_voting_decision_regions.zip
0 Bytes b/‎dev/_downloads/04b9a7769df5b331f4d94e1d065b4311/plot_voting_decision_regions.zip
0 Bytes
diff --git a/‎dev/_downloads/053b58bbfc8177072856c743b2c93424/plot_varimax_fa.zip
0 Bytes b/‎dev/_downloads/053b58bbfc8177072856c743b2c93424/plot_varimax_fa.zip
0 Bytes
diff --git a/‎dev/_downloads/055e8313e28f2f3b5fd508054dfe5fe0/plot_roc_crossval.ipynb
Lines changed: 2 additions & 2 deletions b/‎dev/_downloads/055e8313e28f2f3b5fd508054dfe5fe0/plot_roc_crossval.ipynb
Lines changed: 2 additions & 2 deletions
diff --git a/‎dev/_downloads/06c18f4675ecd124f98537d05e10abd0/plot_nca_dim_reduction.zip
0 Bytes b/‎dev/_downloads/06c18f4675ecd124f98537d05e10abd0/plot_nca_dim_reduction.zip
0 Bytes
@@ -62,46 +62,56 @@
 # Classification and ROC analysis
 # -------------------------------
 #
-# Here we run a :class:`~sklearn.svm.SVC` classifier with cross-validation and
-# plot the ROC curves fold-wise. Notice that the baseline to define the chance
+# Here we run :func:`~sklearn.model_selection.cross_validate` on a
+# :class:`~sklearn.svm.SVC` classifier, then use the computed cross-validation results
+# to plot the ROC curves fold-wise. Notice that the baseline to define the chance
 # level (dashed ROC curve) is a classifier that would always predict the most
 # frequent class.
 
 import matplotlib.pyplot as plt
 
 from sklearn import svm
 from sklearn.metrics import RocCurveDisplay, auc
-from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import StratifiedKFold, cross_validate
 
 n_splits = 6
 cv = StratifiedKFold(n_splits=n_splits)
 classifier = svm.SVC(kernel="linear", probability=True, random_state=random_state)
+cv_results = cross_validate(
+    classifier, X, y, cv=cv, return_estimator=True, return_indices=True
+)
+
+prop_cycle = plt.rcParams["axes.prop_cycle"]
+colors = prop_cycle.by_key()["color"]
+curve_kwargs_list = [
+    dict(alpha=0.3, lw=1, color=colors[fold % len(colors)]) for fold in range(n_splits)
+]
+names = [f"ROC fold {idx}" for idx in range(n_splits)]
 
-tprs = []
-aucs = []
 mean_fpr = np.linspace(0, 1, 100)
+interp_tprs = []
+
+_, ax = plt.subplots(figsize=(6, 6))
+viz = RocCurveDisplay.from_cv_results(
+    cv_results,
+    X,
+    y,
+    ax=ax,
+    name=names,
+    curve_kwargs=curve_kwargs_list,
+    plot_chance_level=True,
+)
 
-fig, ax = plt.subplots(figsize=(6, 6))
-for fold, (train, test) in enumerate(cv.split(X, y)):
-    classifier.fit(X[train], y[train])
-    viz = RocCurveDisplay.from_estimator(
-        classifier,
-        X[test],
-        y[test],
-        name=f"ROC fold {fold}",
-        curve_kwargs=dict(alpha=0.3, lw=1),
-        ax=ax,
-        plot_chance_level=(fold == n_splits - 1),
-    )
-    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
+for idx in range(n_splits):
+    interp_tpr = np.interp(mean_fpr, viz.fpr[idx], viz.tpr[idx])
     interp_tpr[0] = 0.0
-    tprs.append(interp_tpr)
-    aucs.append(viz.roc_auc)
+    interp_tprs.append(interp_tpr)
 
-mean_tpr = np.mean(tprs, axis=0)
+mean_tpr = np.mean(interp_tprs, axis=0)
 mean_tpr[-1] = 1.0
 mean_auc = auc(mean_fpr, mean_tpr)
-std_auc = np.std(aucs)
+std_auc = np.std(viz.roc_auc)
+
 ax.plot(
     mean_fpr,
     mean_tpr,
@@ -111,7 +121,7 @@
     alpha=0.8,
 )
 
-std_tpr = np.std(tprs, axis=0)
+std_tpr = np.std(interp_tprs, axis=0)
 tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
 tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
 ax.fill_between(
 
@@ -58,7 +58,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "### Classification and ROC analysis\n\nHere we run a :class:`~sklearn.svm.SVC` classifier with cross-validation and\nplot the ROC curves fold-wise. Notice that the baseline to define the chance\nlevel (dashed ROC curve) is a classifier that would always predict the most\nfrequent class.\n\n"
+        "### Classification and ROC analysis\n\nHere we run :func:`~sklearn.model_selection.cross_validate` on a\n:class:`~sklearn.svm.SVC` classifier, then use the computed cross-validation results\nto plot the ROC curves fold-wise. Notice that the baseline to define the chance\nlevel (dashed ROC curve) is a classifier that would always predict the most\nfrequent class.\n\n"
       ]
     },
     {
@@ -69,7 +69,7 @@
       },
       "outputs": [],
       "source": [
-        "import matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.metrics import RocCurveDisplay, auc\nfrom sklearn.model_selection import StratifiedKFold\n\nn_splits = 6\ncv = StratifiedKFold(n_splits=n_splits)\nclassifier = svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n\ntprs = []\naucs = []\nmean_fpr = np.linspace(0, 1, 100)\n\nfig, ax = plt.subplots(figsize=(6, 6))\nfor fold, (train, test) in enumerate(cv.split(X, y)):\n    classifier.fit(X[train], y[train])\n    viz = RocCurveDisplay.from_estimator(\n        classifier,\n        X[test],\n        y[test],\n        name=f\"ROC fold {fold}\",\n        curve_kwargs=dict(alpha=0.3, lw=1),\n        ax=ax,\n        plot_chance_level=(fold == n_splits - 1),\n    )\n    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n    interp_tpr[0] = 0.0\n    tprs.append(interp_tpr)\n    aucs.append(viz.roc_auc)\n\nmean_tpr = np.mean(tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(aucs)\nax.plot(\n    mean_fpr,\n    mean_tpr,\n    color=\"b\",\n    label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n    lw=2,\n    alpha=0.8,\n)\n\nstd_tpr = np.std(tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(\n    mean_fpr,\n    tprs_lower,\n    tprs_upper,\n    color=\"grey\",\n    alpha=0.2,\n    label=r\"$\\pm$ 1 std. dev.\",\n)\n\nax.set(\n    xlabel=\"False Positive Rate\",\n    ylabel=\"True Positive Rate\",\n    title=f\"Mean ROC curve with variability\\n(Positive label '{target_names[1]}')\",\n)\nax.legend(loc=\"lower right\")\nplt.show()"
+        "import matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.metrics import RocCurveDisplay, auc\nfrom sklearn.model_selection import StratifiedKFold, cross_validate\n\nn_splits = 6\ncv = StratifiedKFold(n_splits=n_splits)\nclassifier = svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\ncv_results = cross_validate(\n    classifier, X, y, cv=cv, return_estimator=True, return_indices=True\n)\n\nprop_cycle = plt.rcParams[\"axes.prop_cycle\"]\ncolors = prop_cycle.by_key()[\"color\"]\ncurve_kwargs_list = [\n    dict(alpha=0.3, lw=1, color=colors[fold % len(colors)]) for fold in range(n_splits)\n]\nnames = [f\"ROC fold {idx}\" for idx in range(n_splits)]\n\nmean_fpr = np.linspace(0, 1, 100)\ninterp_tprs = []\n\n_, ax = plt.subplots(figsize=(6, 6))\nviz = RocCurveDisplay.from_cv_results(\n    cv_results,\n    X,\n    y,\n    ax=ax,\n    name=names,\n    curve_kwargs=curve_kwargs_list,\n    plot_chance_level=True,\n)\n\nfor idx in range(n_splits):\n    interp_tpr = np.interp(mean_fpr, viz.fpr[idx], viz.tpr[idx])\n    interp_tpr[0] = 0.0\n    interp_tprs.append(interp_tpr)\n\nmean_tpr = np.mean(interp_tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(viz.roc_auc)\n\nax.plot(\n    mean_fpr,\n    mean_tpr,\n    color=\"b\",\n    label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n    lw=2,\n    alpha=0.8,\n)\n\nstd_tpr = np.std(interp_tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(\n    mean_fpr,\n    tprs_lower,\n    tprs_upper,\n    color=\"grey\",\n    alpha=0.2,\n    label=r\"$\\pm$ 1 std. dev.\",\n)\n\nax.set(\n    xlabel=\"False Positive Rate\",\n    ylabel=\"True Positive Rate\",\n    title=f\"Mean ROC curve with variability\\n(Positive label '{target_names[1]}')\",\n)\nax.legend(loc=\"lower right\")\nplt.show()"
       ]
     }
   ],
Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@`
`58`	`58`	`"cell_type": "markdown",`
`59`	`59`	`"metadata": {},`
`60`	`60`	`"source": [`
`61`		- "### Classification and ROC analysis\n\nHere we run a :class:`~sklearn.svm.SVC` classifier with cross-validation and\nplot the ROC curves fold-wise. Notice that the baseline to define the chance\nlevel (dashed ROC curve) is a classifier that would always predict the most\nfrequent class.\n\n"
	`61`	+ "### Classification and ROC analysis\n\nHere we run :func:`~sklearn.model_selection.cross_validate` on a\n:class:`~sklearn.svm.SVC` classifier, then use the computed cross-validation results\nto plot the ROC curves fold-wise. Notice that the baseline to define the chance\nlevel (dashed ROC curve) is a classifier that would always predict the most\nfrequent class.\n\n"
`62`	`62`	`]`
`63`	`63`	`},`
`64`	`64`	`{`
`@@ -69,7 +69,7 @@`
`69`	`69`	`},`
`70`	`70`	`"outputs": [],`
`71`	`71`	`"source": [`
`72`		- "import matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.metrics import RocCurveDisplay, auc\nfrom sklearn.model_selection import StratifiedKFold\n\nn_splits = 6\ncv = StratifiedKFold(n_splits=n_splits)\nclassifier = svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\n\ntprs = []\naucs = []\nmean_fpr = np.linspace(0, 1, 100)\n\nfig, ax = plt.subplots(figsize=(6, 6))\nfor fold, (train, test) in enumerate(cv.split(X, y)):\n classifier.fit(X[train], y[train])\n viz = RocCurveDisplay.from_estimator(\n classifier,\n X[test],\n y[test],\n name=f\"ROC fold {fold}\",\n curve_kwargs=dict(alpha=0.3, lw=1),\n ax=ax,\n plot_chance_level=(fold == n_splits - 1),\n )\n interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)\n interp_tpr[0] = 0.0\n tprs.append(interp_tpr)\n aucs.append(viz.roc_auc)\n\nmean_tpr = np.mean(tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(aucs)\nax.plot(\n mean_fpr,\n mean_tpr,\n color=\"b\",\n label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n lw=2,\n alpha=0.8,\n)\n\nstd_tpr = np.std(tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(\n mean_fpr,\n tprs_lower,\n tprs_upper,\n color=\"grey\",\n alpha=0.2,\n label=r\"$\\pm$ 1 std. dev.\",\n)\n\nax.set(\n xlabel=\"False Positive Rate\",\n ylabel=\"True Positive Rate\",\n title=f\"Mean ROC curve with variability\\n(Positive label '{target_names[1]}')\",\n)\nax.legend(loc=\"lower right\")\nplt.show()"
	`72`	+ "import matplotlib.pyplot as plt\n\nfrom sklearn import svm\nfrom sklearn.metrics import RocCurveDisplay, auc\nfrom sklearn.model_selection import StratifiedKFold, cross_validate\n\nn_splits = 6\ncv = StratifiedKFold(n_splits=n_splits)\nclassifier = svm.SVC(kernel=\"linear\", probability=True, random_state=random_state)\ncv_results = cross_validate(\n classifier, X, y, cv=cv, return_estimator=True, return_indices=True\n)\n\nprop_cycle = plt.rcParams[\"axes.prop_cycle\"]\ncolors = prop_cycle.by_key()[\"color\"]\ncurve_kwargs_list = [\n dict(alpha=0.3, lw=1, color=colors[fold % len(colors)]) for fold in range(n_splits)\n]\nnames = [f\"ROC fold {idx}\" for idx in range(n_splits)]\n\nmean_fpr = np.linspace(0, 1, 100)\ninterp_tprs = []\n\n_, ax = plt.subplots(figsize=(6, 6))\nviz = RocCurveDisplay.from_cv_results(\n cv_results,\n X,\n y,\n ax=ax,\n name=names,\n curve_kwargs=curve_kwargs_list,\n plot_chance_level=True,\n)\n\nfor idx in range(n_splits):\n interp_tpr = np.interp(mean_fpr, viz.fpr[idx], viz.tpr[idx])\n interp_tpr[0] = 0.0\n interp_tprs.append(interp_tpr)\n\nmean_tpr = np.mean(interp_tprs, axis=0)\nmean_tpr[-1] = 1.0\nmean_auc = auc(mean_fpr, mean_tpr)\nstd_auc = np.std(viz.roc_auc)\n\nax.plot(\n mean_fpr,\n mean_tpr,\n color=\"b\",\n label=r\"Mean ROC (AUC = %0.2f $\\pm$ %0.2f)\" % (mean_auc, std_auc),\n lw=2,\n alpha=0.8,\n)\n\nstd_tpr = np.std(interp_tprs, axis=0)\ntprs_upper = np.minimum(mean_tpr + std_tpr, 1)\ntprs_lower = np.maximum(mean_tpr - std_tpr, 0)\nax.fill_between(\n mean_fpr,\n tprs_lower,\n tprs_upper,\n color=\"grey\",\n alpha=0.2,\n label=r\"$\\pm$ 1 std. dev.\",\n)\n\nax.set(\n xlabel=\"False Positive Rate\",\n ylabel=\"True Positive Rate\",\n title=f\"Mean ROC curve with variability\\n(Positive label '{target_names[1]}')\",\n)\nax.legend(loc=\"lower right\")\nplt.show()"
`73`	`73`	`]`
`74`	`74`	`}`
`75`	`75`	`],`