From 1f4bcdb6d209a52be7dfe798134f98e38fb57d18 Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Wed, 6 Jan 2021 20:52:47 +0530 Subject: [PATCH 1/9] Included StringIndexer and StringIndexerModel along with related test cases --- .../ML/Feature/StringIndexerModelTests.cs | 80 ++++++++ .../IpcTests/ML/Feature/StringIndexerTests.cs | 52 ++++++ .../ML/Feature/StringIndexer.cs | 174 ++++++++++++++++++ .../ML/Feature/StringIndexerModel.cs | 92 +++++++++ 4 files changed, 398 insertions(+) create mode 100644 src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs create mode 100644 src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerTests.cs create mode 100644 src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs create mode 100644 src/csharp/Microsoft.Spark/ML/Feature/StringIndexerModel.cs diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs new file mode 100644 index 000000000..4dc01ae8f --- /dev/null +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -0,0 +1,80 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Microsoft.Spark.ML.Feature; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Types; +using Microsoft.Spark.UnitTest.TestUtils; +using Xunit; + +namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature +{ + [Collection("Spark E2E Tests")] + public class StringIndexerModelTests : FeatureBaseTests + { + private readonly SparkSession _spark; + + public StringIndexerModelTests(SparkFixture fixture) : base(fixture) + { + _spark = fixture.Spark; + } + + /// + /// Create a , create a and test the + /// available methods. + /// + [Fact] + public void TestStringIndexerModel() + { + DataFrame input = _spark.CreateDataFrame( + new List + { + new GenericRow(new object[] { (0, "a") }), + new GenericRow(new object[] { (1, "b") }), + new GenericRow(new object[] { (2, "c") }), + new GenericRow(new object[] { (3, "a") }), + new GenericRow(new object[] { (4, "a") }), + new GenericRow(new object[] { (5, "c") }) + }, + new StructType(new List + { + new StructField("id", new IntegerType()), + new StructField("category", new StringType()) + })); + + string expectedUid = "theUid"; + StringIndexer stringIndexer = new StringIndexer(expectedUid) + .SetInputCol("category") + .SetOutputCol("categoryIndex"); + + StringIndexerModel stringIndexerModel = stringIndexer.Fit(input); + DataFrame transformedDF = stringIndexerModel.Transform(input); + List observed = transformedDF.Select("category", new string[] { "categoryIndex" }) + .Collect().ToList(); + List expected = new List + { + new Row(new GenericRow(new object[] {("a", "0") })), + new Row(new GenericRow(new object[] {("c", "1") })), + new Row(new GenericRow(new object[] {("b", "2") })) + }; + + Assert.Equal(observed, expected); + Assert.Equal("category", stringIndexer.GetInputCol()); + Assert.Equal("categoryIndex", stringIndexer.GetOutputCol()); + Assert.Equal(expectedUid, stringIndexer.Uid()); + + using (var tempDirectory = new TemporaryDirectory()) + { + 
string savePath = Path.Join(tempDirectory.Path, "stringIndexerModel"); + stringIndexerModel.Save(savePath); + + StringIndexerModel loadedModel = StringIndexerModel.Load(savePath); + Assert.Equal(stringIndexerModel.Uid(), loadedModel.Uid()); + } + } + } +} diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerTests.cs new file mode 100644 index 000000000..8fc2b70f7 --- /dev/null +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerTests.cs @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using System.IO; +using System.Linq; +using Microsoft.Spark.ML.Feature; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Types; +using Microsoft.Spark.UnitTest.TestUtils; +using Xunit; + +namespace Microsoft.Spark.E2ETest.IpcTests.ML.Feature +{ + [Collection("Spark E2E Tests")] + public class StringIndexerTests : FeatureBaseTests + { + private readonly SparkSession _spark; + + public StringIndexerTests(SparkFixture fixture) : base(fixture) + { + _spark = fixture.Spark; + } + + /// + /// Create a , create a and test the + /// available methods. 
+ /// + [Fact] + public void TestStringIndexer() + { + string expectedUid = "theUid"; + StringIndexer stringIndexer = new StringIndexer(expectedUid) + .SetInputCol("category") + .SetOutputCol("categoryIndex"); + + Assert.Equal("category", stringIndexer.GetInputCol()); + Assert.Equal("categoryIndex", stringIndexer.GetOutputCol()); + Assert.Equal(expectedUid, stringIndexer.Uid()); + + using (var tempDirectory = new TemporaryDirectory()) + { + string savePath = Path.Join(tempDirectory.Path, "stringIndexer"); + stringIndexer.Save(savePath); + + StringIndexer loadedstringIndexer = StringIndexer.Load(savePath); + Assert.Equal(stringIndexer.Uid(), loadedstringIndexer.Uid()); + } + } + } +} diff --git a/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs new file mode 100644 index 000000000..a6acbd2e3 --- /dev/null +++ b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs @@ -0,0 +1,174 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Ipc; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Types; + +namespace Microsoft.Spark.ML.Feature +{ + /// + /// encodes a string column of labels to a column of label indices. + /// + public class StringIndexer : FeatureBase, IJvmObjectReferenceProvider + { + private static readonly string s_StringIndexerClassName = + "org.apache.spark.ml.feature.StringIndexer"; + + /// + /// Create a without any parameters. + /// + public StringIndexer() : base(s_StringIndexerClassName) + { + } + + /// + /// Create a with a UID that is used to give the + /// a unique ID. + /// + /// An immutable unique ID for the object and its derivatives. 
+ public StringIndexer(string uid) : base(s_StringIndexerClassName, uid) + { + } + + internal StringIndexer(JvmObjectReference jvmObject) : base(jvmObject) + { + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + /// + /// Executes the and transforms the schema. + /// + /// The Schema to be transformed + /// + /// New object with the schema transformed. + /// + public StructType TransformSchema(StructType value) => + new StructType( + (JvmObjectReference)_jvmObject.Invoke( + "transformSchema", + DataType.FromJson(_jvmObject.Jvm, value.Json))); + + /// + /// Executes the and fits a model to the input data. + /// + /// + /// + public StringIndexerModel Fit(DataFrame source) => + new StringIndexerModel((JvmObjectReference)_jvmObject.Invoke("fit", source)); + + /// + /// Gets the HandleInvalid. + /// + /// Handle Invalid option + public string GetHandleInvalid() => (string)_jvmObject.Invoke("handleInvalid"); + + /// + /// Sets the Handle Invalid option to . + /// + /// Handle Invalid option + /// + /// with the Handle Invalid set. + /// + public StringIndexer SetHandleInvalid(string handleInvalid) => + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setHandleInvalid", handleInvalid)); + + /// + /// Gets the InputCol. + /// + /// Input Col option + public string GetInputCol() => (string)_jvmObject.Invoke("inputCol"); + + /// + /// Sets the Input Col option to . + /// + /// Input Col option + /// + /// with the Input Col set. + /// + public StringIndexer SetInputCol(string inputCol) => + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setInputCol", inputCol)); + + /// + /// Gets the InputCols array. + /// + /// Input Cols array option + public string[] GetInputCols() => (string[])_jvmObject.Invoke("inputCols"); + + /// + /// Sets the Input Cols array option to . + /// + /// Input Cols array option + /// + /// with the Input Cols array set. 
+ /// + public StringIndexer SetInputCols(string[] inputCols) => + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setInputCol", inputCols)); + + /// + /// Gets the OutputCol. + /// + /// Output Col option + public string GetOutputCol() => (string)_jvmObject.Invoke("outputCol"); + + /// + /// Sets the Output Col option to . + /// + /// Output Col option + /// + /// with the Output Col set. + /// + public StringIndexer SetOutputCol(string outputCol) => + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setOutputCol", outputCol)); + + /// + /// Gets the OutputCols array. + /// + /// Output Cols array option + public string[] GetOutputCols() => (string[])_jvmObject.Invoke("outputCols"); + + /// + /// Sets the Output Cols array option to . + /// + /// Output Cols array option + /// + /// with the Output Cols array set. + /// + public StringIndexer SetOutputCols(string[] outputCols) => + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setOutputCols", outputCols)); + + /// + /// Gets the String Order Type. + /// + /// String Order Type + public string GetStringOrderType() => (string)_jvmObject.Invoke("stringOrderType"); + + /// + /// Sets the String Order Type to . + /// + /// String Order Type + /// + /// with the String Order Type set. + /// + public StringIndexer SetStringOrderType(string stringOrderType) => + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setStringOrderType", stringOrderType)); + + /// + /// Loads the that was previously saved using Save. 
+ /// + /// The path the previous StringIndexer was saved to + /// New StringIndexer object, loaded from path + public static StringIndexer Load(string path) => + WrapAsStringIndexer( + SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_StringIndexerClassName, + "load", + path)); + + private static StringIndexer WrapAsStringIndexer(object obj) => + new StringIndexer((JvmObjectReference)obj); + } +} diff --git a/src/csharp/Microsoft.Spark/ML/Feature/StringIndexerModel.cs b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexerModel.cs new file mode 100644 index 000000000..e16757a93 --- /dev/null +++ b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexerModel.cs @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Collections.Generic; +using Microsoft.Spark.Interop; +using Microsoft.Spark.Interop.Ipc; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Types; + +namespace Microsoft.Spark.ML.Feature +{ + public class StringIndexerModel + : FeatureBase, IJvmObjectReferenceProvider + { + private static readonly string s_stringIndexerModelClassName = + "org.apache.spark.ml.feature.StringIndexerModel"; + + /// + /// Creates a StringIndexerModel from the given vocabulary + /// + /// The vocabulary to use + public StringIndexerModel(List vocabulary) + : this(SparkEnvironment.JvmBridge.CallConstructor( + s_stringIndexerModelClassName, vocabulary)) + { + } + + /// + /// Creates a StringIndexerModel with a UID that is used to give the + /// StringIndexerModel a unique ID + /// + /// An immutable unique ID for the object and its derivatives. 
+ /// The vocabulary to use + public StringIndexerModel(string uid, List vocabulary) + : this(SparkEnvironment.JvmBridge.CallConstructor( + s_stringIndexerModelClassName, uid, vocabulary)) + { + } + + internal StringIndexerModel(JvmObjectReference jvmObject) : base(jvmObject) + { + } + + JvmObjectReference IJvmObjectReferenceProvider.Reference => _jvmObject; + + /// + /// Loads the StringIndexerModel that was previously saved using Save + /// + /// + /// The path the previous StringIndexerModel was saved to + /// + /// New StringIndexerModel object + public static StringIndexerModel Load(string path) => + WrapAsStringIndexerModel( + SparkEnvironment.JvmBridge.CallStaticJavaMethod( + s_stringIndexerModelClassName, "load", path)); + + /// + /// Check transform validity and derive the output schema from the input schema. + /// + /// This checks for validity of interactions between parameters during Transform and + /// raises an exception if any parameter value is invalid. + /// + /// Typical implementation should first conduct verification on schema change and parameter + /// validity, including complex parameter interaction checks. + /// + /// + /// The StructType of the DataFrame which will be transformed. + /// + /// + /// The StructType of the output schema that would have been derived from the + /// input schema, if Transform had been called. + /// + public StructType TransformSchema(StructType value) => + new StructType( + (JvmObjectReference)_jvmObject.Invoke( + "transformSchema", + DataType.FromJson(_jvmObject.Jvm, value.Json))); + + /// + /// Transforms the input DataFrame by invoking transform on the underlying JVM StringIndexerModel. 
+ /// + /// The DataFrame to transform + /// DataFrame containing the original data and the label indices + public DataFrame Transform(DataFrame document) => + new DataFrame((JvmObjectReference)_jvmObject.Invoke("transform", document)); + + private static StringIndexerModel WrapAsStringIndexerModel(object obj) => + new StringIndexerModel((JvmObjectReference)obj); + } +} From 65f43d023225785e17eca6ef5a3a2bb7772a095b Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Wed, 6 Jan 2021 21:30:50 +0530 Subject: [PATCH 2/9] Corrected issue in test case --- .../ML/Feature/StringIndexerModelTests.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index 4dc01ae8f..d111ed6b7 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -33,12 +33,12 @@ public void TestStringIndexerModel() DataFrame input = _spark.CreateDataFrame( new List { - new GenericRow(new object[] { (0, "a") }), - new GenericRow(new object[] { (1, "b") }), - new GenericRow(new object[] { (2, "c") }), - new GenericRow(new object[] { (3, "a") }), - new GenericRow(new object[] { (4, "a") }), - new GenericRow(new object[] { (5, "c") }) + new GenericRow(new object[] {0, "a"}), + new GenericRow(new object[] {1, "b"}), + new GenericRow(new object[] {2, "c"}), + new GenericRow(new object[] {3, "a"}), + new GenericRow(new object[] {4, "a"}), + new GenericRow(new object[] {5, "c"}) }, new StructType(new List { @@ -57,9 +57,9 @@ public void TestStringIndexerModel() .Collect().ToList(); List expected = new List { - new Row(new GenericRow(new object[] {("a", "0") })), - new Row(new GenericRow(new object[] {("c", "1") })), - new Row(new GenericRow(new object[] {("b", "2") })) + new Row(new GenericRow(new 
object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"c", "1"})), + new Row(new GenericRow(new object[] {"b", "2"})) }; Assert.Equal(observed, expected); From f3b287c84b338ca678fb411aad5ac12c6772c827 Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Wed, 6 Jan 2021 22:22:28 +0530 Subject: [PATCH 3/9] Corrected issue in test case --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index d111ed6b7..c9a73d937 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -58,11 +58,14 @@ public void TestStringIndexerModel() List expected = new List { new Row(new GenericRow(new object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"b", "2"})), new Row(new GenericRow(new object[] {"c", "1"})), - new Row(new GenericRow(new object[] {"b", "2"})) + new Row(new GenericRow(new object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"c", "1"})) }; - Assert.Equal(observed, expected); + Assert.Equal(expected, observed); Assert.Equal("category", stringIndexer.GetInputCol()); Assert.Equal("categoryIndex", stringIndexer.GetOutputCol()); Assert.Equal(expectedUid, stringIndexer.Uid()); From f78d4ce4ae1f28dc6ce9a1591068aba1814f849b Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Fri, 29 Jan 2021 16:41:49 +0530 Subject: [PATCH 4/9] Corrected the test case --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 6 +++++- src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git 
a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index c9a73d937..b043ab8b3 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -65,7 +65,11 @@ public void TestStringIndexerModel() new Row(new GenericRow(new object[] {"c", "1"})) }; - Assert.Equal(expected, observed); + observed.ForEach(a => + { + Assert.Equal(a, expected.FirstOrDefault(b => b == a)); + } + ); Assert.Equal("category", stringIndexer.GetInputCol()); Assert.Equal("categoryIndex", stringIndexer.GetOutputCol()); Assert.Equal(expectedUid, stringIndexer.Uid()); diff --git a/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs index a6acbd2e3..174e4b539 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs @@ -55,8 +55,8 @@ public StructType TransformSchema(StructType value) => /// /// Executes the and fits a model to the input data. /// - /// - /// + /// The to fit the model to. 
+ /// public StringIndexerModel Fit(DataFrame source) => new StringIndexerModel((JvmObjectReference)_jvmObject.Invoke("fit", source)); From 6cd1a7c29fafbfc1b72009fa0d16655cdae8a271 Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Mon, 1 Feb 2021 11:35:08 +0530 Subject: [PATCH 5/9] Changed FirstorDefault to Where --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index b043ab8b3..b4ced6878 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -67,7 +67,7 @@ public void TestStringIndexerModel() observed.ForEach(a => { - Assert.Equal(a, expected.FirstOrDefault(b => b == a)); + Assert.Equal(a, expected.Where(b => b == a).FirstOrDefault()); } ); Assert.Equal("category", stringIndexer.GetInputCol()); From 6ead3932c234d9beaaf13ec76c143d6587fa3404 Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Mon, 1 Feb 2021 12:50:16 +0530 Subject: [PATCH 6/9] Modified List datatype --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index b4ced6878..febd17713 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -57,12 +57,12 @@ public void TestStringIndexerModel() .Collect().ToList(); List expected = new List { - new Row(new GenericRow(new object[] {"a", "0"})), - new Row(new GenericRow(new object[] {"b", "2"})), - 
new Row(new GenericRow(new object[] {"c", "1"})), - new Row(new GenericRow(new object[] {"a", "0"})), - new Row(new GenericRow(new object[] {"a", "0"})), - new Row(new GenericRow(new object[] {"c", "1"})) + new GenericRow(new object[] {"a", "0"}), + new GenericRow(new object[] {"b", "2"}), + new GenericRow(new object[] {"c", "1"}), + new GenericRow(new object[] {"a", "0"}), + new GenericRow(new object[] {"a", "0"}), + new GenericRow(new object[] {"c", "1"}) }; observed.ForEach(a => From fa1add4f9ac13e6bfdff6d82946a6f94752f3eea Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Wed, 3 Feb 2021 22:44:40 +0530 Subject: [PATCH 7/9] Corrected the internal property names --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 12 ++++++------ .../Microsoft.Spark/ML/Feature/StringIndexer.cs | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index febd17713..b4ced6878 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -57,12 +57,12 @@ public void TestStringIndexerModel() .Collect().ToList(); List expected = new List { - new GenericRow(new object[] {"a", "0"}), - new GenericRow(new object[] {"b", "2"}), - new GenericRow(new object[] {"c", "1"}), - new GenericRow(new object[] {"a", "0"}), - new GenericRow(new object[] {"a", "0"}), - new GenericRow(new object[] {"c", "1"}) + new Row(new GenericRow(new object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"b", "2"})), + new Row(new GenericRow(new object[] {"c", "1"})), + new Row(new GenericRow(new object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"a", "0"})), + new Row(new GenericRow(new object[] {"c", "1"})) }; observed.ForEach(a => diff --git 
a/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs index 174e4b539..d5bbab383 100644 --- a/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs +++ b/src/csharp/Microsoft.Spark/ML/Feature/StringIndexer.cs @@ -64,7 +64,7 @@ public StringIndexerModel Fit(DataFrame source) => /// Gets the HandleInvalid. /// /// Handle Invalid option - public string GetHandleInvalid() => (string)_jvmObject.Invoke("handleInvalid"); + public string GetHandleInvalid() => (string)_jvmObject.Invoke("getHandleInvalid"); /// /// Sets the Handle Invalid option to . @@ -80,7 +80,7 @@ public StringIndexer SetHandleInvalid(string handleInvalid) => /// Gets the InputCol. /// /// Input Col option - public string GetInputCol() => (string)_jvmObject.Invoke("inputCol"); + public string GetInputCol() => (string)_jvmObject.Invoke("getInputCol"); /// /// Sets the Input Col option to . @@ -96,7 +96,7 @@ public StringIndexer SetInputCol(string inputCol) => /// Gets the InputCols array. /// /// Input Cols array option - public string[] GetInputCols() => (string[])_jvmObject.Invoke("inputCols"); + public string[] GetInputCols() => (string[])_jvmObject.Invoke("getInputCols"); /// /// Sets the Input Cols array option to . @@ -106,13 +106,13 @@ public StringIndexer SetInputCol(string inputCol) => /// with the Input Cols array set. /// public StringIndexer SetInputCols(string[] inputCols) => - WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setInputCol", inputCols)); + WrapAsStringIndexer((JvmObjectReference)_jvmObject.Invoke("setInputCols", inputCols)); /// /// Gets the OutputCol. /// /// Output Col option - public string GetOutputCol() => (string)_jvmObject.Invoke("outputCol"); + public string GetOutputCol() => (string)_jvmObject.Invoke("getOutputCol"); /// /// Sets the Output Col option to . @@ -128,7 +128,7 @@ public StringIndexer SetOutputCol(string outputCol) => /// Gets the OutputCols array. 
/// /// Output Cols array option - public string[] GetOutputCols() => (string[])_jvmObject.Invoke("outputCols"); + public string[] GetOutputCols() => (string[])_jvmObject.Invoke("getOutputCols"); /// /// Sets the Output Cols array option to . @@ -144,7 +144,7 @@ public StringIndexer SetOutputCols(string[] outputCols) => /// Gets the String Order Type. /// /// String Order Type - public string GetStringOrderType() => (string)_jvmObject.Invoke("stringOrderType"); + public string GetStringOrderType() => (string)_jvmObject.Invoke("getStringOrderType"); /// /// Sets the String Order Type to . From 643789c9d046890d90341094aa402a26f20a52dc Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Wed, 3 Feb 2021 23:13:58 +0530 Subject: [PATCH 8/9] Changed List comparison --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index b4ced6878..c9a73d937 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -65,11 +65,7 @@ public void TestStringIndexerModel() new Row(new GenericRow(new object[] {"c", "1"})) }; - observed.ForEach(a => - { - Assert.Equal(a, expected.Where(b => b == a).FirstOrDefault()); - } - ); + Assert.Equal(expected, observed); Assert.Equal("category", stringIndexer.GetInputCol()); Assert.Equal("categoryIndex", stringIndexer.GetOutputCol()); Assert.Equal(expectedUid, stringIndexer.Uid()); From 4cc337f02792446369f3c7b6b3e1c00fd1c18079 Mon Sep 17 00:00:00 2001 From: Ramanathan Venkatasubramanian Date: Wed, 3 Feb 2021 23:37:51 +0530 Subject: [PATCH 9/9] Reverted direct List Check --- .../IpcTests/ML/Feature/StringIndexerModelTests.cs | 6 +++++- 1 file changed, 5 insertions(+), 1 
deletion(-) diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs index c9a73d937..b4ced6878 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/ML/Feature/StringIndexerModelTests.cs @@ -65,7 +65,11 @@ public void TestStringIndexerModel() new Row(new GenericRow(new object[] {"c", "1"})) }; - Assert.Equal(expected, observed); + observed.ForEach(a => + { + Assert.Equal(a, expected.Where(b => b == a).FirstOrDefault()); + } + ); Assert.Equal("category", stringIndexer.GetInputCol()); Assert.Equal("categoryIndex", stringIndexer.GetOutputCol()); Assert.Equal(expectedUid, stringIndexer.Uid());