
Commit 4b4bdcf

vladimirg-db authored and cloud-fan committed
[SPARK-51388][SQL] Improve SQL fragment propagation in to_timestamp and UNION
### What changes were proposed in this pull request?

Improve SQL fragment propagation in to_timestamp and UNION.

### Why are the changes needed?

To improve error messages:

![image](https://github.com/user-attachments/assets/4952ce49-5a29-4bf7-9d9f-0e443b1127ec)

### Does this PR introduce _any_ user-facing change?

Yes: invalid `Cast` error messages now carry the SQL fragment of the originating expression instead of an empty query context.

### How was this patch tested?

Golden file tests are regenerated.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #50154 from vladimirg-db/vladimir-golubev_data/improve-sql-fragment-in-some-places.

Authored-by: Vladimir Golubev <vladimir.golubev@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
1 parent 26545d0 commit 4b4bdcf

6 files changed: +35 -19 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala

Lines changed: 10 additions & 3 deletions

@@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{
   Unpivot
 }
 import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
 import org.apache.spark.sql.connector.catalog.procedures.BoundProcedure
 import org.apache.spark.sql.types.DataType

@@ -142,7 +143,9 @@ abstract class TypeCoercionBase extends TypeCoercionHelper {
     case s @ Except(left, right, isAll)
         if s.childrenResolved &&
           left.output.length == right.output.length && !s.resolved =>
-      val newChildren: Seq[LogicalPlan] = buildNewChildrenWithWiderTypes(left :: right :: Nil)
+      val newChildren: Seq[LogicalPlan] = withOrigin(s.origin) {
+        buildNewChildrenWithWiderTypes(left :: right :: Nil)
+      }
       if (newChildren.isEmpty) {
         s -> Nil
       } else {

@@ -154,7 +157,9 @@ abstract class TypeCoercionBase extends TypeCoercionHelper {
     case s @ Intersect(left, right, isAll)
         if s.childrenResolved &&
           left.output.length == right.output.length && !s.resolved =>
-      val newChildren: Seq[LogicalPlan] = buildNewChildrenWithWiderTypes(left :: right :: Nil)
+      val newChildren: Seq[LogicalPlan] = withOrigin(s.origin) {
+        buildNewChildrenWithWiderTypes(left :: right :: Nil)
+      }
       if (newChildren.isEmpty) {
         s -> Nil
       } else {

@@ -166,7 +171,9 @@ abstract class TypeCoercionBase extends TypeCoercionHelper {
     case s: Union
         if s.childrenResolved && !s.byName &&
           s.children.forall(_.output.length == s.children.head.output.length) && !s.resolved =>
-      val newChildren: Seq[LogicalPlan] = buildNewChildrenWithWiderTypes(s.children)
+      val newChildren: Seq[LogicalPlan] = withOrigin(s.origin) {
+        buildNewChildrenWithWiderTypes(s.children)
+      }
       if (newChildren.isEmpty) {
         s -> Nil
       } else {
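
Editorial note: the fix leans on Catalyst's CurrentOrigin mechanism, in which tree nodes snapshot a thread-local Origin at construction time. Below is a minimal sketch of that idea using simplified stand-in classes (Spark's real Origin carries more fields); it is an illustration, not the actual implementation. Any node created inside withOrigin(s.origin) { ... }, such as the widening Casts produced by buildNewChildrenWithWiderTypes, inherits the set operation's SQL fragment instead of an empty origin.

// Simplified stand-ins for org.apache.spark.sql.catalyst.trees.{Origin, CurrentOrigin}
// and TreeNode; the field set is reduced, but the capture mechanism is the same idea.
case class Origin(
    sqlText: Option[String] = None,
    startIndex: Option[Int] = None,
    stopIndex: Option[Int] = None)

object CurrentOrigin {
  private val value = new ThreadLocal[Origin] {
    override def initialValue(): Origin = Origin()
  }
  def get: Origin = value.get()

  // Evaluate f with o as the ambient origin, restoring the previous one afterwards.
  def withOrigin[A](o: Origin)(f: => A): A = {
    val previous = get
    value.set(o)
    try f finally value.set(previous)
  }
}

abstract class TreeNode {
  // Snapshotted once, at construction time; error reporting later reads
  // sqlText/startIndex/stopIndex from here to build the queryContext.
  val origin: Origin = CurrentOrigin.get
}

Before this change, buildNewChildrenWithWiderTypes ran outside any withOrigin scope, so the Casts it inserted carried an empty origin and the resulting error messages reported an empty fragment.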

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 13 additions & 8 deletions

@@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry}
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
+import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
 import org.apache.spark.sql.catalyst.trees.TreePattern._
 import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._

@@ -2104,11 +2105,13 @@ case class ParseToDate(
     ansiEnabled: Boolean = SQLConf.get.ansiEnabled)
   extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression {

-  override lazy val replacement: Expression = format.map { f =>
-    Cast(GetTimestamp(left, f, TimestampType, "try_to_date", timeZoneId, ansiEnabled), DateType,
-      timeZoneId, EvalMode.fromBoolean(ansiEnabled))
-  }.getOrElse(Cast(left, DateType, timeZoneId,
-    EvalMode.fromBoolean(ansiEnabled))) // backwards compatibility
+  override lazy val replacement: Expression = withOrigin(origin) {
+    format.map { f =>
+      Cast(GetTimestamp(left, f, TimestampType, "try_to_date", timeZoneId, ansiEnabled), DateType,
+        timeZoneId, EvalMode.fromBoolean(ansiEnabled))
+    }.getOrElse(Cast(left, DateType, timeZoneId,
+      EvalMode.fromBoolean(ansiEnabled))) // backwards compatibility
+  }

   def this(left: Expression, format: Expression) = {
     this(left, Option(format))

@@ -2183,9 +2186,11 @@ case class ParseToTimestamp(
     failOnError: Boolean = SQLConf.get.ansiEnabled)
   extends RuntimeReplaceable with ImplicitCastInputTypes with TimeZoneAwareExpression {

-  override lazy val replacement: Expression = format.map { f =>
-    GetTimestamp(left, f, dataType, "try_to_timestamp", timeZoneId, failOnError = failOnError)
-  }.getOrElse(Cast(left, dataType, timeZoneId, ansiEnabled = failOnError))
+  override lazy val replacement: Expression = withOrigin(origin) {
+    format.map { f =>
+      GetTimestamp(left, f, dataType, "try_to_timestamp", timeZoneId, failOnError = failOnError)
+    }.getOrElse(Cast(left, dataType, timeZoneId, ansiEnabled = failOnError))
+  }

   def this(left: Expression, format: Expression) = {
     this(left, Option(format), SQLConf.get.timestampType)
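
Editorial note: a hedged illustration of the user-visible effect, under the configuration the timestampNTZ golden test appears to use (ANSI mode plus TIMESTAMP_NTZ). The session and message text below are approximate; only the indices and fragment come from the regenerated golden file.

// Hypothetical spark-shell session; error output abridged, not verbatim.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")
spark.sql("select to_timestamp(1)").collect()
// Throws org.apache.spark.SparkDateTimeException. Because the replacement
// Cast is now built under withOrigin(origin), its queryContext carries
//   "startIndex" : 8, "stopIndex" : 22, "fragment" : "to_timestamp(1)"
// where it previously carried an empty fragment.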

sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out

Lines changed: 3 additions & 1 deletion

@@ -394,7 +394,9 @@ org.apache.spark.SparkDateTimeException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 22,
+    "fragment" : "to_timestamp(1)"
   } ]
 }

sql/core/src/test/resources/sql-tests/results/typeCoercion/native/stringCastAndExpressions.sql.out

Lines changed: 3 additions & 1 deletion

@@ -344,7 +344,9 @@ org.apache.spark.SparkDateTimeException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "fragment" : ""
+    "startIndex" : 8,
+    "stopIndex" : 22,
+    "fragment" : "to_timestamp(a)"
   } ]
 }

sql/core/src/test/resources/sql-tests/results/udf/udf-union.sql.out

Lines changed: 3 additions & 3 deletions

@@ -51,9 +51,9 @@ org.apache.spark.SparkNumberFormatException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "startIndex" : 1,
-    "stopIndex" : 243,
-    "fragment" : "SELECT udf(c1) as c1, udf(c2) as c2\nFROM (SELECT udf(c1) as c1, udf(c2) as c2 FROM t1 WHERE c2 = 'a'\n      UNION ALL\n      SELECT udf(c1) as c1, udf(c2) as c2 FROM t2\n      UNION ALL\n      SELECT udf(c1) as c1, udf(c2) as c2 FROM t2)"
+    "startIndex" : 45,
+    "stopIndex" : 172,
+    "fragment" : "SELECT udf(c1) as c1, udf(c2) as c2 FROM t1 WHERE c2 = 'a'\n      UNION ALL\n      SELECT udf(c1) as c1, udf(c2) as c2 FROM t2"
   } ]
 }

sql/core/src/test/resources/sql-tests/results/union.sql.out

Lines changed: 3 additions & 3 deletions

@@ -51,9 +51,9 @@ org.apache.spark.SparkNumberFormatException
   "queryContext" : [ {
     "objectType" : "",
     "objectName" : "",
-    "startIndex" : 1,
-    "stopIndex" : 133,
-    "fragment" : "SELECT *\nFROM (SELECT * FROM t1 where c1 = 1\n      UNION ALL\n      SELECT * FROM t2\n      UNION ALL\n      SELECT * FROM t2)"
+    "startIndex" : 18,
+    "stopIndex" : 89,
+    "fragment" : "SELECT * FROM t1 where c1 = 1\n      UNION ALL\n      SELECT * FROM t2"
   } ]
 }
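
Editorial note: the fragment narrows because the failing widening Cast is now constructed under withOrigin(s.origin) of the inner Union being coerced, so the reported context is the inner UNION ALL block rather than the whole statement. A reproduction sketch follows; the view definitions and the ANSI setting are assumptions for illustration, not this commit's actual test fixtures.

// Assumed setup: schemas chosen so that, under ANSI type coercion, the UNION
// widens the string column to a numeric type and the bad cast throws at runtime.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("CREATE TEMP VIEW t1 AS SELECT 1 AS c1, 'a' AS c2")
spark.sql("CREATE TEMP VIEW t2 AS SELECT 1.0 AS c1, 2.0 AS c2")
spark.sql(
  """SELECT *
    |FROM (SELECT * FROM t1 where c1 = 1
    |      UNION ALL
    |      SELECT * FROM t2
    |      UNION ALL
    |      SELECT * FROM t2)""".stripMargin).collect()
// The SparkNumberFormatException raised while casting 'a' now reports only the
// inner block as its fragment (cf. startIndex 18, stopIndex 89 above), where it
// previously spanned the whole statement (startIndex 1, stopIndex 133).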
