diff --git a/enginetest/queries/join_queries.go b/enginetest/queries/join_queries.go index ab7aef1901..2cd269c03a 100644 --- a/enginetest/queries/join_queries.go +++ b/enginetest/queries/join_queries.go @@ -1161,6 +1161,45 @@ var JoinScriptTests = []ScriptTest{ }, }, }, + { + // After this change: https://github.com/dolthub/go-mysql-server/pull/3038 + // hash.HashOf takes in a sql.Schema to convert and hash keys, so + // we need to pass in the schema of the join key. + // This tests a bug introduced in that same PR where we incorrectly pass in the entire schema, + // resulting in incorrect conversions. + Name: "HashLookup on multiple columns with tables with different schemas", + SetUpScript: []string{ + "create table t1 (i int primary key, k int);", + "create table t2 (i int primary key, j varchar(1), k int);", + "insert into t1 values (111111, 111111);", + "insert into t2 values (111111, 'a', 111111);", + }, + Assertions: []ScriptTestAssertion{ + { + Query: "select /*+ HASH_JOIN(t1, t2) */ * from t1 join t2 on t1.i = t2.i and t1.k = t2.k;", + Expected: []sql.Row{ + {111111, 111111, 111111, "a", 111111}, + }, + }, + }, + }, + { + Name: "HashLookup on multiple columns with collations", + SetUpScript: []string{ + "create table t1 (i int primary key, j varchar(128) collate utf8mb4_0900_ai_ci);", + "create table t2 (i int primary key, j varchar(128) collate utf8mb4_0900_ai_ci);", + "insert into t1 values (1, 'ABCDE');", + "insert into t2 values (1, 'abcde');", + }, + Assertions: []ScriptTestAssertion{ + { + Query: "select /*+ HASH_JOIN(t1, t2) */ * from t1 join t2 on t1.i = t2.i and t1.j = t2.j;", + Expected: []sql.Row{ + {1, "ABCDE", 1, "abcde"}, + }, + }, + }, + }, } var LateralJoinScriptTests = []ScriptTest{ diff --git a/sql/hash/hash.go b/sql/hash/hash.go index e37827f7e5..94bcc64206 100644 --- a/sql/hash/hash.go +++ b/sql/hash/hash.go @@ -30,6 +30,17 @@ var digestPool = sync.Pool{ }, } +// ExprsToSchema converts a list of sql.Expression to a sql.Schema. +// This is used for functions that use HashOf, but don't already have a schema. +// The generated schema ONLY contains the types of the expressions without any column names or any other info. +func ExprsToSchema(exprs ...sql.Expression) sql.Schema { + var sch sql.Schema + for _, expr := range exprs { + sch = append(sch, &sql.Column{Type: expr.Type()}) + } + return sch +} + // HashOf returns a hash of the given value to be used as key in a cache. func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) { hash := digestPool.Get().(*xxhash.Digest) diff --git a/sql/plan/hash_lookup.go b/sql/plan/hash_lookup.go index f65bdecad2..7926d29255 100644 --- a/sql/plan/hash_lookup.go +++ b/sql/plan/hash_lookup.go @@ -33,12 +33,14 @@ import ( // on the projected results. If cached results are not available, it // simply delegates to the child. func NewHashLookup(n sql.Node, rightEntryKey sql.Expression, leftProbeKey sql.Expression, joinType JoinType) *HashLookup { + leftKeySch := hash.ExprsToSchema(leftProbeKey) return &HashLookup{ UnaryNode: UnaryNode{n}, RightEntryKey: rightEntryKey, LeftProbeKey: leftProbeKey, Mutex: new(sync.Mutex), JoinType: joinType, + leftKeySch: leftKeySch, } } @@ -49,6 +51,7 @@ type HashLookup struct { Mutex *sync.Mutex Lookup *map[interface{}][]sql.Row JoinType JoinType + leftKeySch sql.Schema } var _ sql.Node = (*HashLookup)(nil) @@ -70,6 +73,7 @@ func (n *HashLookup) WithExpressions(exprs ...sql.Expression) (sql.Node, error) ret := *n ret.RightEntryKey = exprs[0] ret.LeftProbeKey = exprs[1] + ret.leftKeySch = hash.ExprsToSchema(ret.LeftProbeKey) return &ret, nil } @@ -127,7 +131,7 @@ func (n *HashLookup) GetHashKey(ctx *sql.Context, e sql.Expression, row sql.Row) return nil, err } if s, ok := key.([]interface{}); ok { - return hash.HashOf(ctx, n.Schema(), s) + return hash.HashOf(ctx, n.leftKeySch, s) } // byte slices are not hashable if k, ok := key.([]byte); ok {