Skip to content

Fix schema for call to hash.HashOf() in HashLookups #3038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 21, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions enginetest/queries/join_queries.go
Original file line number Diff line number Diff line change
Expand Up @@ -1162,20 +1162,34 @@ var JoinScriptTests = []ScriptTest{
},
},
{
// Since hash.HashOf takes in a sql.Schema to convert and hash keys,
// we need to pass in the right keys.
Name: "HashLookups regression test",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we split this into two different tests?

// After this change: https://github.com/dolthub/go-mysql-server/pull/3038
// hash.HashOf takes in a sql.Schema to convert and hash keys, so
// we need to pass in the schema of the join key.
// This tests a bug introduced in that same PR where we incorrectly pass in the entire schema,
// resulting in incorrect conversions.
Name: "HashLookup on multiple columns with tables with different schemas",
SetUpScript: []string{
"create table t1 (i int primary key, j varchar(1), k int);",
"create table t2 (i int primary key, k int);",
"insert into t1 values (111111, 'a', 111111);",
"insert into t2 values (111111, 111111);",
"create table t1 (i int primary key, k int);",
"create table t2 (i int primary key, j varchar(1), k int);",
"insert into t1 values (111111, 111111);",
"insert into t2 values (111111, 'a', 111111);",

"create table tt2 (i int primary key, j varchar(128) collate utf8mb4_0900_ai_ci);",
"create table tt1 (i int primary key, j varchar(128) collate utf8mb4_0900_ai_ci);",
"insert into tt1 values (1, 'ABCDE');",
"insert into tt2 values (1, 'abcde');",
},
Assertions: []ScriptTestAssertion{
{
Query: "select /*+ HASH_JOIN(t1, t2) */ * from t1 join t2 on t1.i = t2.i and t1.k = t2.k;",
Expected: []sql.Row{
{111111, "a", 111111, 111111, 111111},
{111111, 111111, 111111, "a", 111111},
},
},
{
Query: "select /*+ HASH_JOIN(tt1, tt2) */ * from tt1 join tt2 on tt1.i = tt2.i and tt1.j = tt2.j;",
Expected: []sql.Row{
{1, "ABCDE", 1, "abcde"},
},
},
},
Expand Down
11 changes: 11 additions & 0 deletions sql/hash/hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ var digestPool = sync.Pool{
},
}

// ExprsToSchema converts a list of sql.Expression to a sql.Schema.
// It is intended for callers of HashOf that do not already have a schema.
//
// The result holds exactly one column per expression, in argument order.
// Each column carries ONLY the expression's Type; column names and all other
// column metadata are left at their zero values.
//
// A nil schema is returned when no expressions are supplied.
func ExprsToSchema(exprs ...sql.Expression) sql.Schema {
	if len(exprs) == 0 {
		// Keep the nil result for an empty argument list.
		return nil
	}
	// The final length is known up front, so allocate once instead of
	// growing the slice through repeated appends.
	sch := make(sql.Schema, len(exprs))
	for i, expr := range exprs {
		sch[i] = &sql.Column{Type: expr.Type()}
	}
	return sch
}

// HashOf returns a hash of the given value to be used as key in a cache.
func HashOf(ctx *sql.Context, sch sql.Schema, row sql.Row) (uint64, error) {
hash := digestPool.Get().(*xxhash.Digest)
Expand Down
13 changes: 5 additions & 8 deletions sql/plan/hash_lookup.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"sync"

"github.com/dolthub/go-mysql-server/sql"
"github.com/dolthub/go-mysql-server/sql/expression"
"github.com/dolthub/go-mysql-server/sql/hash"
"github.com/dolthub/go-mysql-server/sql/types"
)
Expand All @@ -34,12 +33,14 @@ import (
// on the projected results. If cached results are not available, it
// simply delegates to the child.
func NewHashLookup(n sql.Node, rightEntryKey sql.Expression, leftProbeKey sql.Expression, joinType JoinType) *HashLookup {
	// Derive the key schema once, at construction time, from the probe key.
	// NOTE(review): only a single expression is passed here, so the schema
	// has exactly one column typed as leftProbeKey itself. If leftProbeKey is
	// a tuple of several join columns, this is not one column per tuple
	// element — confirm this is what hash.HashOf expects.
	probeKeySch := hash.ExprsToSchema(leftProbeKey)

	lookup := new(HashLookup)
	lookup.UnaryNode = UnaryNode{n}
	lookup.RightEntryKey = rightEntryKey
	lookup.LeftProbeKey = leftProbeKey
	lookup.JoinType = joinType
	// Each new HashLookup starts with its own mutex; Lookup is left unset.
	lookup.Mutex = &sync.Mutex{}
	lookup.leftKeySch = probeKeySch
	return lookup
}

Expand All @@ -50,6 +51,7 @@ type HashLookup struct {
Mutex *sync.Mutex
Lookup *map[interface{}][]sql.Row
JoinType JoinType
leftKeySch sql.Schema
}

var _ sql.Node = (*HashLookup)(nil)
Expand All @@ -71,6 +73,7 @@ func (n *HashLookup) WithExpressions(exprs ...sql.Expression) (sql.Node, error)
ret := *n
ret.RightEntryKey = exprs[0]
ret.LeftProbeKey = exprs[1]
ret.leftKeySch = hash.ExprsToSchema(ret.LeftProbeKey)
return &ret, nil
}

Expand Down Expand Up @@ -128,13 +131,7 @@ func (n *HashLookup) GetHashKey(ctx *sql.Context, e sql.Expression, row sql.Row)
return nil, err
}
if s, ok := key.([]interface{}); ok {
var sch sql.Schema
if tup, isTup := e.(*expression.Tuple); isTup {
for _, expr := range tup.Children() {
sch = append(sch, &sql.Column{Type: expr.Type()})
}
}
return hash.HashOf(ctx, sch, s)
return hash.HashOf(ctx, n.leftKeySch, s)
}
// byte slices are not hashable
if k, ok := key.([]byte); ok {
Expand Down