Skip to content

Commit 612f578

Browse files
committed
x86_64: implement optimized float @reduce(.Add)
1 parent 7c31f9d commit 612f578

File tree

9 files changed

+3908
-660
lines changed

9 files changed

+3908
-660
lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 3355 additions & 530 deletions
Large diffs are not rendered by default.

src/arch/x86_64/Encoding.zig

Lines changed: 17 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -313,7 +313,7 @@ pub const Mnemonic = enum {
313313
@"or", out, outs, outsb, outsd, outsw,
314314
pause, pop, popf, popfd, popfq, push, pushfq,
315315
rcl, rcr,
316-
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdssd, rdssq, rdtsc, rdtscp,
316+
rdfsbase, rdgsbase, rdmsr, rdpid, rdpkru, rdpmc, rdrand, rdseed, rdsspd, rdsspq, rdtsc, rdtscp,
317317
ret, rol, ror, rsm,
318318
sahf, sal, sar, sbb,
319319
scas, scasb, scasd, scasq, scasw,
@@ -436,6 +436,7 @@ pub const Mnemonic = enum {
436436
pblendvb, pblendw,
437437
pcmpeqq,
438438
pextrb, pextrd, pextrq,
439+
phminposuw,
439440
pinsrb, pinsrd, pinsrq,
440441
pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw,
441442
pmovsxbd, pmovsxbq, pmovsxbw, pmovsxdq, pmovsxwd, pmovsxwq,
@@ -494,19 +495,19 @@ pub const Mnemonic = enum {
494495
vpblendvb, vpblendw, vpclmulqdq,
495496
vpcmpeqb, vpcmpeqd, vpcmpeqq, vpcmpeqw,
496497
vpcmpgtb, vpcmpgtd, vpcmpgtq, vpcmpgtw,
497-
vphaddw, vphaddsw, vphaddd, vphsubw, vphsubsw, vphsubd,
498498
vperm2f128, vpermilpd, vpermilps,
499499
vpextrb, vpextrd, vpextrq, vpextrw,
500+
vphaddw, vphaddsw, vphaddd, vphminposuw, vphsubw, vphsubsw, vphsubd,
500501
vpinsrb, vpinsrd, vpinsrq, vpinsrw,
502+
vpmaddubsw, vpmaddwd,
501503
vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw,
502504
vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw,
503-
vpmaddubsw,
504505
vpmovmskb,
505506
vpmovsxbd, vpmovsxbq, vpmovsxbw, vpmovsxdq, vpmovsxwd, vpmovsxwq,
506507
vpmovzxbd, vpmovzxbq, vpmovzxbw, vpmovzxdq, vpmovzxwd, vpmovzxwq,
507-
vpmuldq, vpmulhrsw, vpmulhw, vpmulld, vpmullw, vpmuludq,
508+
vpmuldq, vpmulhrsw, vpmulhuw, vpmulhw, vpmulld, vpmullw, vpmuludq,
508509
vpor,
509-
vpshufb, vpshufd, vpshufhw, vpshuflw,
510+
vpsadbw, vpshufb, vpshufd, vpshufhw, vpshuflw,
510511
vpsignb, vpsignd, vpsignw,
511512
vpslld, vpslldq, vpsllq, vpsllw,
512513
vpsrad, vpsraq, vpsraw,
@@ -1029,7 +1030,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op
10291030
}
10301031

10311032
const mnemonic_to_encodings_map = init: {
1032-
@setEvalBranchQuota(5_800);
1033+
@setEvalBranchQuota(5_900);
10331034
const ModrmExt = u3;
10341035
const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, ModrmExt, Mode, Feature };
10351036
const encodings: []const Entry = @import("encodings.zon");
@@ -1038,17 +1039,17 @@ const mnemonic_to_encodings_map = init: {
10381039
var mnemonic_map: [mnemonic_count][]Data = @splat(&.{});
10391040
for (encodings) |entry| mnemonic_map[@intFromEnum(entry[0])].len += 1;
10401041
var data_storage: [encodings.len]Data = undefined;
1041-
var storage_i: usize = 0;
1042+
var storage_index: usize = 0;
10421043
for (&mnemonic_map) |*value| {
1043-
value.ptr = data_storage[storage_i..].ptr;
1044-
storage_i += value.len;
1044+
value.ptr = data_storage[storage_index..].ptr;
1045+
storage_index += value.len;
10451046
}
1046-
var mnemonic_i: [mnemonic_count]usize = @splat(0);
1047+
var mnemonic_index: [mnemonic_count]usize = @splat(0);
10471048
const ops_len = @typeInfo(@FieldType(Data, "ops")).array.len;
10481049
const opc_len = @typeInfo(@FieldType(Data, "opc")).array.len;
10491050
for (encodings) |entry| {
1050-
const i = &mnemonic_i[@intFromEnum(entry[0])];
1051-
mnemonic_map[@intFromEnum(entry[0])][i.*] = .{
1051+
const index = &mnemonic_index[@intFromEnum(entry[0])];
1052+
mnemonic_map[@intFromEnum(entry[0])][index.*] = .{
10521053
.op_en = entry[1],
10531054
.ops = (entry[2] ++ .{.none} ** (ops_len - entry[2].len)).*,
10541055
.opc_len = entry[3].len,
@@ -1057,14 +1058,14 @@ const mnemonic_to_encodings_map = init: {
10571058
.mode = entry[5],
10581059
.feature = entry[6],
10591060
};
1060-
i.* += 1;
1061+
index.* += 1;
10611062
}
10621063
const final_storage = data_storage;
10631064
var final_map: [mnemonic_count][]const Data = @splat(&.{});
1064-
storage_i = 0;
1065+
storage_index = 0;
10651066
for (&final_map, mnemonic_map) |*final_value, value| {
1066-
final_value.* = final_storage[storage_i..][0..value.len];
1067-
storage_i += value.len;
1067+
final_value.* = final_storage[storage_index..][0..value.len];
1068+
storage_index += value.len;
10681069
}
10691070
break :init final_map;
10701071
};

src/arch/x86_64/Lower.zig

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -567,7 +567,7 @@ fn emit(lower: *Lower, prefix: Prefix, mnemonic: Mnemonic, ops: []const Operand)
567567
}
568568

569569
fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
570-
@setEvalBranchQuota(2_600);
570+
@setEvalBranchQuota(2_800);
571571
const fixes = switch (inst.ops) {
572572
.none => inst.data.none.fixes,
573573
.inst => inst.data.inst.fixes,
@@ -601,9 +601,9 @@ fn generic(lower: *Lower, inst: Mir.Inst) Error!void {
601601
var buf: [max_len]u8 = undefined;
602602

603603
const fixes_name = @tagName(fixes);
604-
const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + 1 else 0..];
605-
const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?;
606-
const parts = .{ pattern[0..wildcard_i], @tagName(inst.tag), pattern[wildcard_i + 1 ..] };
604+
const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + " ".len else 0..];
605+
const wildcard_index = std.mem.indexOfScalar(u8, pattern, '_').?;
606+
const parts = .{ pattern[0..wildcard_index], @tagName(inst.tag), pattern[wildcard_index + "_".len ..] };
607607
const err_msg = "unsupported mnemonic: ";
608608
const mnemonic = std.fmt.bufPrint(&buf, "{s}{s}{s}", parts) catch
609609
return lower.fail(err_msg ++ "'{s}{s}{s}'", parts);

0 commit comments

Comments
 (0)